1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_root-inl.h"
22 #include "class_table.h"
23 #include "code_generator_utils.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_list.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86_64.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/method_type.h"
39 #include "mirror/object_reference.h"
40 #include "mirror/var_handle.h"
41 #include "optimizing/nodes.h"
42 #include "profiling_info_builder.h"
43 #include "scoped_thread_state_change-inl.h"
44 #include "thread.h"
45 #include "trace.h"
46 #include "utils/assembler.h"
47 #include "utils/stack_checks.h"
48 #include "utils/x86_64/assembler_x86_64.h"
49 #include "utils/x86_64/constants_x86_64.h"
50 #include "utils/x86_64/managed_register_x86_64.h"
51
52 namespace art HIDDEN {
53
54 template<class MirrorType>
55 class GcRoot;
56
57 namespace x86_64 {
58
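// The current ArtMethod* is stored at the bottom of the managed frame, i.e. at offset 0
// from RSP.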
59 static constexpr int kCurrentMethodStackOffset = 0;
60 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
61 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
62 // therefore generates less code/data when num_entries is small.
63 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
64
65 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
66 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
67
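// Bit C2 (bit 10) of the x87 FPU status word; FPREM keeps it set while the partial
// remainder computation is still incomplete.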
68 static constexpr int kC2ConditionMask = 0x400;
69
70 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
71 // Custom calling convention: RAX serves as both input and output.
72 RegisterSet caller_saves = RegisterSet::Empty();
73 caller_saves.Add(Location::RegisterLocation(RAX));
74 return caller_saves;
75 }
76
77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
78 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
79 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
80
81 class NullCheckSlowPathX86_64 : public SlowPathCode {
82 public:
83 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
84
85 void EmitNativeCode(CodeGenerator* codegen) override {
86 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
87 __ Bind(GetEntryLabel());
88 if (instruction_->CanThrowIntoCatchBlock()) {
89 // Live registers will be restored in the catch block if caught.
90 SaveLiveRegisters(codegen, instruction_->GetLocations());
91 }
92 x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
93 instruction_,
94 instruction_->GetDexPc(),
95 this);
96 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
97 }
98
99 bool IsFatal() const override { return true; }
100
101 const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
102
103 private:
104 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
105 };
106
107 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
108 public:
109 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
110
111 void EmitNativeCode(CodeGenerator* codegen) override {
112 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
113 __ Bind(GetEntryLabel());
114 x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
115 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
116 }
117
118 bool IsFatal() const override { return true; }
119
120 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
121
122 private:
123 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
124 };
125
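// Slow path for Div/Rem when the divisor is -1: the quotient is the negated dividend and
// the remainder is 0, avoiding the overflow that idiv/idivq would raise for a kMinInt or
// kMinLong dividend.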
126 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
127 public:
128 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
129 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
130
131 void EmitNativeCode(CodeGenerator* codegen) override {
132 __ Bind(GetEntryLabel());
133 if (type_ == DataType::Type::kInt32) {
134 if (is_div_) {
135 __ negl(cpu_reg_);
136 } else {
137 __ xorl(cpu_reg_, cpu_reg_);
138 }
139
140 } else {
141 DCHECK_EQ(DataType::Type::kInt64, type_);
142 if (is_div_) {
143 __ negq(cpu_reg_);
144 } else {
145 __ xorl(cpu_reg_, cpu_reg_);
146 }
147 }
148 __ jmp(GetExitLabel());
149 }
150
151 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
152
153 private:
154 const CpuRegister cpu_reg_;
155 const DataType::Type type_;
156 const bool is_div_;
157 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
158 };
159
160 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
161 public:
162 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
163 : SlowPathCode(instruction), successor_(successor) {}
164
165 void EmitNativeCode(CodeGenerator* codegen) override {
166 LocationSummary* locations = instruction_->GetLocations();
167 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
168 __ Bind(GetEntryLabel());
169 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
170 x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
171 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
172 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
173 if (successor_ == nullptr) {
174 __ jmp(GetReturnLabel());
175 } else {
176 __ jmp(x86_64_codegen->GetLabelOf(successor_));
177 }
178 }
179
180 Label* GetReturnLabel() {
181 DCHECK(successor_ == nullptr);
182 return &return_label_;
183 }
184
185 HBasicBlock* GetSuccessor() const {
186 return successor_;
187 }
188
189 const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
190
191 private:
192 HBasicBlock* const successor_;
193 Label return_label_;
194
195 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
196 };
197
198 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
199 public:
200 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
201 : SlowPathCode(instruction) {}
202
203 void EmitNativeCode(CodeGenerator* codegen) override {
204 LocationSummary* locations = instruction_->GetLocations();
205 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
206 __ Bind(GetEntryLabel());
207 if (instruction_->CanThrowIntoCatchBlock()) {
208 // Live registers will be restored in the catch block if caught.
209 SaveLiveRegisters(codegen, locations);
210 }
211
212 Location index_loc = locations->InAt(0);
213 Location length_loc = locations->InAt(1);
214 InvokeRuntimeCallingConvention calling_convention;
215 Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
216 Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
217
218 // Are we using an array length from memory?
219 if (!length_loc.IsValid()) {
220 DCHECK(instruction_->InputAt(1)->IsArrayLength());
221 HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
222 DCHECK(array_length->IsEmittedAtUseSite());
223 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
224 Location array_loc = array_length->GetLocations()->InAt(0);
225 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
226 if (!index_loc.Equals(length_arg)) {
227 // The index is not clobbered by loading the length directly to `length_arg`.
228 __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
229 x86_64_codegen->Move(index_arg, index_loc);
230 } else if (!array_loc.Equals(index_arg)) {
231 // The array reference is not clobbered by the index move.
232 x86_64_codegen->Move(index_arg, index_loc);
233 __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
234 } else {
235 // Load the array length into `TMP`.
236 DCHECK(codegen->IsBlockedCoreRegister(TMP));
237 __ movl(CpuRegister(TMP), array_len);
238 // Single move to CPU register does not clobber `TMP`.
239 x86_64_codegen->Move(index_arg, index_loc);
240 __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
241 }
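// With string compression, the low bit of the length field is the compression flag,
// so shift it out to pass the actual character count.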
242 if (mirror::kUseStringCompression && array_length->IsStringLength()) {
243 __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
244 }
245 } else {
246 // We're moving two locations to locations that could overlap,
247 // so we need a parallel move resolver.
248 codegen->EmitParallelMoves(
249 index_loc,
250 index_arg,
251 DataType::Type::kInt32,
252 length_loc,
253 length_arg,
254 DataType::Type::kInt32);
255 }
256
257 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
258 ? kQuickThrowStringBounds
259 : kQuickThrowArrayBounds;
260 x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
261 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
262 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
263 }
264
265 bool IsFatal() const override { return true; }
266
267 const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
268
269 private:
270 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
271 };
272
273 class LoadMethodTypeSlowPathX86_64: public SlowPathCode {
274 public:
275 explicit LoadMethodTypeSlowPathX86_64(HLoadMethodType* mt) : SlowPathCode(mt) {}
276
277 void EmitNativeCode(CodeGenerator* codegen) override {
278 LocationSummary* locations = instruction_->GetLocations();
279 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
280
281 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
282 __ Bind(GetEntryLabel());
283 SaveLiveRegisters(codegen, locations);
284
285 const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex();
286 // Custom calling convention: RAX serves as both input and output.
287 __ movl(CpuRegister(RAX), Immediate(proto_index.index_));
288 x86_64_codegen->InvokeRuntime(kQuickResolveMethodType,
289 instruction_,
290 instruction_->GetDexPc(),
291 this);
292 CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
293 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
294 RestoreLiveRegisters(codegen, locations);
295
296 __ jmp(GetExitLabel());
297 }
298
299 const char* GetDescription() const override { return "LoadMethodTypeSlowPathX86_64"; }
300
301 private:
302 DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathX86_64);
303 };
304
305 class LoadClassSlowPathX86_64 : public SlowPathCode {
306 public:
307 LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
308 : SlowPathCode(at), cls_(cls) {
309 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
310 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
311 }
312
313 void EmitNativeCode(CodeGenerator* codegen) override {
314 LocationSummary* locations = instruction_->GetLocations();
315 Location out = locations->Out();
316 const uint32_t dex_pc = instruction_->GetDexPc();
317 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
318 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
319
320 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
321 __ Bind(GetEntryLabel());
322 SaveLiveRegisters(codegen, locations);
323
324 // Custom calling convention: RAX serves as both input and output.
325 if (must_resolve_type) {
326 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
327 x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
328 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
329 &cls_->GetDexFile()));
330 dex::TypeIndex type_index = cls_->GetTypeIndex();
331 __ movl(CpuRegister(RAX), Immediate(type_index.index_));
332 if (cls_->NeedsAccessCheck()) {
333 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
334 x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
335 } else {
336 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
337 x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
338 }
339 // If we also must_do_clinit, the resolved type is now in the correct register.
340 } else {
341 DCHECK(must_do_clinit);
342 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
343 x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
344 }
345 if (must_do_clinit) {
346 x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
347 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
348 }
349
350 // Move the class to the desired location.
351 if (out.IsValid()) {
352 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
353 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
354 }
355
356 RestoreLiveRegisters(codegen, locations);
357 __ jmp(GetExitLabel());
358 }
359
360 const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
361
362 private:
363 // The class this slow path will load.
364 HLoadClass* const cls_;
365
366 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
367 };
368
369 class LoadStringSlowPathX86_64 : public SlowPathCode {
370 public:
371 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
372
373 void EmitNativeCode(CodeGenerator* codegen) override {
374 LocationSummary* locations = instruction_->GetLocations();
375 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
376
377 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
378 __ Bind(GetEntryLabel());
379 SaveLiveRegisters(codegen, locations);
380
381 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
382 // Custom calling convention: RAX serves as both input and output.
383 __ movl(CpuRegister(RAX), Immediate(string_index.index_));
384 x86_64_codegen->InvokeRuntime(kQuickResolveString,
385 instruction_,
386 instruction_->GetDexPc(),
387 this);
388 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
389 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
390 RestoreLiveRegisters(codegen, locations);
391
392 __ jmp(GetExitLabel());
393 }
394
395 const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
396
397 private:
398 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
399 };
400
401 class TypeCheckSlowPathX86_64 : public SlowPathCode {
402 public:
403 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
404 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
405
406 void EmitNativeCode(CodeGenerator* codegen) override {
407 LocationSummary* locations = instruction_->GetLocations();
408 uint32_t dex_pc = instruction_->GetDexPc();
409 DCHECK(instruction_->IsCheckCast()
410 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
411
412 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
413 __ Bind(GetEntryLabel());
414
415 if (kPoisonHeapReferences &&
416 instruction_->IsCheckCast() &&
417 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
418 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
419 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
420 }
421
422 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
423 SaveLiveRegisters(codegen, locations);
424 }
425
426 // We're moving two locations to locations that could overlap, so we need a parallel
427 // move resolver.
428 InvokeRuntimeCallingConvention calling_convention;
429 codegen->EmitParallelMoves(locations->InAt(0),
430 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
431 DataType::Type::kReference,
432 locations->InAt(1),
433 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
434 DataType::Type::kReference);
435 if (instruction_->IsInstanceOf()) {
436 x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
437 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
438 } else {
439 DCHECK(instruction_->IsCheckCast());
440 x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
441 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
442 }
443
444 if (!is_fatal_) {
445 if (instruction_->IsInstanceOf()) {
446 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
447 }
448
449 RestoreLiveRegisters(codegen, locations);
450 __ jmp(GetExitLabel());
451 }
452 }
453
454 const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
455
456 bool IsFatal() const override { return is_fatal_; }
457
458 private:
459 const bool is_fatal_;
460
461 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
462 };
463
464 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
465 public:
466 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
467 : SlowPathCode(instruction) {}
468
469 void EmitNativeCode(CodeGenerator* codegen) override {
470 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
471 __ Bind(GetEntryLabel());
472 LocationSummary* locations = instruction_->GetLocations();
473 SaveLiveRegisters(codegen, locations);
474 InvokeRuntimeCallingConvention calling_convention;
475 x86_64_codegen->Load32BitValue(
476 CpuRegister(calling_convention.GetRegisterAt(0)),
477 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
478 x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
479 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
480 }
481
482 const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
483
484 private:
485 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
486 };
487
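// Slow path for an object ArraySet that defers to the aput-object runtime entrypoint,
// which performs the array store type check and the store itself.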
488 class ArraySetSlowPathX86_64 : public SlowPathCode {
489 public:
490 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
491
492 void EmitNativeCode(CodeGenerator* codegen) override {
493 LocationSummary* locations = instruction_->GetLocations();
494 __ Bind(GetEntryLabel());
495 SaveLiveRegisters(codegen, locations);
496
497 InvokeRuntimeCallingConvention calling_convention;
498 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
499 parallel_move.AddMove(
500 locations->InAt(0),
501 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
502 DataType::Type::kReference,
503 nullptr);
504 parallel_move.AddMove(
505 locations->InAt(1),
506 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
507 DataType::Type::kInt32,
508 nullptr);
509 parallel_move.AddMove(
510 locations->InAt(2),
511 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
512 DataType::Type::kReference,
513 nullptr);
514 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
515
516 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
517 x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
518 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
519 RestoreLiveRegisters(codegen, locations);
520 __ jmp(GetExitLabel());
521 }
522
523 const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
524
525 private:
526 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
527 };
528
529 // Slow path marking an object reference `ref` during a read
530 // barrier. The field `obj.field` in the object `obj` holding this
531 // reference does not get updated by this slow path after marking (see
532 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
533 //
534 // This means that after the execution of this slow path, `ref` will
535 // always be up-to-date, but `obj.field` may not; i.e., after the
536 // flip, `ref` will be a to-space reference, but `obj.field` will
537 // probably still be a from-space reference (unless it gets updated by
538 // another thread, or if another thread installed another object
539 // reference (different from `ref`) in `obj.field`).
540 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
541 public:
542 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
543 Location ref,
544 bool unpoison_ref_before_marking)
545 : SlowPathCode(instruction),
546 ref_(ref),
547 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
548 }
549
550 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
551
552 void EmitNativeCode(CodeGenerator* codegen) override {
553 DCHECK(codegen->EmitReadBarrier());
554 LocationSummary* locations = instruction_->GetLocations();
555 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
556 Register ref_reg = ref_cpu_reg.AsRegister();
557 DCHECK(locations->CanCall());
558 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
559 DCHECK(instruction_->IsInstanceFieldGet() ||
560 instruction_->IsStaticFieldGet() ||
561 instruction_->IsArrayGet() ||
562 instruction_->IsArraySet() ||
563 instruction_->IsLoadClass() ||
564 instruction_->IsLoadMethodType() ||
565 instruction_->IsLoadString() ||
566 instruction_->IsInstanceOf() ||
567 instruction_->IsCheckCast() ||
568 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
569 << "Unexpected instruction in read barrier marking slow path: "
570 << instruction_->DebugName();
571
572 __ Bind(GetEntryLabel());
573 if (unpoison_ref_before_marking_) {
574 // Object* ref = ref_addr->AsMirrorPtr()
575 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
576 }
577 // No need to save live registers; it's taken care of by the
578 // entrypoint. Also, there is no need to update the stack mask,
579 // as this runtime call will not trigger a garbage collection.
580 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
581 DCHECK_NE(ref_reg, RSP);
582 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
583 // "Compact" slow path, saving two moves.
584 //
585 // Instead of using the standard runtime calling convention (input
586 // in RDI and output in RAX):
587 //
588 // RDI <- ref
589 // RAX <- ReadBarrierMark(RDI)
590 // ref <- RAX
591 //
592 // we just use rX (the register containing `ref`) as input and output
593 // of a dedicated entrypoint:
594 //
595 // rX <- ReadBarrierMarkRegX(rX)
596 //
597 int32_t entry_point_offset =
598 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
599 // This runtime call does not require a stack map.
600 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
601 __ jmp(GetExitLabel());
602 }
603
604 private:
605 // The location (register) of the marked object reference.
606 const Location ref_;
607 // Should the reference in `ref_` be unpoisoned prior to marking it?
608 const bool unpoison_ref_before_marking_;
609
610 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
611 };
612
613 // Slow path marking an object reference `ref` during a read barrier,
614 // and if needed, atomically updating the field `obj.field` in the
615 // object `obj` holding this reference after marking (contrary to
616 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
617 // `obj.field`).
618 //
619 // This means that after the execution of this slow path, both `ref`
620 // and `obj.field` will be up-to-date; i.e., after the flip, both will
621 // hold the same to-space reference (unless another thread installed
622 // another object reference (different from `ref`) in `obj.field`).
623 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
624 public:
625 ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
626 Location ref,
627 CpuRegister obj,
628 const Address& field_addr,
629 bool unpoison_ref_before_marking,
630 CpuRegister temp1,
631 CpuRegister temp2)
632 : SlowPathCode(instruction),
633 ref_(ref),
634 obj_(obj),
635 field_addr_(field_addr),
636 unpoison_ref_before_marking_(unpoison_ref_before_marking),
637 temp1_(temp1),
638 temp2_(temp2) {
639 }
640
641 const char* GetDescription() const override {
642 return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
643 }
644
645 void EmitNativeCode(CodeGenerator* codegen) override {
646 DCHECK(codegen->EmitReadBarrier());
647 LocationSummary* locations = instruction_->GetLocations();
648 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
649 Register ref_reg = ref_cpu_reg.AsRegister();
650 DCHECK(locations->CanCall());
651 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
652 DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
653 << "Unexpected instruction in read barrier marking and field updating slow path: "
654 << instruction_->DebugName();
655 HInvoke* invoke = instruction_->AsInvoke();
656 DCHECK(IsUnsafeCASReference(invoke) ||
657 IsUnsafeGetAndSetReference(invoke) ||
658 IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
659
660 __ Bind(GetEntryLabel());
661 if (unpoison_ref_before_marking_) {
662 // Object* ref = ref_addr->AsMirrorPtr()
663 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
664 }
665
666 // Save the old (unpoisoned) reference.
667 __ movl(temp1_, ref_cpu_reg);
668
669 // No need to save live registers; it's taken care of by the
670 // entrypoint. Also, there is no need to update the stack mask,
671 // as this runtime call will not trigger a garbage collection.
672 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
673 DCHECK_NE(ref_reg, RSP);
674 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
675 // "Compact" slow path, saving two moves.
676 //
677 // Instead of using the standard runtime calling convention (input
678 // in RDI and output in RAX):
679 //
680 // RDI <- ref
681 // RAX <- ReadBarrierMark(RDI)
682 // ref <- RAX
683 //
684 // we just use rX (the register containing `ref`) as input and output
685 // of a dedicated entrypoint:
686 //
687 // rX <- ReadBarrierMarkRegX(rX)
688 //
689 int32_t entry_point_offset =
690 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
691 // This runtime call does not require a stack map.
692 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
693
694 // If the new reference is different from the old reference,
695 // update the field in the holder (`*field_addr`).
696 //
697 // Note that this field could also hold a different object, if
698 // another thread had concurrently changed it. In that case, the
699 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
700 // operation below would abort the CAS, leaving the field as-is.
701 NearLabel done;
702 __ cmpl(temp1_, ref_cpu_reg);
703 __ j(kEqual, &done);
704
705 // Update the holder's field atomically. This may fail if
706 // the mutator updates it before us, but that is OK. This is achieved
707 // using a strong compare-and-set (CAS) operation with relaxed
708 // memory synchronization ordering, where the expected value is
709 // the old reference and the desired value is the new reference.
710 // This operation is implemented with a 32-bit LOCK CMPXCHGL
711 // instruction, which requires the expected value (the old
712 // reference) to be in EAX. Save RAX beforehand, and move the
713 // expected value (stored in `temp1_`) into EAX.
714 __ movq(temp2_, CpuRegister(RAX));
715 __ movl(CpuRegister(RAX), temp1_);
716
717 // Convenience aliases.
718 CpuRegister base = obj_;
719 CpuRegister expected = CpuRegister(RAX);
720 CpuRegister value = ref_cpu_reg;
721
722 bool base_equals_value = (base.AsRegister() == value.AsRegister());
723 Register value_reg = ref_reg;
724 if (kPoisonHeapReferences) {
725 if (base_equals_value) {
726 // If `base` and `value` are the same register location, move
727 // `value_reg` to a temporary register. This way, poisoning
728 // `value_reg` won't invalidate `base`.
729 value_reg = temp1_.AsRegister();
730 __ movl(CpuRegister(value_reg), base);
731 }
732
733 // Check that the register allocator did not assign the location
734 // of `expected` (RAX) to `value` nor to `base`, so that heap
735 // poisoning (when enabled) works as intended below.
736 // - If `value` were equal to `expected`, both references would
737 // be poisoned twice, meaning they would not be poisoned at
738 // all, as heap poisoning uses address negation.
739 // - If `base` were equal to `expected`, poisoning `expected`
740 // would invalidate `base`.
741 DCHECK_NE(value_reg, expected.AsRegister());
742 DCHECK_NE(base.AsRegister(), expected.AsRegister());
743
744 __ PoisonHeapReference(expected);
745 __ PoisonHeapReference(CpuRegister(value_reg));
746 }
747
748 __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
749
750 // If heap poisoning is enabled, we need to unpoison the values
751 // that were poisoned earlier.
752 if (kPoisonHeapReferences) {
753 if (base_equals_value) {
754 // `value_reg` has been moved to a temporary register, no need
755 // to unpoison it.
756 } else {
757 __ UnpoisonHeapReference(CpuRegister(value_reg));
758 }
759 // No need to unpoison `expected` (RAX), as it is overwritten below.
760 }
761
762 // Restore RAX.
763 __ movq(CpuRegister(RAX), temp2_);
764
765 __ Bind(&done);
766 __ jmp(GetExitLabel());
767 }
768
769 private:
770 // The location (register) of the marked object reference.
771 const Location ref_;
772 // The register containing the object holding the marked object reference field.
773 const CpuRegister obj_;
774 // The address of the marked reference field. The base of this address must be `obj_`.
775 const Address field_addr_;
776
777 // Should the reference in `ref_` be unpoisoned prior to marking it?
778 const bool unpoison_ref_before_marking_;
779
780 const CpuRegister temp1_;
781 const CpuRegister temp2_;
782
783 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
784 };
785
786 // Slow path generating a read barrier for a heap reference.
787 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
788 public:
789 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
790 Location out,
791 Location ref,
792 Location obj,
793 uint32_t offset,
794 Location index)
795 : SlowPathCode(instruction),
796 out_(out),
797 ref_(ref),
798 obj_(obj),
799 offset_(offset),
800 index_(index) {
801 // If `obj` is equal to `out` or `ref`, it means the initial
802 // object has been overwritten by (or after) the heap object
803 // reference load to be instrumented, e.g.:
804 //
805 // __ movl(out, Address(out, offset));
806 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
807 //
808 // In that case, we have lost the information about the original
809 // object, and the emitted read barrier cannot work properly.
810 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
811 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
812 }
813
814 void EmitNativeCode(CodeGenerator* codegen) override {
815 DCHECK(codegen->EmitReadBarrier());
816 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
817 LocationSummary* locations = instruction_->GetLocations();
818 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
819 DCHECK(locations->CanCall());
820 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
821 DCHECK(instruction_->IsInstanceFieldGet() ||
822 instruction_->IsStaticFieldGet() ||
823 instruction_->IsArrayGet() ||
824 instruction_->IsInstanceOf() ||
825 instruction_->IsCheckCast() ||
826 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
827 << "Unexpected instruction in read barrier for heap reference slow path: "
828 << instruction_->DebugName();
829
830 __ Bind(GetEntryLabel());
831 SaveLiveRegisters(codegen, locations);
832
833 // We may have to change the index's value, but as `index_` is a
834 // constant member (like other "inputs" of this slow path),
835 // introduce a copy of it, `index`.
836 Location index = index_;
837 if (index_.IsValid()) {
838 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
839 if (instruction_->IsArrayGet()) {
840 // Compute real offset and store it in index_.
841 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
842 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
843 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
844 // We are about to change the value of `index_reg` (see the
845 // calls to art::x86_64::X86_64Assembler::shll and
846 // art::x86_64::X86_64Assembler::AddImmediate below), but it
847 // has not been saved by the previous call to
848 // art::SlowPathCode::SaveLiveRegisters, as it is a
849 // callee-save register --
850 // art::SlowPathCode::SaveLiveRegisters does not consider
851 // callee-save registers, as it has been designed with the
852 // assumption that callee-save registers are supposed to be
853 // handled by the called function. So, as a callee-save
854 // register, `index_reg` _would_ eventually be saved onto
855 // the stack, but it would be too late: we would have
856 // changed its value earlier. Therefore, we manually save
857 // it here into another freely available register,
858 // `free_reg`, chosen of course among the caller-save
859 // registers (as a callee-save `free_reg` register would
860 // exhibit the same problem).
861 //
862 // Note we could have requested a temporary register from
863 // the register allocator instead; but we prefer not to, as
864 // this is a slow path, and we know we can find a
865 // caller-save register that is available.
866 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
867 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
868 index_reg = free_reg;
869 index = Location::RegisterLocation(index_reg);
870 } else {
871 // The initial register stored in `index_` has already been
872 // saved in the call to art::SlowPathCode::SaveLiveRegisters
873 // (as it is not a callee-save register), so we can freely
874 // use it.
875 }
876 // Shifting the index value contained in `index_reg` by the
877 // scale factor (2) cannot overflow in practice, as the
878 // runtime is unable to allocate object arrays with a size
879 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
880 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
881 static_assert(
882 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
883 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
884 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
885 } else {
886 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
887 // intrinsics, `index_` is not shifted by a scale factor of 2
888 // (as in the case of ArrayGet), as it is actually an offset
889 // to an object field within an object.
890 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
891 DCHECK(instruction_->GetLocations()->Intrinsified());
892 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
893 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
894 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
895 (instruction_->AsInvoke()->GetIntrinsic() ==
896 Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
897 (instruction_->AsInvoke()->GetIntrinsic() ==
898 Intrinsics::kJdkUnsafeGetReferenceAcquire))
899 << instruction_->AsInvoke()->GetIntrinsic();
900 DCHECK_EQ(offset_, 0U);
901 DCHECK(index_.IsRegister());
902 }
903 }
904
905 // We're moving two or three locations to locations that could
906 // overlap, so we need a parallel move resolver.
907 InvokeRuntimeCallingConvention calling_convention;
908 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
909 parallel_move.AddMove(ref_,
910 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
911 DataType::Type::kReference,
912 nullptr);
913 parallel_move.AddMove(obj_,
914 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
915 DataType::Type::kReference,
916 nullptr);
917 if (index.IsValid()) {
918 parallel_move.AddMove(index,
919 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
920 DataType::Type::kInt32,
921 nullptr);
922 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
923 } else {
924 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
925 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
926 }
927 x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
928 instruction_,
929 instruction_->GetDexPc(),
930 this);
931 CheckEntrypointTypes<
932 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
933 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
934
935 RestoreLiveRegisters(codegen, locations);
936 __ jmp(GetExitLabel());
937 }
938
939 const char* GetDescription() const override {
940 return "ReadBarrierForHeapReferenceSlowPathX86_64";
941 }
942
943 private:
944 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
945 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
946 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
947 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
948 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
949 return static_cast<CpuRegister>(i);
950 }
951 }
952 // We shall never fail to find a free caller-save register, as
953 // there are more than two core caller-save registers on x86-64
954 // (meaning it is possible to find one which is different from
955 // `ref` and `obj`).
956 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
957 LOG(FATAL) << "Could not find a free caller-save register";
958 UNREACHABLE();
959 }
960
961 const Location out_;
962 const Location ref_;
963 const Location obj_;
964 const uint32_t offset_;
965 // An additional location containing an index to an array.
966 // Only used for HArrayGet and the UnsafeGetObject &
967 // UnsafeGetObjectVolatile intrinsics.
968 const Location index_;
969
970 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
971 };
972
973 // Slow path generating a read barrier for a GC root.
974 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
975 public:
976 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
977 : SlowPathCode(instruction), out_(out), root_(root) {
978 }
979
980 void EmitNativeCode(CodeGenerator* codegen) override {
981 DCHECK(codegen->EmitReadBarrier());
982 LocationSummary* locations = instruction_->GetLocations();
983 DCHECK(locations->CanCall());
984 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
985 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
986 << "Unexpected instruction in read barrier for GC root slow path: "
987 << instruction_->DebugName();
988
989 __ Bind(GetEntryLabel());
990 SaveLiveRegisters(codegen, locations);
991
992 InvokeRuntimeCallingConvention calling_convention;
993 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
994 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
995 x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
996 instruction_,
997 instruction_->GetDexPc(),
998 this);
999 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1000 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
1001
1002 RestoreLiveRegisters(codegen, locations);
1003 __ jmp(GetExitLabel());
1004 }
1005
1006 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
1007
1008 private:
1009 const Location out_;
1010 const Location root_;
1011
1012 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
1013 };
1014
1015 class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
1016 public:
1017 explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
1018 : SlowPathCode(instruction) {}
1019
1020 void EmitNativeCode(CodeGenerator* codegen) override {
1021 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1022 LocationSummary* locations = instruction_->GetLocations();
1023 QuickEntrypointEnum entry_point =
1024 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
1025 __ Bind(GetEntryLabel());
1026 SaveLiveRegisters(codegen, locations);
1027 if (instruction_->IsMethodExitHook()) {
1028 // Load FrameSize to pass to the exit hook.
1029 __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
1030 }
1031 x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
1032 RestoreLiveRegisters(codegen, locations);
1033 __ jmp(GetExitLabel());
1034 }
1035
1036 const char* GetDescription() const override {
1037 return "MethodEntryExitHooksSlowPath";
1038 }
1039
1040 private:
1041 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
1042 };
1043
1044 class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
1045 public:
1046 CompileOptimizedSlowPathX86_64(HSuspendCheck* suspend_check, uint64_t counter_address)
1047 : SlowPathCode(suspend_check),
1048 counter_address_(counter_address) {}
1049
1050 void EmitNativeCode(CodeGenerator* codegen) override {
1051 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1052 __ Bind(GetEntryLabel());
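// Write the optimize threshold back into the method's hotness counter before
// requesting an optimized compilation from the runtime.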
1053 __ movq(CpuRegister(TMP), Immediate(counter_address_));
1054 __ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold()));
1055 if (instruction_ != nullptr) {
1056 // Only saves full width XMM for SIMD.
1057 SaveLiveRegisters(codegen, instruction_->GetLocations());
1058 }
1059 x86_64_codegen->GenerateInvokeRuntime(
1060 GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1061 if (instruction_ != nullptr) {
1062 // Only restores full width XMM for SIMD.
1063 RestoreLiveRegisters(codegen, instruction_->GetLocations());
1064 }
1065 __ jmp(GetExitLabel());
1066 }
1067
1068 const char* GetDescription() const override {
1069 return "CompileOptimizedSlowPath";
1070 }
1071
1072 private:
1073 uint64_t counter_address_;
1074
1075 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
1076 };
1077
1078 #undef __
1079 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1080 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
1081
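// Maps integer condition to x86_64 name.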
1082 inline Condition X86_64IntegerCondition(IfCondition cond) {
1083 switch (cond) {
1084 case kCondEQ: return kEqual;
1085 case kCondNE: return kNotEqual;
1086 case kCondLT: return kLess;
1087 case kCondLE: return kLessEqual;
1088 case kCondGT: return kGreater;
1089 case kCondGE: return kGreaterEqual;
1090 case kCondB: return kBelow;
1091 case kCondBE: return kBelowEqual;
1092 case kCondA: return kAbove;
1093 case kCondAE: return kAboveEqual;
1094 }
1095 LOG(FATAL) << "Unreachable";
1096 UNREACHABLE();
1097 }
1098
1099 // Maps FP condition to x86_64 name.
1100 inline Condition X86_64FPCondition(IfCondition cond) {
1101 switch (cond) {
1102 case kCondEQ: return kEqual;
1103 case kCondNE: return kNotEqual;
1104 case kCondLT: return kBelow;
1105 case kCondLE: return kBelowEqual;
1106 case kCondGT: return kAbove;
1107 case kCondGE: return kAboveEqual;
1108 default: break; // should not happen
1109 }
1110 LOG(FATAL) << "Unreachable";
1111 UNREACHABLE();
1112 }
1113
1114 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
1115 // We have to ensure that the native code we call directly (such as @CriticalNative
1116 // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
1117 // which are non-volatile for ART, but volatile for Native calls. This will ensure
1118 // that they are saved in the prologue and properly restored.
1119 for (FloatRegister fp_reg : non_volatile_xmm_regs) {
1120 locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
1121 }
1122 }
1123
1124 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
1125 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
1126 [[maybe_unused]] ArtMethod* method) {
1127 return desired_dispatch_info;
1128 }
1129
1130 void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
1131 switch (load_kind) {
1132 case MethodLoadKind::kBootImageLinkTimePcRelative:
1133 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1134 __ leal(temp.AsRegister<CpuRegister>(),
1135 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1136 RecordBootImageMethodPatch(invoke);
1137 break;
1138 case MethodLoadKind::kBootImageRelRo: {
1139 // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1140 __ movl(temp.AsRegister<CpuRegister>(),
1141 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1142 RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1143 break;
1144 }
1145 case MethodLoadKind::kAppImageRelRo: {
1146 DCHECK(GetCompilerOptions().IsAppImage());
1147 __ movl(temp.AsRegister<CpuRegister>(),
1148 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1149 RecordAppImageMethodPatch(invoke);
1150 break;
1151 }
1152 case MethodLoadKind::kBssEntry: {
1153 __ movq(temp.AsRegister<CpuRegister>(),
1154 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1155 RecordMethodBssEntryPatch(invoke);
1156 // No need for memory fence, thanks to the x86-64 memory model.
1157 break;
1158 }
1159 case MethodLoadKind::kJitDirectAddress: {
1160 Load64BitValue(temp.AsRegister<CpuRegister>(),
1161 reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
1162 break;
1163 }
1164 case MethodLoadKind::kRuntimeCall: {
1165 // Test situation, don't do anything.
1166 break;
1167 }
1168 default: {
1169 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
1170 UNREACHABLE();
1171 }
1172 }
1173 }
1174
1175 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
1176 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1177 // All registers are assumed to be correctly set up.
1178
1179 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
1180 switch (invoke->GetMethodLoadKind()) {
1181 case MethodLoadKind::kStringInit: {
1182 // temp = thread->string_init_entrypoint
1183 uint32_t offset =
1184 GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1185 __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1186 break;
1187 }
1188 case MethodLoadKind::kRecursive: {
1189 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1190 break;
1191 }
1192 case MethodLoadKind::kRuntimeCall: {
1193 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1194 return; // No code pointer retrieval; the runtime performs the call directly.
1195 }
1196 case MethodLoadKind::kBootImageLinkTimePcRelative:
1197 // For kCallCriticalNative we skip loading the method and do the call directly.
1198 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
1199 break;
1200 }
1201 FALLTHROUGH_INTENDED;
1202 default: {
1203 LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
1204 break;
1205 }
1206 }
1207
1208 switch (invoke->GetCodePtrLocation()) {
1209 case CodePtrLocation::kCallSelf:
1210 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
1211 __ call(&frame_entry_label_);
1212 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1213 break;
1214 case CodePtrLocation::kCallCriticalNative: {
1215 size_t out_frame_size =
1216 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1217 kNativeStackAlignment,
1218 GetCriticalNativeDirectCallFrameSize>(invoke);
1219 if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
1220 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1221 __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1222 RecordBootImageJniEntrypointPatch(invoke);
1223 } else {
1224 // (callee_method + offset_of_jni_entry_point)()
1225 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1226 ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1227 }
1228 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1229 // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
1230 switch (invoke->GetType()) {
1231 case DataType::Type::kBool:
1232 __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1233 break;
1234 case DataType::Type::kInt8:
1235 __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1236 break;
1237 case DataType::Type::kUint16:
1238 __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1239 break;
1240 case DataType::Type::kInt16:
1241 __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1242 break;
1243 case DataType::Type::kInt32:
1244 case DataType::Type::kInt64:
1245 case DataType::Type::kFloat32:
1246 case DataType::Type::kFloat64:
1247 case DataType::Type::kVoid:
1248 break;
1249 default:
1250 DCHECK(false) << invoke->GetType();
1251 break;
1252 }
1253 if (out_frame_size != 0u) {
1254 DecreaseFrame(out_frame_size);
1255 }
1256 break;
1257 }
1258 case CodePtrLocation::kCallArtMethod:
1259 // (callee_method + offset_of_quick_compiled_code)()
1260 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1261 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1262 kX86_64PointerSize).SizeValue()));
1263 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1264 break;
1265 }
1266
1267 DCHECK(!IsLeafMethod());
1268 }
1269
1270 void CodeGeneratorX86_64::GenerateVirtualCall(
1271 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1272 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1273 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1274 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1275
1276 // Use the calling convention instead of the location of the receiver, as
1277 // intrinsics may have put the receiver in a different register. In the intrinsics
1278 // slow path, the arguments have been moved to the right place, so here we are
1279 // guaranteed that the receiver is the first register of the calling convention.
1280 InvokeDexCallingConvention calling_convention;
1281 Register receiver = calling_convention.GetRegisterAt(0);
1282
1283 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1284 // /* HeapReference<Class> */ temp = receiver->klass_
1285 __ movl(temp, Address(CpuRegister(receiver), class_offset));
1286 MaybeRecordImplicitNullCheck(invoke);
1287 // Instead of simply (possibly) unpoisoning `temp` here, we should
1288 // emit a read barrier for the previous class reference load.
1289 // However this is not required in practice, as this is an
1290 // intermediate/temporary reference and because the current
1291 // concurrent copying collector keeps the from-space memory
1292 // intact/accessible until the end of the marking phase (the
1293 // concurrent copying collector may not do so in the future).
1294 __ MaybeUnpoisonHeapReference(temp);
1295
1296 MaybeGenerateInlineCacheCheck(invoke, temp);
1297
1298 // temp = temp->GetMethodAt(method_offset);
1299 __ movq(temp, Address(temp, method_offset));
1300 // call temp->GetEntryPoint();
1301 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1302 kX86_64PointerSize).SizeValue()));
1303 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1304 }
1305
1306 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1307 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1308 __ Bind(&boot_image_other_patches_.back().label);
1309 }
1310
1311 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1312 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1313 __ Bind(&boot_image_other_patches_.back().label);
1314 }
1315
1316 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1317 boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1318 invoke->GetResolvedMethodReference().index);
1319 __ Bind(&boot_image_method_patches_.back().label);
1320 }
1321
1322 void CodeGeneratorX86_64::RecordAppImageMethodPatch(HInvoke* invoke) {
1323 app_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1324 invoke->GetResolvedMethodReference().index);
1325 __ Bind(&app_image_method_patches_.back().label);
1326 }
1327
1328 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1329 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1330 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1331 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1332 invoke->GetMethodReference().dex_file));
1333 method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1334 invoke->GetMethodReference().index);
1335 __ Bind(&method_bss_entry_patches_.back().label);
1336 }
1337
1338 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1339 dex::TypeIndex type_index) {
1340 boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1341 __ Bind(&boot_image_type_patches_.back().label);
1342 }
1343
1344 void CodeGeneratorX86_64::RecordAppImageTypePatch(const DexFile& dex_file,
1345 dex::TypeIndex type_index) {
1346 app_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1347 __ Bind(&app_image_type_patches_.back().label);
1348 }
1349
1350 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1351 ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1352 switch (load_class->GetLoadKind()) {
1353 case HLoadClass::LoadKind::kBssEntry:
1354 patches = &type_bss_entry_patches_;
1355 break;
1356 case HLoadClass::LoadKind::kBssEntryPublic:
1357 patches = &public_type_bss_entry_patches_;
1358 break;
1359 case HLoadClass::LoadKind::kBssEntryPackage:
1360 patches = &package_type_bss_entry_patches_;
1361 break;
1362 default:
1363 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1364 UNREACHABLE();
1365 }
1366 patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1367 return &patches->back().label;
1368 }
1369
1370 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1371 boot_image_string_patches_.emplace_back(
1372 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1373 __ Bind(&boot_image_string_patches_.back().label);
1374 }
1375
1376 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1377 string_bss_entry_patches_.emplace_back(
1378 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1379 return &string_bss_entry_patches_.back().label;
1380 }
1381
1382 Label* CodeGeneratorX86_64::NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type) {
1383 method_type_bss_entry_patches_.emplace_back(
1384 &load_method_type->GetDexFile(), load_method_type->GetProtoIndex().index_);
1385 return &method_type_bss_entry_patches_.back().label;
1386 }
1387
1388 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1389 boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1390 invoke->GetResolvedMethodReference().index);
1391 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1392 }
1393
1394 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
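  // Three cases: when compiling the boot image we emit a link-time patched LEA; PIC app AOT
  // compilation loads the address from a boot image RelRo entry; the JIT embeds the known
  // absolute address of the boot image object directly.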
1395 if (GetCompilerOptions().IsBootImage()) {
1396 __ leal(reg,
1397 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1398 RecordBootImageIntrinsicPatch(boot_image_reference);
1399 } else if (GetCompilerOptions().GetCompilePic()) {
1400 __ movl(reg,
1401 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1402 RecordBootImageRelRoPatch(boot_image_reference);
1403 } else {
1404 DCHECK(GetCompilerOptions().IsJitCompiler());
1405 gc::Heap* heap = Runtime::Current()->GetHeap();
1406 DCHECK(!heap->GetBootImageSpaces().empty());
1407 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1408 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1409 }
1410 }
1411
1412 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1413 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1414 if (GetCompilerOptions().IsBootImage()) {
1415 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1416 __ leal(reg,
1417 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1418 MethodReference target_method = invoke->GetResolvedMethodReference();
1419 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1420 boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1421 __ Bind(&boot_image_type_patches_.back().label);
1422 } else {
1423 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1424 LoadBootImageAddress(reg, boot_image_offset);
1425 }
1426 }
1427
1428 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1429 if (GetCompilerOptions().IsBootImage()) {
1430 ScopedObjectAccess soa(Thread::Current());
1431 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1432 boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1433 __ Bind(&boot_image_type_patches_.back().label);
1434 } else {
1435 uint32_t boot_image_offset = GetBootImageOffset(class_root);
1436 LoadBootImageAddress(reg, boot_image_offset);
1437 }
1438 }
1439
1440 // The label points to the end of the "movl" (or another) instruction, but the literal offset
1441 // for the method patch needs to point to the embedded constant, which occupies the last 4 bytes.
1442 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1443
1444 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1445 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1446 const ArenaDeque<PatchInfo<Label>>& infos,
1447 ArenaVector<linker::LinkerPatch>* linker_patches) {
1448 for (const PatchInfo<Label>& info : infos) {
1449 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1450 linker_patches->push_back(
1451 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1452 }
1453 }
1454
1455 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1456 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1457 const DexFile* target_dex_file,
1458 uint32_t pc_insn_offset,
1459 uint32_t boot_image_offset) {
1460 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
1461 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1462 }
1463
1464 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1465 DCHECK(linker_patches->empty());
1466 size_t size =
1467 boot_image_method_patches_.size() +
1468 app_image_method_patches_.size() +
1469 method_bss_entry_patches_.size() +
1470 boot_image_type_patches_.size() +
1471 app_image_type_patches_.size() +
1472 type_bss_entry_patches_.size() +
1473 public_type_bss_entry_patches_.size() +
1474 package_type_bss_entry_patches_.size() +
1475 boot_image_string_patches_.size() +
1476 string_bss_entry_patches_.size() +
1477 method_type_bss_entry_patches_.size() +
1478 boot_image_jni_entrypoint_patches_.size() +
1479 boot_image_other_patches_.size();
1480 linker_patches->reserve(size);
1481 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1482 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1483 boot_image_method_patches_, linker_patches);
1484 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1485 boot_image_type_patches_, linker_patches);
1486 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1487 boot_image_string_patches_, linker_patches);
1488 } else {
1489 DCHECK(boot_image_method_patches_.empty());
1490 DCHECK(boot_image_type_patches_.empty());
1491 DCHECK(boot_image_string_patches_.empty());
1492 }
1493 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
1494 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
1495 if (GetCompilerOptions().IsBootImage()) {
1496 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1497 boot_image_other_patches_, linker_patches);
1498 } else {
1499 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
1500 boot_image_other_patches_, linker_patches);
1501 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
1502 app_image_method_patches_, linker_patches);
1503 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
1504 app_image_type_patches_, linker_patches);
1505 }
1506 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1507 method_bss_entry_patches_, linker_patches);
1508 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1509 type_bss_entry_patches_, linker_patches);
1510 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1511 public_type_bss_entry_patches_, linker_patches);
1512 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1513 package_type_bss_entry_patches_, linker_patches);
1514 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1515 string_bss_entry_patches_, linker_patches);
1516 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>(
1517 method_type_bss_entry_patches_, linker_patches);
1518 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1519 boot_image_jni_entrypoint_patches_, linker_patches);
1520 DCHECK_EQ(size, linker_patches->size());
1521 }
1522
1523 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1524 stream << Register(reg);
1525 }
1526
1527 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1528 stream << FloatRegister(reg);
1529 }
1530
1531 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1532 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1533 }
1534
1535 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1536 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1537 return kX86_64WordSize;
1538 }
1539
1540 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1541 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1542 return kX86_64WordSize;
1543 }
1544
1545 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1546 if (GetGraph()->HasSIMD()) {
1547 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1548 } else {
1549 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1550 }
1551 return GetSlowPathFPWidth();
1552 }
1553
1554 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1555 if (GetGraph()->HasSIMD()) {
1556 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1557 } else {
1558 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1559 }
1560 return GetSlowPathFPWidth();
1561 }
1562
1563 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1564 HInstruction* instruction,
1565 uint32_t dex_pc,
1566 SlowPathCode* slow_path) {
1567 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1568 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1569 if (EntrypointRequiresStackMap(entrypoint)) {
1570 RecordPcInfo(instruction, dex_pc, slow_path);
1571 }
1572 }
1573
1574 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1575 HInstruction* instruction,
1576 SlowPathCode* slow_path) {
1577 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1578 GenerateInvokeRuntime(entry_point_offset);
1579 }
1580
1581 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
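  // Runtime entrypoints are fields of the current Thread, which is reachable through the GS
  // segment register on x86-64, so the call goes through a GS-relative absolute address.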
1582 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1583 }
1584
1585 namespace detail {
1586
1587 // Mark which intrinsics we don't have handcrafted code for.
1588 template <Intrinsics T>
1589 struct IsUnimplemented {
1590 bool is_unimplemented = false;
1591 };
1592
1593 #define TRUE_OVERRIDE(Name) \
1594 template <> \
1595 struct IsUnimplemented<Intrinsics::k##Name> { \
1596 bool is_unimplemented = true; \
1597 };
1598 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
1599 #undef TRUE_OVERRIDE
1600
1601 static constexpr bool kIsIntrinsicUnimplemented[] = {
1602 false, // kNone
1603 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1604 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1605 ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1606 #undef IS_UNIMPLEMENTED
1607 };
1608
1609 } // namespace detail
1610
1611 static constexpr int kNumberOfCpuRegisterPairs = 0;
1612 // Use a fake return address register to mimic Quick.
1613 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1614 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1615 const CompilerOptions& compiler_options,
1616 OptimizingCompilerStats* stats)
1617 : CodeGenerator(graph,
1618 kNumberOfCpuRegisters,
1619 kNumberOfFloatRegisters,
1620 kNumberOfCpuRegisterPairs,
1621 ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
1622 | (1 << kFakeReturnRegister),
1623 ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
1624 compiler_options,
1625 stats,
1626 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1627 block_labels_(nullptr),
1628 location_builder_(graph, this),
1629 instruction_visitor_(graph, this),
1630 move_resolver_(graph->GetAllocator(), this),
1631 assembler_(graph->GetAllocator(),
1632 compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1633 constant_area_start_(0),
1634 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1635 app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1636 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1637 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1638 app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1639 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1640 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1641 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1642 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1643 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1644 method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1645 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1646 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1647 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1648 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1649 jit_method_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1650 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1651 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1652 }
1653
1654 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1655 CodeGeneratorX86_64* codegen)
1656 : InstructionCodeGenerator(graph, codegen),
1657 assembler_(codegen->GetAssembler()),
1658 codegen_(codegen) {}
1659
1660 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1661 // Stack register is always reserved.
1662 blocked_core_registers_[RSP] = true;
1663
1664 // Block the register used as TMP.
1665 blocked_core_registers_[TMP] = true;
1666 }
1667
1668 static dwarf::Reg DWARFReg(Register reg) {
1669 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1670 }
1671
1672 static dwarf::Reg DWARFReg(FloatRegister reg) {
1673 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1674 }
1675
1676 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1677 LocationSummary* locations = new (GetGraph()->GetAllocator())
1678 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1679 // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1680 // two 32-bit values in EAX + EDX even on 64-bit architectures.
1681 locations->AddTemp(Location::RegisterLocation(RAX));
1682 locations->AddTemp(Location::RegisterLocation(RDX));
1683 }
1684
1685 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1686 SlowPathCode* slow_path =
1687 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1688 LocationSummary* locations = instruction->GetLocations();
1689 codegen_->AddSlowPath(slow_path);
1690
1691 if (instruction->IsMethodExitHook()) {
1692     // Check if we are required to check whether the caller needs a deoptimization. Strictly
1693     // speaking, it would be sufficient to check whether the CheckCallerForDeopt bit is set, but
1694     // it is faster to check if the flag is simply non-zero. The kCHA bit isn't used in debuggable
1695     // runtimes, as CHA optimization is disabled there. The other bit is used when this method
1696     // itself requires a deoptimization due to redefinition, so it is safe to just check for a non-zero value here.
1697 __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
1698 Immediate(0));
1699 __ j(kNotEqual, slow_path->GetEntryLabel());
1700 }
1701
1702 uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1703 MemberOffset offset = instruction->IsMethodExitHook() ?
1704 instrumentation::Instrumentation::HaveMethodExitListenersOffset()
1705 : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1706 __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
1707 __ cmpb(Address(CpuRegister(TMP), 0),
1708 Immediate(instrumentation::Instrumentation::kFastTraceListeners));
1709     // Check if there are any method entry / exit listeners. If not, continue with execution.
1710 __ j(kLess, slow_path->GetExitLabel());
1711     // Check if there are any slow method entry / exit listeners. If so, take the slow path.
1712 __ j(kGreater, slow_path->GetEntryLabel());
1713
1714     // Check if there is space in the buffer for a new entry; if not, take the slow path.
1715 CpuRegister init_entry = locations->GetTemp(0).AsRegister<CpuRegister>();
1716     // Use a register that is different from RAX and RDX, since rdtsc returns its result in RAX
1717     // and RDX and we use curr_entry to store that result into the buffer.
1718 CpuRegister curr_entry = CpuRegister(TMP);
1719 DCHECK(curr_entry.AsRegister() != RAX);
1720 DCHECK(curr_entry.AsRegister() != RDX);
1721 uint64_t trace_buffer_curr_entry_offset =
1722 Thread::TraceBufferCurrPtrOffset<kX86_64PointerSize>().SizeValue();
1723 __ gs()->movq(CpuRegister(curr_entry),
1724 Address::Absolute(trace_buffer_curr_entry_offset, /* no_rip= */ true));
1725 __ subq(CpuRegister(curr_entry), Immediate(kNumEntriesForWallClock * sizeof(void*)));
1726 __ gs()->movq(init_entry,
1727 Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(),
1728 /* no_rip= */ true));
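    // If the new entry pointer falls below the start of the buffer, the buffer is full;
    // take the slow path.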
1729 __ cmpq(curr_entry, init_entry);
1730 __ j(kLess, slow_path->GetEntryLabel());
1731
1732 // Update the index in the `Thread`.
1733 __ gs()->movq(Address::Absolute(trace_buffer_curr_entry_offset, /* no_rip= */ true),
1734 CpuRegister(curr_entry));
1735
1736 // Record method pointer and action.
1737 CpuRegister method = init_entry;
1738 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1739     // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
1740     // so there is no need to set the bits since they are already 0.
1741 if (instruction->IsMethodExitHook()) {
1742 DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1743 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1744 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1745 __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1746 }
1747 __ movq(Address(curr_entry, kMethodOffsetInBytes), CpuRegister(method));
1748     // Get the timestamp. rdtsc returns the timestamp in RAX + RDX even on 64-bit architectures.
1749 __ rdtsc();
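    // Combine the two 32-bit halves in RDX:RAX into a single 64-bit timestamp in RAX.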
1750 __ shlq(CpuRegister(RDX), Immediate(32));
1751 __ orq(CpuRegister(RAX), CpuRegister(RDX));
1752 __ movq(Address(curr_entry, kTimestampOffsetInBytes), CpuRegister(RAX));
1753 __ Bind(slow_path->GetExitLabel());
1754 }
1755
1756 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1757 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1758 DCHECK(codegen_->RequiresCurrentMethod());
1759 GenerateMethodEntryExitHook(instruction);
1760 }
1761
1762 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1763 switch (instr->InputAt(0)->GetType()) {
1764 case DataType::Type::kReference:
1765 case DataType::Type::kBool:
1766 case DataType::Type::kUint8:
1767 case DataType::Type::kInt8:
1768 case DataType::Type::kUint16:
1769 case DataType::Type::kInt16:
1770 case DataType::Type::kInt32:
1771 case DataType::Type::kInt64:
1772 locations->SetInAt(0, Location::RegisterLocation(RAX));
1773 break;
1774
1775 case DataType::Type::kFloat32:
1776 case DataType::Type::kFloat64:
1777 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1778 break;
1779
1780 case DataType::Type::kVoid:
1781 locations->SetInAt(0, Location::NoLocation());
1782 break;
1783
1784 default:
1785 LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1786 }
1787 }
1788
1789 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1790 LocationSummary* locations = new (GetGraph()->GetAllocator())
1791 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1792 SetInForReturnValue(method_hook, locations);
1793 // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1794 // two 32-bit values in EAX + EDX even on 64-bit architectures.
1795 locations->AddTemp(Location::RegisterLocation(RAX));
1796 locations->AddTemp(Location::RegisterLocation(RDX));
1797 }
1798
1799 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1800 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1801 DCHECK(codegen_->RequiresCurrentMethod());
1802 GenerateMethodEntryExitHook(instruction);
1803 }
1804
1805 void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1806 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1807 NearLabel overflow;
1808 Register method = kMethodRegisterArgument;
1809 if (!is_frame_entry) {
1810 CHECK(RequiresCurrentMethod());
1811 method = TMP;
1812 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1813 }
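    // Decrement the method's hotness counter, but skip the decrement once it has already
    // reached kNterpHotnessValue so the counter cannot wrap past the threshold.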
1814 __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1815 Immediate(interpreter::kNterpHotnessValue));
1816 __ j(kEqual, &overflow);
1817 __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1818 Immediate(-1));
1819 __ Bind(&overflow);
1820 }
1821
1822 if (GetGraph()->IsCompilingBaseline() &&
1823 GetGraph()->IsUsefulOptimizing() &&
1824 !Runtime::Current()->IsAotCompiler()) {
1825 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1826 DCHECK(info != nullptr);
1827 CHECK(!HasEmptyFrame());
1828 uint64_t address = reinterpret_cast64<uint64_t>(info) +
1829 ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1830 SlowPathCode* slow_path =
1831 new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(suspend_check, address);
1832 AddSlowPath(slow_path);
1833     // Note: if the address were in the 32-bit range, we could use
1834     // Address::Absolute and avoid this movq.
1835 __ movq(CpuRegister(TMP), Immediate(address));
1836     // With multiple threads, this can overflow. This is OK, as we will eventually see the
1837     // counter reach 0. Also, at this point we have no register available to look at the
1838     // counter directly.
1839 __ addw(Address(CpuRegister(TMP), 0), Immediate(-1));
1840 __ j(kEqual, slow_path->GetEntryLabel());
1841 __ Bind(slow_path->GetExitLabel());
1842 }
1843 }
1844
1845 void CodeGeneratorX86_64::GenerateFrameEntry() {
1846 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1847
1848 // Check if we need to generate the clinit check. We will jump to the
1849 // resolution stub if the class is not initialized and the executing thread is
1850 // not the thread initializing it.
1851 // We do this before constructing the frame to get the correct stack trace if
1852 // an exception is thrown.
1853 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1854 NearLabel resolution;
1855 // Check if we're visibly initialized.
1856
1857 // We don't emit a read barrier here to save on code size. We rely on the
1858 // resolution trampoline to do a suspend check before re-entering this code.
1859 __ movl(CpuRegister(TMP),
1860 Address(CpuRegister(kMethodRegisterArgument),
1861 ArtMethod::DeclaringClassOffset().Int32Value()));
1862 __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1863 Immediate(kShiftedVisiblyInitializedValue));
1864 __ j(kAboveEqual, &frame_entry_label_);
1865
1866 // Check if we're initializing and the thread initializing is the one
1867 // executing the code.
1868 __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1869 Immediate(kShiftedInitializingValue));
1870 __ j(kBelow, &resolution);
1871
1872 __ movl(CpuRegister(TMP),
1873 Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
1874 __ gs()->cmpl(
1875 CpuRegister(TMP),
1876 Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
1877 __ j(kEqual, &frame_entry_label_);
1878 __ Bind(&resolution);
1879
1880 // Jump to the resolution stub.
1881 ThreadOffset64 entrypoint_offset =
1882 GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
1883 __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
1884 }
1885
1886 __ Bind(&frame_entry_label_);
1887 bool skip_overflow_check = IsLeafMethod()
1888 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1889 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1890
1891
1892 if (!skip_overflow_check) {
1893 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
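    // Implicit stack overflow check: read a word `reserved_bytes` below RSP. If the stack has
    // overflowed, this touches the protected guard region and faults, and the fault handler
    // raises a StackOverflowError.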
1894 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1895 RecordPcInfo(nullptr, 0);
1896 }
1897
1898 if (!HasEmptyFrame()) {
1899 // Make sure the frame size isn't unreasonably large.
1900 DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1901
1902 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1903 Register reg = kCoreCalleeSaves[i];
1904 if (allocated_registers_.ContainsCoreRegister(reg)) {
1905 __ pushq(CpuRegister(reg));
1906 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1907 __ cfi().RelOffset(DWARFReg(reg), 0);
1908 }
1909 }
1910
1911 int adjust = GetFrameSize() - GetCoreSpillSize();
1912 IncreaseFrame(adjust);
1913 uint32_t xmm_spill_location = GetFpuSpillStart();
1914 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1915
1916 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1917 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1918 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1919 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1920 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1921 }
1922 }
1923
1924 // Save the current method if we need it. Note that we do not
1925 // do this in HCurrentMethod, as the instruction might have been removed
1926 // in the SSA graph.
1927 if (RequiresCurrentMethod()) {
1928 CHECK(!HasEmptyFrame());
1929 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1930 CpuRegister(kMethodRegisterArgument));
1931 }
1932
1933 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1934 CHECK(!HasEmptyFrame());
1935 // Initialize should_deoptimize flag to 0.
1936 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1937 }
1938 }
1939
1940 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1941 }
1942
1943 void CodeGeneratorX86_64::GenerateFrameExit() {
1944 __ cfi().RememberState();
1945 if (!HasEmptyFrame()) {
1946 uint32_t xmm_spill_location = GetFpuSpillStart();
1947 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1948 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1949 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1950 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1951 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1952 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1953 }
1954 }
1955
1956 int adjust = GetFrameSize() - GetCoreSpillSize();
1957 DecreaseFrame(adjust);
1958
1959 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1960 Register reg = kCoreCalleeSaves[i];
1961 if (allocated_registers_.ContainsCoreRegister(reg)) {
1962 __ popq(CpuRegister(reg));
1963 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1964 __ cfi().Restore(DWARFReg(reg));
1965 }
1966 }
1967 }
1968 __ ret();
1969 __ cfi().RestoreState();
1970 __ cfi().DefCFAOffset(GetFrameSize());
1971 }
1972
1973 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1974 __ Bind(GetLabelOf(block));
1975 }
1976
1977 void CodeGeneratorX86_64::Move(Location destination, Location source) {
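  // Generic move between arbitrary locations (GPR, XMM, stack slot, constant); the 32-bit or
  // 64-bit form of each instruction is chosen from the kinds of the source and destination.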
1978 if (source.Equals(destination)) {
1979 return;
1980 }
1981 if (destination.IsRegister()) {
1982 CpuRegister dest = destination.AsRegister<CpuRegister>();
1983 if (source.IsRegister()) {
1984 __ movq(dest, source.AsRegister<CpuRegister>());
1985 } else if (source.IsFpuRegister()) {
1986 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1987 } else if (source.IsStackSlot()) {
1988 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1989 } else if (source.IsConstant()) {
1990 HConstant* constant = source.GetConstant();
1991 if (constant->IsLongConstant()) {
1992 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1993 } else if (constant->IsDoubleConstant()) {
1994 Load64BitValue(dest, GetInt64ValueOf(constant));
1995 } else {
1996 Load32BitValue(dest, GetInt32ValueOf(constant));
1997 }
1998 } else {
1999 DCHECK(source.IsDoubleStackSlot());
2000 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2001 }
2002 } else if (destination.IsFpuRegister()) {
2003 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
2004 if (source.IsRegister()) {
2005 __ movd(dest, source.AsRegister<CpuRegister>());
2006 } else if (source.IsFpuRegister()) {
2007 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
2008 } else if (source.IsConstant()) {
2009 HConstant* constant = source.GetConstant();
2010 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
2011 if (constant->IsFloatConstant()) {
2012 Load32BitValue(dest, static_cast<int32_t>(value));
2013 } else {
2014 Load64BitValue(dest, value);
2015 }
2016 } else if (source.IsStackSlot()) {
2017 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2018 } else {
2019 DCHECK(source.IsDoubleStackSlot());
2020 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2021 }
2022 } else if (destination.IsStackSlot()) {
2023 if (source.IsRegister()) {
2024 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
2025 source.AsRegister<CpuRegister>());
2026 } else if (source.IsFpuRegister()) {
2027 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
2028 source.AsFpuRegister<XmmRegister>());
2029 } else if (source.IsConstant()) {
2030 HConstant* constant = source.GetConstant();
2031 int32_t value = GetInt32ValueOf(constant);
2032 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
2033 } else {
2034 DCHECK(source.IsStackSlot()) << source;
2035 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2036 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2037 }
2038 } else {
2039 DCHECK(destination.IsDoubleStackSlot());
2040 if (source.IsRegister()) {
2041 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
2042 source.AsRegister<CpuRegister>());
2043 } else if (source.IsFpuRegister()) {
2044 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
2045 source.AsFpuRegister<XmmRegister>());
2046 } else if (source.IsConstant()) {
2047 HConstant* constant = source.GetConstant();
2048 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
2049 int64_t value = GetInt64ValueOf(constant);
2050 Store64BitValueToStack(destination, value);
2051 } else {
2052 DCHECK(source.IsDoubleStackSlot());
2053 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2054 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2055 }
2056 }
2057 }
2058
2059 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
2060 Location dst,
2061 Address src) {
2062 switch (type) {
2063 case DataType::Type::kBool:
2064 case DataType::Type::kUint8:
2065 __ movzxb(dst.AsRegister<CpuRegister>(), src);
2066 break;
2067 case DataType::Type::kInt8:
2068 __ movsxb(dst.AsRegister<CpuRegister>(), src);
2069 break;
2070 case DataType::Type::kUint16:
2071 __ movzxw(dst.AsRegister<CpuRegister>(), src);
2072 break;
2073 case DataType::Type::kInt16:
2074 __ movsxw(dst.AsRegister<CpuRegister>(), src);
2075 break;
2076 case DataType::Type::kInt32:
2077 case DataType::Type::kUint32:
2078 __ movl(dst.AsRegister<CpuRegister>(), src);
2079 break;
2080 case DataType::Type::kInt64:
2081 case DataType::Type::kUint64:
2082 __ movq(dst.AsRegister<CpuRegister>(), src);
2083 break;
2084 case DataType::Type::kFloat32:
2085 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
2086 break;
2087 case DataType::Type::kFloat64:
2088 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
2089 break;
2090 case DataType::Type::kVoid:
2091 case DataType::Type::kReference:
2092 LOG(FATAL) << "Unreachable type " << type;
2093 UNREACHABLE();
2094 }
2095 }
2096
2097 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
2098 DCHECK(location.IsRegister());
2099 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
2100 }
2101
2102 void CodeGeneratorX86_64::MoveLocation(Location dst,
2103 Location src,
2104 [[maybe_unused]] DataType::Type dst_type) {
2105 Move(dst, src);
2106 }
2107
2108 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
2109 if (location.IsRegister()) {
2110 locations->AddTemp(location);
2111 } else {
2112 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2113 }
2114 }
2115
2116 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2117 if (successor->IsExitBlock()) {
2118 DCHECK(got->GetPrevious()->AlwaysThrows());
2119 return; // no code needed
2120 }
2121
2122 HBasicBlock* block = got->GetBlock();
2123 HInstruction* previous = got->GetPrevious();
2124
2125 HLoopInformation* info = block->GetLoopInformation();
2126 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2127 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2128 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2129 return;
2130 }
2131
2132 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2133 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2134 }
2135 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
2136 __ jmp(codegen_->GetLabelOf(successor));
2137 }
2138 }
2139
2140 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
2141 got->SetLocations(nullptr);
2142 }
2143
2144 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
2145 HandleGoto(got, got->GetSuccessor());
2146 }
2147
2148 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2149 try_boundary->SetLocations(nullptr);
2150 }
2151
2152 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2153 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2154 if (!successor->IsExitBlock()) {
2155 HandleGoto(try_boundary, successor);
2156 }
2157 }
2158
2159 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
2160 exit->SetLocations(nullptr);
2161 }
2162
2163 void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {}
2164
2165 template<class LabelType>
2166 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
2167 LabelType* true_label,
2168 LabelType* false_label) {
2169 if (cond->IsFPConditionTrueIfNaN()) {
2170 __ j(kUnordered, true_label);
2171 } else if (cond->IsFPConditionFalseIfNaN()) {
2172 __ j(kUnordered, false_label);
2173 }
2174 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
2175 }
2176
2177 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
2178 LocationSummary* locations = condition->GetLocations();
2179
2180 Location left = locations->InAt(0);
2181 Location right = locations->InAt(1);
2182 DataType::Type type = condition->InputAt(0)->GetType();
2183 switch (type) {
2184 case DataType::Type::kBool:
2185 case DataType::Type::kUint8:
2186 case DataType::Type::kInt8:
2187 case DataType::Type::kUint16:
2188 case DataType::Type::kInt16:
2189 case DataType::Type::kInt32:
2190 case DataType::Type::kReference: {
2191 codegen_->GenerateIntCompare(left, right);
2192 break;
2193 }
2194 case DataType::Type::kInt64: {
2195 codegen_->GenerateLongCompare(left, right);
2196 break;
2197 }
2198 case DataType::Type::kFloat32: {
2199 if (right.IsFpuRegister()) {
2200 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2201 } else if (right.IsConstant()) {
2202 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2203 codegen_->LiteralFloatAddress(
2204 right.GetConstant()->AsFloatConstant()->GetValue()));
2205 } else {
2206 DCHECK(right.IsStackSlot());
2207 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2208 Address(CpuRegister(RSP), right.GetStackIndex()));
2209 }
2210 break;
2211 }
2212 case DataType::Type::kFloat64: {
2213 if (right.IsFpuRegister()) {
2214 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2215 } else if (right.IsConstant()) {
2216 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2217 codegen_->LiteralDoubleAddress(
2218 right.GetConstant()->AsDoubleConstant()->GetValue()));
2219 } else {
2220 DCHECK(right.IsDoubleStackSlot());
2221 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2222 Address(CpuRegister(RSP), right.GetStackIndex()));
2223 }
2224 break;
2225 }
2226 default:
2227 LOG(FATAL) << "Unexpected condition type " << type;
2228 }
2229 }
2230
2231 template<class LabelType>
2232 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2233 LabelType* true_target_in,
2234 LabelType* false_target_in) {
2235 // Generated branching requires both targets to be explicit. If either of the
2236 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
2237 LabelType fallthrough_target;
2238 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2239 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2240
2241 // Generate the comparison to set the CC.
2242 GenerateCompareTest(condition);
2243
2244 // Now generate the correct jump(s).
2245 DataType::Type type = condition->InputAt(0)->GetType();
2246 switch (type) {
2247 case DataType::Type::kInt64: {
2248 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2249 break;
2250 }
2251 case DataType::Type::kFloat32: {
2252 GenerateFPJumps(condition, true_target, false_target);
2253 break;
2254 }
2255 case DataType::Type::kFloat64: {
2256 GenerateFPJumps(condition, true_target, false_target);
2257 break;
2258 }
2259 default:
2260 LOG(FATAL) << "Unexpected condition type " << type;
2261 }
2262
2263 if (false_target != &fallthrough_target) {
2264 __ jmp(false_target);
2265 }
2266
2267 if (fallthrough_target.IsLinked()) {
2268 __ Bind(&fallthrough_target);
2269 }
2270 }
2271
2272 static bool AreEflagsSetFrom(HInstruction* cond,
2273 HInstruction* branch,
2274 const CompilerOptions& compiler_options) {
2275 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2276 // are set only strictly before `branch`. We can't use the eflags on long
2277 // conditions if they are materialized due to the complex branching.
2278 return cond->IsCondition() &&
2279 cond->GetNext() == branch &&
2280 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2281 !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2282 compiler_options.ProfileBranches());
2283 }
2284
2285 template<class LabelType>
2286 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2287 size_t condition_input_index,
2288 LabelType* true_target,
2289 LabelType* false_target) {
2290 HInstruction* cond = instruction->InputAt(condition_input_index);
2291
2292 if (true_target == nullptr && false_target == nullptr) {
2293 // Nothing to do. The code always falls through.
2294 return;
2295 } else if (cond->IsIntConstant()) {
2296 // Constant condition, statically compared against "true" (integer value 1).
2297 if (cond->AsIntConstant()->IsTrue()) {
2298 if (true_target != nullptr) {
2299 __ jmp(true_target);
2300 }
2301 } else {
2302 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2303 if (false_target != nullptr) {
2304 __ jmp(false_target);
2305 }
2306 }
2307 return;
2308 }
2309
2310 // The following code generates these patterns:
2311 // (1) true_target == nullptr && false_target != nullptr
2312 // - opposite condition true => branch to false_target
2313 // (2) true_target != nullptr && false_target == nullptr
2314 // - condition true => branch to true_target
2315 // (3) true_target != nullptr && false_target != nullptr
2316 // - condition true => branch to true_target
2317 // - branch to false_target
2318 if (IsBooleanValueOrMaterializedCondition(cond)) {
2319 if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2320 if (true_target == nullptr) {
2321 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2322 } else {
2323 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2324 }
2325 } else {
2326 // Materialized condition, compare against 0.
2327 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2328 if (lhs.IsRegister()) {
2329 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2330 } else {
2331 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2332 }
2333 if (true_target == nullptr) {
2334 __ j(kEqual, false_target);
2335 } else {
2336 __ j(kNotEqual, true_target);
2337 }
2338 }
2339 } else {
2340 // Condition has not been materialized, use its inputs as the
2341 // comparison and its condition as the branch condition.
2342 HCondition* condition = cond->AsCondition();
2343
2344 // If this is a long or FP comparison that has been folded into
2345 // the HCondition, generate the comparison directly.
2346 DataType::Type type = condition->InputAt(0)->GetType();
2347 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2348 GenerateCompareTestAndBranch(condition, true_target, false_target);
2349 return;
2350 }
2351
2352 Location lhs = condition->GetLocations()->InAt(0);
2353 Location rhs = condition->GetLocations()->InAt(1);
2354 codegen_->GenerateIntCompare(lhs, rhs);
2355 if (true_target == nullptr) {
2356 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2357 } else {
2358 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2359 }
2360 }
2361
2362 // If neither branch falls through (case 3), the conditional branch to `true_target`
2363 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2364 if (true_target != nullptr && false_target != nullptr) {
2365 __ jmp(false_target);
2366 }
2367 }
2368
2369 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2370 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2371 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2372 if (GetGraph()->IsCompilingBaseline() &&
2373 codegen_->GetCompilerOptions().ProfileBranches() &&
2374 !Runtime::Current()->IsAotCompiler()) {
2375 locations->SetInAt(0, Location::RequiresRegister());
2376 locations->AddTemp(Location::RequiresRegister());
2377 } else {
2378 locations->SetInAt(0, Location::Any());
2379 }
2380 }
2381 }
2382
2383 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2384 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2385 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2386 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2387 nullptr : codegen_->GetLabelOf(true_successor);
2388 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2389 nullptr : codegen_->GetLabelOf(false_successor);
2390 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2391 if (GetGraph()->IsCompilingBaseline() &&
2392 codegen_->GetCompilerOptions().ProfileBranches() &&
2393 !Runtime::Current()->IsAotCompiler()) {
2394 DCHECK(if_instr->InputAt(0)->IsCondition());
2395 CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
2396 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2397 DCHECK(info != nullptr);
2398 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2399 // Currently, not all If branches are profiled.
2400 if (cache != nullptr) {
2401 uint64_t address =
2402 reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2403 static_assert(
2404 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2405 "Unexpected offsets for BranchCache");
2406 NearLabel done;
2407 Location lhs = if_instr->GetLocations()->InAt(0);
2408 __ movq(CpuRegister(TMP), Immediate(address));
2409 __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0));
2410 __ addw(temp, Immediate(1));
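        // If the 16-bit counter wrapped around to zero, skip the store so the recorded count
        // saturates at its maximum value.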
2411 __ j(kZero, &done);
2412 __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp);
2413 __ Bind(&done);
2414 }
2415 }
2416 }
2417 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2418 }
2419
2420 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2421 LocationSummary* locations = new (GetGraph()->GetAllocator())
2422 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2423 InvokeRuntimeCallingConvention calling_convention;
2424 RegisterSet caller_saves = RegisterSet::Empty();
2425 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2426 locations->SetCustomSlowPathCallerSaves(caller_saves);
2427 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2428 locations->SetInAt(0, Location::Any());
2429 }
2430 }
2431
2432 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2433 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2434 GenerateTestAndBranch<Label>(deoptimize,
2435 /* condition_input_index= */ 0,
2436 slow_path->GetEntryLabel(),
2437 /* false_target= */ nullptr);
2438 }
2439
2440 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2441 LocationSummary* locations = new (GetGraph()->GetAllocator())
2442 LocationSummary(flag, LocationSummary::kNoCall);
2443 locations->SetOut(Location::RequiresRegister());
2444 }
2445
2446 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2447 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2448 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2449 }
2450
2451 static bool SelectCanUseCMOV(HSelect* select) {
2452 // There are no conditional move instructions for XMMs.
2453 if (DataType::IsFloatingPointType(select->GetType())) {
2454 return false;
2455 }
2456
2457 // A FP condition doesn't generate the single CC that we need.
2458 HInstruction* condition = select->GetCondition();
2459 if (condition->IsCondition() &&
2460 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2461 return false;
2462 }
2463
2464 // We can generate a CMOV for this Select.
2465 return true;
2466 }
2467
2468 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2469 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2470 if (DataType::IsFloatingPointType(select->GetType())) {
2471 locations->SetInAt(0, Location::RequiresFpuRegister());
2472 locations->SetInAt(1, Location::Any());
2473 } else {
2474 locations->SetInAt(0, Location::RequiresRegister());
2475 if (SelectCanUseCMOV(select)) {
2476 if (select->InputAt(1)->IsConstant()) {
2477 locations->SetInAt(1, Location::RequiresRegister());
2478 } else {
2479 locations->SetInAt(1, Location::Any());
2480 }
2481 } else {
2482 locations->SetInAt(1, Location::Any());
2483 }
2484 }
2485 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2486 locations->SetInAt(2, Location::RequiresRegister());
2487 }
2488 locations->SetOut(Location::SameAsFirstInput());
2489 }
2490
2491 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2492 LocationSummary* locations = select->GetLocations();
2493 if (SelectCanUseCMOV(select)) {
2494 // If both the condition and the source types are integer, we can generate
2495 // a CMOV to implement Select.
2496 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2497 Location value_true_loc = locations->InAt(1);
2498 DCHECK(locations->InAt(0).Equals(locations->Out()));
2499
2500 HInstruction* select_condition = select->GetCondition();
2501 Condition cond = kNotEqual;
2502
2503 // Figure out how to test the 'condition'.
2504 if (select_condition->IsCondition()) {
2505 HCondition* condition = select_condition->AsCondition();
2506 if (!condition->IsEmittedAtUseSite()) {
2507 // This was a previously materialized condition.
2508 // Can we use the existing condition code?
2509 if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2510 // Materialization was the previous instruction. Condition codes are right.
2511 cond = X86_64IntegerCondition(condition->GetCondition());
2512 } else {
2513 // No, we have to recreate the condition code.
2514 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2515 __ testl(cond_reg, cond_reg);
2516 }
2517 } else {
2518 GenerateCompareTest(condition);
2519 cond = X86_64IntegerCondition(condition->GetCondition());
2520 }
2521 } else {
2522 // Must be a Boolean condition, which needs to be compared to 0.
2523 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2524 __ testl(cond_reg, cond_reg);
2525 }
2526
2527 // If the condition is true, overwrite the output, which already contains false.
2528 // Generate the correct sized CMOV.
2529 bool is_64_bit = DataType::Is64BitType(select->GetType());
2530 if (value_true_loc.IsRegister()) {
2531 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2532 } else {
2533 __ cmov(cond,
2534 value_false,
2535 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2536 }
2537 } else {
2538 NearLabel false_target;
2539 GenerateTestAndBranch<NearLabel>(select,
2540 /* condition_input_index= */ 2,
2541 /* true_target= */ nullptr,
2542 &false_target);
2543 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2544 __ Bind(&false_target);
2545 }
2546 }
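
// A minimal standalone sketch (not part of this file) of the data flow the
// CMOV lowering above implements for an integer HSelect: the output register
// starts out holding the "false" value (hence Location::SameAsFirstInput())
// and is conditionally overwritten, so no branch is needed.
[[maybe_unused]] static int64_t SelectViaCmovSketch(bool condition,
                                                    int64_t value_false,
                                                    int64_t value_true) {
  int64_t out = value_false;  // out aliases in(0).
  if (condition) {            // testl %cond, %cond ; cmovne %true, %out
    out = value_true;
  }
  return out;
}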
2547
2548 void LocationsBuilderX86_64::VisitNop(HNop* nop) {
2549 new (GetGraph()->GetAllocator()) LocationSummary(nop);
2550 }
2551
2552 void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
2553 // The environment recording already happened in CodeGenerator::Compile.
2554 }
2555
2556 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2557 __ subq(CpuRegister(RSP), Immediate(adjustment));
2558 __ cfi().AdjustCFAOffset(adjustment);
2559 }
2560
2561 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2562 __ addq(CpuRegister(RSP), Immediate(adjustment));
2563 __ cfi().AdjustCFAOffset(-adjustment);
2564 }
2565
2566 void CodeGeneratorX86_64::GenerateNop() {
2567 __ nop();
2568 }
2569
2570 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2571 LocationSummary* locations =
2572 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2573 // Handle the long/FP comparisons made in instruction simplification.
2574 switch (cond->InputAt(0)->GetType()) {
2575 case DataType::Type::kInt64:
2576 locations->SetInAt(0, Location::RequiresRegister());
2577 locations->SetInAt(1, Location::Any());
2578 break;
2579 case DataType::Type::kFloat32:
2580 case DataType::Type::kFloat64:
2581 locations->SetInAt(0, Location::RequiresFpuRegister());
2582 locations->SetInAt(1, Location::Any());
2583 break;
2584 default:
2585 locations->SetInAt(0, Location::RequiresRegister());
2586 locations->SetInAt(1, Location::Any());
2587 break;
2588 }
2589 if (!cond->IsEmittedAtUseSite()) {
2590 locations->SetOut(Location::RequiresRegister());
2591 }
2592 }
2593
2594 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2595 if (cond->IsEmittedAtUseSite()) {
2596 return;
2597 }
2598
2599 LocationSummary* locations = cond->GetLocations();
2600 Location lhs = locations->InAt(0);
2601 Location rhs = locations->InAt(1);
2602 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2603 NearLabel true_label, false_label;
2604
2605 switch (cond->InputAt(0)->GetType()) {
2606 default:
2607 // Integer case.
2608
2609 // Clear output register: setcc only sets the low byte.
2610 __ xorl(reg, reg);
2611
2612 codegen_->GenerateIntCompare(lhs, rhs);
2613 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2614 return;
2615 case DataType::Type::kInt64:
2616 // Clear output register: setcc only sets the low byte.
2617 __ xorl(reg, reg);
2618
2619 codegen_->GenerateLongCompare(lhs, rhs);
2620 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2621 return;
2622 case DataType::Type::kFloat32: {
2623 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2624 if (rhs.IsConstant()) {
2625 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2626 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2627 } else if (rhs.IsStackSlot()) {
2628 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2629 } else {
2630 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2631 }
2632 GenerateFPJumps(cond, &true_label, &false_label);
2633 break;
2634 }
2635 case DataType::Type::kFloat64: {
2636 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2637 if (rhs.IsConstant()) {
2638 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2639 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2640 } else if (rhs.IsDoubleStackSlot()) {
2641 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2642 } else {
2643 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2644 }
2645 GenerateFPJumps(cond, &true_label, &false_label);
2646 break;
2647 }
2648 }
2649
2650 // Convert the jumps into the result.
2651 NearLabel done_label;
2652
2653 // False case: result = 0.
2654 __ Bind(&false_label);
2655 __ xorl(reg, reg);
2656 __ jmp(&done_label);
2657
2658 // True case: result = 1.
2659 __ Bind(&true_label);
2660 __ movl(reg, Immediate(1));
2661 __ Bind(&done_label);
2662 }
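
// Standalone sketch (not ART code) of why the integer paths above clear the
// output register before setcc: SETcc writes only the low 8 bits, so the
// upper bits must already be zero, and the xor has to come before the compare
// because it clobbers EFLAGS.
[[maybe_unused]] static int32_t MaterializeLessThanSketch(int32_t lhs, int32_t rhs) {
  int32_t out = 0;                        // xorl %out, %out (before the compare)
  bool low_byte = lhs < rhs;              // cmpl %rhs, %lhs ; setl %out_low8
  out |= static_cast<int32_t>(low_byte);
  return out;                             // 0 or 1, upper 24 bits already clear
}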
2663
2664 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2665 HandleCondition(comp);
2666 }
2667
2668 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2669 HandleCondition(comp);
2670 }
2671
2672 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2673 HandleCondition(comp);
2674 }
2675
2676 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2677 HandleCondition(comp);
2678 }
2679
2680 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2681 HandleCondition(comp);
2682 }
2683
2684 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2685 HandleCondition(comp);
2686 }
2687
2688 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2689 HandleCondition(comp);
2690 }
2691
2692 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2693 HandleCondition(comp);
2694 }
2695
2696 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2697 HandleCondition(comp);
2698 }
2699
2700 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2701 HandleCondition(comp);
2702 }
2703
2704 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2705 HandleCondition(comp);
2706 }
2707
2708 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2709 HandleCondition(comp);
2710 }
2711
2712 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2713 HandleCondition(comp);
2714 }
2715
2716 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2717 HandleCondition(comp);
2718 }
2719
2720 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2721 HandleCondition(comp);
2722 }
2723
2724 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2725 HandleCondition(comp);
2726 }
2727
2728 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2729 HandleCondition(comp);
2730 }
2731
2732 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2733 HandleCondition(comp);
2734 }
2735
2736 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2737 HandleCondition(comp);
2738 }
2739
2740 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2741 HandleCondition(comp);
2742 }
2743
2744 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2745 LocationSummary* locations =
2746 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2747 switch (compare->GetComparisonType()) {
2748 case DataType::Type::kBool:
2749 case DataType::Type::kUint8:
2750 case DataType::Type::kInt8:
2751 case DataType::Type::kUint16:
2752 case DataType::Type::kInt16:
2753 case DataType::Type::kInt32:
2754 case DataType::Type::kUint32:
2755 case DataType::Type::kInt64:
2756 case DataType::Type::kUint64: {
2757 locations->SetInAt(0, Location::RequiresRegister());
2758 locations->SetInAt(1, Location::Any());
2759 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2760 break;
2761 }
2762 case DataType::Type::kFloat32:
2763 case DataType::Type::kFloat64: {
2764 locations->SetInAt(0, Location::RequiresFpuRegister());
2765 locations->SetInAt(1, Location::Any());
2766 locations->SetOut(Location::RequiresRegister());
2767 break;
2768 }
2769 default:
2770 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2771 }
2772 }
2773
2774 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2775 LocationSummary* locations = compare->GetLocations();
2776 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2777 Location left = locations->InAt(0);
2778 Location right = locations->InAt(1);
2779
2780 NearLabel less, greater, done;
2781 DataType::Type type = compare->GetComparisonType();
2782 Condition less_cond = kLess;
2783
2784 switch (type) {
2785 case DataType::Type::kUint32:
2786 less_cond = kBelow;
2787 FALLTHROUGH_INTENDED;
2788 case DataType::Type::kBool:
2789 case DataType::Type::kUint8:
2790 case DataType::Type::kInt8:
2791 case DataType::Type::kUint16:
2792 case DataType::Type::kInt16:
2793 case DataType::Type::kInt32: {
2794 codegen_->GenerateIntCompare(left, right);
2795 break;
2796 }
2797 case DataType::Type::kUint64:
2798 less_cond = kBelow;
2799 FALLTHROUGH_INTENDED;
2800 case DataType::Type::kInt64: {
2801 codegen_->GenerateLongCompare(left, right);
2802 break;
2803 }
2804 case DataType::Type::kFloat32: {
2805 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2806 if (right.IsConstant()) {
2807 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2808 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2809 } else if (right.IsStackSlot()) {
2810 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2811 } else {
2812 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2813 }
2814 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2815 less_cond = kBelow; // ucomis{s,d} sets CF
2816 break;
2817 }
2818 case DataType::Type::kFloat64: {
2819 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2820 if (right.IsConstant()) {
2821 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2822 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2823 } else if (right.IsDoubleStackSlot()) {
2824 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2825 } else {
2826 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2827 }
2828 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2829 less_cond = kBelow; // ucomis{s,d} sets CF
2830 break;
2831 }
2832 default:
2833 LOG(FATAL) << "Unexpected compare type " << type;
2834 }
2835
2836 __ movl(out, Immediate(0));
2837 __ j(kEqual, &done);
2838 __ j(less_cond, &less);
2839
2840 __ Bind(&greater);
2841 __ movl(out, Immediate(1));
2842 __ jmp(&done);
2843
2844 __ Bind(&less);
2845 __ movl(out, Immediate(-1));
2846
2847 __ Bind(&done);
2848 }
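
// Sketch (not ART code) of the value the HCompare lowering above produces:
// -1 / 0 / +1, with an unordered (NaN) comparison steered to +1 under gt bias
// or -1 otherwise, mirroring the kUnordered jump before the flag tests.
#include <cmath>  // For std::isnan; keeps the sketch self-contained.

[[maybe_unused]] static int32_t CompareFloatSketch(float left, float right, bool gt_bias) {
  if (std::isnan(left) || std::isnan(right)) {
    return gt_bias ? 1 : -1;       // j(kUnordered, gt_bias ? &greater : &less)
  }
  if (left == right) {
    return 0;                      // j(kEqual, &done) with out == 0
  }
  return (left < right) ? -1 : 1;  // kBelow: ucomiss sets CF when left < right
}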
2849
2850 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2851 LocationSummary* locations =
2852 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2853 locations->SetOut(Location::ConstantLocation(constant));
2854 }
2855
2856 void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2857 // Will be generated at use site.
2858 }
2859
2860 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2861 LocationSummary* locations =
2862 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2863 locations->SetOut(Location::ConstantLocation(constant));
2864 }
2865
2866 void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2867 // Will be generated at use site.
2868 }
2869
2870 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2871 LocationSummary* locations =
2872 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2873 locations->SetOut(Location::ConstantLocation(constant));
2874 }
2875
2876 void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2877 // Will be generated at use site.
2878 }
2879
2880 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2881 LocationSummary* locations =
2882 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2883 locations->SetOut(Location::ConstantLocation(constant));
2884 }
2885
2886 void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2887 // Will be generated at use site.
2888 }
2889
2890 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2891 LocationSummary* locations =
2892 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2893 locations->SetOut(Location::ConstantLocation(constant));
2894 }
2895
2896 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2897 [[maybe_unused]] HDoubleConstant* constant) {
2898 // Will be generated at use site.
2899 }
2900
2901 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2902 constructor_fence->SetLocations(nullptr);
2903 }
2904
2905 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2906 [[maybe_unused]] HConstructorFence* constructor_fence) {
2907 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2908 }
2909
2910 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2911 memory_barrier->SetLocations(nullptr);
2912 }
2913
2914 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2915 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2916 }
2917
2918 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2919 ret->SetLocations(nullptr);
2920 }
2921
2922 void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2923 codegen_->GenerateFrameExit();
2924 }
2925
2926 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2927 LocationSummary* locations =
2928 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2929 SetInForReturnValue(ret, locations);
2930 }
2931
2932 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2933 switch (ret->InputAt(0)->GetType()) {
2934 case DataType::Type::kReference:
2935 case DataType::Type::kBool:
2936 case DataType::Type::kUint8:
2937 case DataType::Type::kInt8:
2938 case DataType::Type::kUint16:
2939 case DataType::Type::kInt16:
2940 case DataType::Type::kInt32:
2941 case DataType::Type::kInt64:
2942 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2943 break;
2944
2945 case DataType::Type::kFloat32: {
2946 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2947 XMM0);
2948 // To simplify callers of an OSR method, we put the return value in both
2949 // floating point and core register.
2950 if (GetGraph()->IsCompilingOsr()) {
2951 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2952 }
2953 break;
2954 }
2955 case DataType::Type::kFloat64: {
2956 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2957 XMM0);
2958 // To simplify callers of an OSR method, we put the return value in both
2959 // floating point and core register.
2960 if (GetGraph()->IsCompilingOsr()) {
2961 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2962 }
2963 break;
2964 }
2965
2966 default:
2967 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2968 }
2969 codegen_->GenerateFrameExit();
2970 }
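
// Sketch (not ART code) of what the OSR-only movd above achieves for an FP
// return value: the raw bits of XMM0 are mirrored into RAX so an OSR
// transition that only inspects core registers still sees the result.
#include <cstring>  // For std::memcpy; keeps the sketch self-contained.

[[maybe_unused]] static uint64_t OsrFpReturnBitsSketch(double result) {
  uint64_t bits = 0;
  std::memcpy(&bits, &result, sizeof(result));  // movd %xmm0, %rax (is64bit=true)
  return bits;                                  // Value now visible in a core register.
}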
2971
2972 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2973 switch (type) {
2974 case DataType::Type::kReference:
2975 case DataType::Type::kBool:
2976 case DataType::Type::kUint8:
2977 case DataType::Type::kInt8:
2978 case DataType::Type::kUint16:
2979 case DataType::Type::kInt16:
2980 case DataType::Type::kUint32:
2981 case DataType::Type::kInt32:
2982 case DataType::Type::kUint64:
2983 case DataType::Type::kInt64:
2984 return Location::RegisterLocation(RAX);
2985
2986 case DataType::Type::kVoid:
2987 return Location::NoLocation();
2988
2989 case DataType::Type::kFloat64:
2990 case DataType::Type::kFloat32:
2991 return Location::FpuRegisterLocation(XMM0);
2992 }
2993 }
2994
2995 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2996 return Location::RegisterLocation(kMethodRegisterArgument);
2997 }
2998
2999 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
3000 switch (type) {
3001 case DataType::Type::kReference:
3002 case DataType::Type::kBool:
3003 case DataType::Type::kUint8:
3004 case DataType::Type::kInt8:
3005 case DataType::Type::kUint16:
3006 case DataType::Type::kInt16:
3007 case DataType::Type::kInt32: {
3008 uint32_t index = gp_index_++;
3009 stack_index_++;
3010 if (index < calling_convention.GetNumberOfRegisters()) {
3011 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
3012 } else {
3013 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
3014 }
3015 }
3016
3017 case DataType::Type::kInt64: {
3018 uint32_t index = gp_index_;
3019 stack_index_ += 2;
3020 if (index < calling_convention.GetNumberOfRegisters()) {
3021 gp_index_ += 1;
3022 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
3023 } else {
3024 gp_index_ += 2;
3025 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3026 }
3027 }
3028
3029 case DataType::Type::kFloat32: {
3030 uint32_t index = float_index_++;
3031 stack_index_++;
3032 if (index < calling_convention.GetNumberOfFpuRegisters()) {
3033 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3034 } else {
3035 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
3036 }
3037 }
3038
3039 case DataType::Type::kFloat64: {
3040 uint32_t index = float_index_++;
3041 stack_index_ += 2;
3042 if (index < calling_convention.GetNumberOfFpuRegisters()) {
3043 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3044 } else {
3045 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3046 }
3047 }
3048
3049 case DataType::Type::kUint32:
3050 case DataType::Type::kUint64:
3051 case DataType::Type::kVoid:
3052 LOG(FATAL) << "Unexpected parameter type " << type;
3053 UNREACHABLE();
3054 }
3055 return Location::NoLocation();
3056 }
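
// Hypothetical sketch of the bookkeeping above (not the ART visitor).  It
// assumes the managed convention hands out core arguments in RSI, RDX, RCX,
// R8, R9 (the ArtMethod* occupies RDI) and that every argument also reserves
// vreg-sized stack slots: one for 32-bit values, two for 64-bit values.
[[maybe_unused]] static const char* NextCoreArgLocationSketch(uint32_t* gp_index,
                                                              uint32_t* stack_index,
                                                              bool is_64bit) {
  // Assumed managed core argument registers; illustrative only.
  static const char* const kCoreArgs[] = { "RSI", "RDX", "RCX", "R8", "R9" };
  uint32_t index = *gp_index;
  *stack_index += is_64bit ? 2u : 1u;
  if (index < sizeof(kCoreArgs) / sizeof(kCoreArgs[0])) {
    *gp_index += 1u;
    return kCoreArgs[index];       // Register argument.
  }
  *gp_index += is_64bit ? 2u : 1u;
  return "stack";                  // Spills to the caller-reserved out area.
}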
3057
3058 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
3059 DCHECK_NE(type, DataType::Type::kReference);
3060
3061 Location location = Location::NoLocation();
3062 if (DataType::IsFloatingPointType(type)) {
3063 if (fpr_index_ < kParameterFloatRegistersLength) {
3064 location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
3065 ++fpr_index_;
3066 }
3067 } else {
3068 // Native ABI uses the same registers as managed, except that the method register RDI
3069 // is a normal argument.
3070 if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
3071 location = Location::RegisterLocation(
3072 gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
3073 ++gpr_index_;
3074 }
3075 }
3076 if (location.IsInvalid()) {
3077 if (DataType::Is64BitType(type)) {
3078 location = Location::DoubleStackSlot(stack_offset_);
3079 } else {
3080 location = Location::StackSlot(stack_offset_);
3081 }
3082 stack_offset_ += kFramePointerSize;
3083
3084 if (for_register_allocation_) {
3085 location = Location::Any();
3086 }
3087 }
3088 return location;
3089 }
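
// Sketch (not ART code) of the difference handled above for @CriticalNative
// calls: the native ABI treats RDI (the managed method register) as the first
// integer argument, with the managed core argument registers following, and
// anything that overflows goes to 8-byte (kFramePointerSize) stack slots
// regardless of type width.  The register list is an assumption for
// illustration.
[[maybe_unused]] static const char* CriticalNativeGprSketch(uint32_t gpr_index) {
  // Assumed System V integer argument order; illustrative only.
  static const char* const kNativeGprs[] = { "RDI", "RSI", "RDX", "RCX", "R8", "R9" };
  return gpr_index < (sizeof(kNativeGprs) / sizeof(kNativeGprs[0]))
             ? kNativeGprs[gpr_index]
             : "stack";  // stack_offset_ advances by kFramePointerSize per slot.
}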
3090
3091 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
3092 const {
3093 // We perform conversion to the managed ABI return register after the call if needed.
3094 InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
3095 return dex_calling_convention.GetReturnLocation(type);
3096 }
3097
3098 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
3099 // Pass the method in the hidden argument RAX.
3100 return Location::RegisterLocation(RAX);
3101 }
3102
3103 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3104 // The trampoline uses the same calling convention as the dex calling convention,
3105 // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will
3106 // contain the method_idx.
3107 HandleInvoke(invoke);
3108 }
3109
3110 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3111 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3112 }
3113
3114 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3115 // Explicit clinit checks triggered by static invokes must have been pruned by
3116 // art::PrepareForRegisterAllocation.
3117 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3118
3119 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3120 if (intrinsic.TryDispatch(invoke)) {
3121 return;
3122 }
3123
3124 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3125 CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
3126 /*for_register_allocation=*/ true);
3127 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3128 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
3129 } else {
3130 HandleInvoke(invoke);
3131 }
3132 }
3133
3134 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3135 if (invoke->GetLocations()->Intrinsified()) {
3136 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
3137 intrinsic.Dispatch(invoke);
3138 return true;
3139 }
3140 return false;
3141 }
3142
3143 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3144 // Explicit clinit checks triggered by static invokes must have been pruned by
3145 // art::PrepareForRegisterAllocation.
3146 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3147
3148 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3149 return;
3150 }
3151
3152 LocationSummary* locations = invoke->GetLocations();
3153 codegen_->GenerateStaticOrDirectCall(
3154 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3155 }
3156
3157 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
3158 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
3159 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3160 }
3161
3162 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3163 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3164 if (intrinsic.TryDispatch(invoke)) {
3165 return;
3166 }
3167
3168 HandleInvoke(invoke);
3169 }
3170
3171 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3172 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3173 return;
3174 }
3175
3176 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3177 DCHECK(!codegen_->IsLeafMethod());
3178 }
3179
3180 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3181 HandleInvoke(invoke);
3182 // Add the hidden argument.
3183 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3184 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
3185 Location::RegisterLocation(RAX));
3186 }
3187 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
3188 }
3189
3190 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3191 CpuRegister klass) {
3192 DCHECK_EQ(RDI, klass.AsRegister());
3193 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3194 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3195 DCHECK(info != nullptr);
3196 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3197 info, GetCompilerOptions(), instruction->AsInvoke());
3198 if (cache != nullptr) {
3199 uint64_t address = reinterpret_cast64<uint64_t>(cache);
3200 NearLabel done;
3201 __ movq(CpuRegister(TMP), Immediate(address));
3202 // Fast path for a monomorphic cache.
3203 __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
3204 __ j(kEqual, &done);
3205 GenerateInvokeRuntime(
3206 GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
3207 __ Bind(&done);
3208 } else {
3209 // This is unexpected, but we don't guarantee stable compilation across
3210 // JIT runs so just warn about it.
3211 ScopedObjectAccess soa(Thread::Current());
3212 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3213 }
3214 }
3215 }
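
// Minimal sketch (not ART code) of the fast path emitted above: compare the
// receiver's class against the first cached class and fall through when it
// matches, calling the runtime only to record a new or different class.  The
// struct layout is illustrative, not the real art::InlineCache.
struct InlineCacheSketch {
  void* classes[5];  // classes[0] is what the cmpl above inspects.
};

[[maybe_unused]] static void CheckInlineCacheSketch(
    InlineCacheSketch* cache,
    void* receiver_klass,
    void (*update_runtime)(InlineCacheSketch*, void*)) {
  if (cache->classes[0] == receiver_klass) {  // cmpl + j(kEqual, &done)
    return;                                   // Monomorphic hit: no runtime call.
  }
  update_runtime(cache, receiver_klass);      // kQuickUpdateInlineCache entrypoint.
}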
3216
3217 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3218 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3219 LocationSummary* locations = invoke->GetLocations();
3220 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3221 Location receiver = locations->InAt(0);
3222 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
3223
3224 if (receiver.IsStackSlot()) {
3225 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
3226 // /* HeapReference<Class> */ temp = temp->klass_
3227 __ movl(temp, Address(temp, class_offset));
3228 } else {
3229 // /* HeapReference<Class> */ temp = receiver->klass_
3230 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
3231 }
3232 codegen_->MaybeRecordImplicitNullCheck(invoke);
3233 // Instead of simply (possibly) unpoisoning `temp` here, we should
3234 // emit a read barrier for the previous class reference load.
3235 // However this is not required in practice, as this is an
3236 // intermediate/temporary reference, and because the current
3237 // concurrent copying collector keeps the from-space memory
3238 // intact/accessible until the end of the marking phase (although
3239 // future collectors may not).
3240 __ MaybeUnpoisonHeapReference(temp);
3241
3242 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3243
3244 if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
3245 invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
3246 Location hidden_reg = locations->GetTemp(1);
3247 // Set the hidden argument. It is safe to do this here, as RAX
3248 // won't be modified thereafter, before the `call` instruction.
3249 // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
3250 DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
3251 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3252 }
3253
3254 // temp = temp->GetAddressOfIMT()
3255 __ movq(temp,
3256 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3257 // Compute the offset of the IMT entry for this interface method.
3258 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3259 invoke->GetImtIndex(), kX86_64PointerSize));
3260 // temp = temp->GetImtEntryAt(method_offset);
3261 __ movq(temp, Address(temp, method_offset));
3262 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3263 // We pass the method from the IMT in case of a conflict. This will ensure
3264 // we go into the runtime to resolve the actual method.
3265 Location hidden_reg = locations->GetTemp(1);
3266 __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
3267 }
3268 // call temp->GetEntryPoint();
3269 __ call(Address(
3270 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
3271
3272 DCHECK(!codegen_->IsLeafMethod());
3273 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3274 }
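
// Hypothetical sketch (not ART code) of the dispatch sequence generated
// above: load the receiver's class, index its interface method table, and
// call the resolved method's entry point.  The struct layouts are
// illustrative only, not the real mirror:: or ArtMethod layouts.
struct ImtMethodSketch { void (*entry_point)(); };
struct ImtClassSketch { ImtMethodSketch** imt; };   // Class::ImtPtrOffset()
struct ImtObjectSketch { ImtClassSketch* klass; };  // Object::ClassOffset()

[[maybe_unused]] static void InvokeInterfaceSketch(ImtObjectSketch* receiver,
                                                   uint32_t imt_index) {
  ImtClassSketch* klass = receiver->klass;          // movl temp, [receiver + class_offset]
  ImtMethodSketch* method = klass->imt[imt_index];  // movq temp, [temp + imt]; movq temp, [temp + offset]
  method->entry_point();                            // call [temp + entry_point_offset]
}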
3275
3276 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3277 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3278 if (intrinsic.TryDispatch(invoke)) {
3279 return;
3280 }
3281 HandleInvoke(invoke);
3282 }
3283
3284 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3285 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3286 return;
3287 }
3288 codegen_->GenerateInvokePolymorphicCall(invoke);
3289 }
3290
3291 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3292 HandleInvoke(invoke);
3293 }
3294
3295 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3296 codegen_->GenerateInvokeCustomCall(invoke);
3297 }
3298
3299 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3300 LocationSummary* locations =
3301 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3302 switch (neg->GetResultType()) {
3303 case DataType::Type::kInt32:
3304 case DataType::Type::kInt64:
3305 locations->SetInAt(0, Location::RequiresRegister());
3306 locations->SetOut(Location::SameAsFirstInput());
3307 break;
3308
3309 case DataType::Type::kFloat32:
3310 case DataType::Type::kFloat64:
3311 locations->SetInAt(0, Location::RequiresFpuRegister());
3312 locations->SetOut(Location::SameAsFirstInput());
3313 locations->AddTemp(Location::RequiresFpuRegister());
3314 break;
3315
3316 default:
3317 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3318 }
3319 }
3320
3321 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3322 LocationSummary* locations = neg->GetLocations();
3323 Location out = locations->Out();
3324 Location in = locations->InAt(0);
3325 switch (neg->GetResultType()) {
3326 case DataType::Type::kInt32:
3327 DCHECK(in.IsRegister());
3328 DCHECK(in.Equals(out));
3329 __ negl(out.AsRegister<CpuRegister>());
3330 break;
3331
3332 case DataType::Type::kInt64:
3333 DCHECK(in.IsRegister());
3334 DCHECK(in.Equals(out));
3335 __ negq(out.AsRegister<CpuRegister>());
3336 break;
3337
3338 case DataType::Type::kFloat32: {
3339 DCHECK(in.Equals(out));
3340 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3341 // Implement float negation with an exclusive or with value
3342 // 0x80000000 (mask for bit 31, representing the sign of a
3343 // single-precision floating-point number).
3344 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3345 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3346 break;
3347 }
3348
3349 case DataType::Type::kFloat64: {
3350 DCHECK(in.Equals(out));
3351 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3352 // Implement double negation with an exclusive or with value
3353 // 0x8000000000000000 (mask for bit 63, representing the sign of
3354 // a double-precision floating-point number).
3355 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3356 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3357 break;
3358 }
3359
3360 default:
3361 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3362 }
3363 }
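
// Standalone sketch (not ART code) of the sign-bit XOR used above: flipping
// bit 31 (or bit 63 for doubles) negates the value, including NaNs and signed
// zeroes, without going through FP arithmetic.
#include <cstring>  // For std::memcpy; keeps the sketch self-contained.

[[maybe_unused]] static float NegateFloatViaXorSketch(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  bits ^= 0x80000000u;                        // xorps with the literal mask.
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}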
3364
3365 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3366 LocationSummary* locations =
3367 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3368 DataType::Type result_type = conversion->GetResultType();
3369 DataType::Type input_type = conversion->GetInputType();
3370 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3371 << input_type << " -> " << result_type;
3372
3373 switch (result_type) {
3374 case DataType::Type::kUint8:
3375 case DataType::Type::kInt8:
3376 case DataType::Type::kUint16:
3377 case DataType::Type::kInt16:
3378 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3379 locations->SetInAt(0, Location::Any());
3380 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3381 break;
3382
3383 case DataType::Type::kInt32:
3384 switch (input_type) {
3385 case DataType::Type::kInt64:
3386 locations->SetInAt(0, Location::Any());
3387 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3388 break;
3389
3390 case DataType::Type::kFloat32:
3391 locations->SetInAt(0, Location::RequiresFpuRegister());
3392 locations->SetOut(Location::RequiresRegister());
3393 break;
3394
3395 case DataType::Type::kFloat64:
3396 locations->SetInAt(0, Location::RequiresFpuRegister());
3397 locations->SetOut(Location::RequiresRegister());
3398 break;
3399
3400 default:
3401 LOG(FATAL) << "Unexpected type conversion from " << input_type
3402 << " to " << result_type;
3403 }
3404 break;
3405
3406 case DataType::Type::kInt64:
3407 switch (input_type) {
3408 case DataType::Type::kBool:
3409 case DataType::Type::kUint8:
3410 case DataType::Type::kInt8:
3411 case DataType::Type::kUint16:
3412 case DataType::Type::kInt16:
3413 case DataType::Type::kInt32:
3414 // TODO: We would benefit from a (to-be-implemented)
3415 // Location::RegisterOrStackSlot requirement for this input.
3416 locations->SetInAt(0, Location::RequiresRegister());
3417 locations->SetOut(Location::RequiresRegister());
3418 break;
3419
3420 case DataType::Type::kFloat32:
3421 locations->SetInAt(0, Location::RequiresFpuRegister());
3422 locations->SetOut(Location::RequiresRegister());
3423 break;
3424
3425 case DataType::Type::kFloat64:
3426 locations->SetInAt(0, Location::RequiresFpuRegister());
3427 locations->SetOut(Location::RequiresRegister());
3428 break;
3429
3430 default:
3431 LOG(FATAL) << "Unexpected type conversion from " << input_type
3432 << " to " << result_type;
3433 }
3434 break;
3435
3436 case DataType::Type::kFloat32:
3437 switch (input_type) {
3438 case DataType::Type::kBool:
3439 case DataType::Type::kUint8:
3440 case DataType::Type::kInt8:
3441 case DataType::Type::kUint16:
3442 case DataType::Type::kInt16:
3443 case DataType::Type::kInt32:
3444 locations->SetInAt(0, Location::Any());
3445 locations->SetOut(Location::RequiresFpuRegister());
3446 break;
3447
3448 case DataType::Type::kInt64:
3449 locations->SetInAt(0, Location::Any());
3450 locations->SetOut(Location::RequiresFpuRegister());
3451 break;
3452
3453 case DataType::Type::kFloat64:
3454 locations->SetInAt(0, Location::Any());
3455 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3456 break;
3457
3458 default:
3459 LOG(FATAL) << "Unexpected type conversion from " << input_type
3460 << " to " << result_type;
3461 }
3462 break;
3463
3464 case DataType::Type::kFloat64:
3465 switch (input_type) {
3466 case DataType::Type::kBool:
3467 case DataType::Type::kUint8:
3468 case DataType::Type::kInt8:
3469 case DataType::Type::kUint16:
3470 case DataType::Type::kInt16:
3471 case DataType::Type::kInt32:
3472 locations->SetInAt(0, Location::Any());
3473 locations->SetOut(Location::RequiresFpuRegister());
3474 break;
3475
3476 case DataType::Type::kInt64:
3477 locations->SetInAt(0, Location::Any());
3478 locations->SetOut(Location::RequiresFpuRegister());
3479 break;
3480
3481 case DataType::Type::kFloat32:
3482 locations->SetInAt(0, Location::Any());
3483 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3484 break;
3485
3486 default:
3487 LOG(FATAL) << "Unexpected type conversion from " << input_type
3488 << " to " << result_type;
3489 }
3490 break;
3491
3492 default:
3493 LOG(FATAL) << "Unexpected type conversion from " << input_type
3494 << " to " << result_type;
3495 }
3496 }
3497
3498 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3499 LocationSummary* locations = conversion->GetLocations();
3500 Location out = locations->Out();
3501 Location in = locations->InAt(0);
3502 DataType::Type result_type = conversion->GetResultType();
3503 DataType::Type input_type = conversion->GetInputType();
3504 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3505 << input_type << " -> " << result_type;
3506 switch (result_type) {
3507 case DataType::Type::kUint8:
3508 switch (input_type) {
3509 case DataType::Type::kInt8:
3510 case DataType::Type::kUint16:
3511 case DataType::Type::kInt16:
3512 case DataType::Type::kInt32:
3513 case DataType::Type::kInt64:
3514 if (in.IsRegister()) {
3515 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3516 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3517 __ movzxb(out.AsRegister<CpuRegister>(),
3518 Address(CpuRegister(RSP), in.GetStackIndex()));
3519 } else {
3520 __ movl(out.AsRegister<CpuRegister>(),
3521 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3522 }
3523 break;
3524
3525 default:
3526 LOG(FATAL) << "Unexpected type conversion from " << input_type
3527 << " to " << result_type;
3528 }
3529 break;
3530
3531 case DataType::Type::kInt8:
3532 switch (input_type) {
3533 case DataType::Type::kUint8:
3534 case DataType::Type::kUint16:
3535 case DataType::Type::kInt16:
3536 case DataType::Type::kInt32:
3537 case DataType::Type::kInt64:
3538 if (in.IsRegister()) {
3539 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3540 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3541 __ movsxb(out.AsRegister<CpuRegister>(),
3542 Address(CpuRegister(RSP), in.GetStackIndex()));
3543 } else {
3544 __ movl(out.AsRegister<CpuRegister>(),
3545 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3546 }
3547 break;
3548
3549 default:
3550 LOG(FATAL) << "Unexpected type conversion from " << input_type
3551 << " to " << result_type;
3552 }
3553 break;
3554
3555 case DataType::Type::kUint16:
3556 switch (input_type) {
3557 case DataType::Type::kInt8:
3558 case DataType::Type::kInt16:
3559 case DataType::Type::kInt32:
3560 case DataType::Type::kInt64:
3561 if (in.IsRegister()) {
3562 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3563 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3564 __ movzxw(out.AsRegister<CpuRegister>(),
3565 Address(CpuRegister(RSP), in.GetStackIndex()));
3566 } else {
3567 __ movl(out.AsRegister<CpuRegister>(),
3568 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3569 }
3570 break;
3571
3572 default:
3573 LOG(FATAL) << "Unexpected type conversion from " << input_type
3574 << " to " << result_type;
3575 }
3576 break;
3577
3578 case DataType::Type::kInt16:
3579 switch (input_type) {
3580 case DataType::Type::kUint16:
3581 case DataType::Type::kInt32:
3582 case DataType::Type::kInt64:
3583 if (in.IsRegister()) {
3584 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3585 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3586 __ movsxw(out.AsRegister<CpuRegister>(),
3587 Address(CpuRegister(RSP), in.GetStackIndex()));
3588 } else {
3589 __ movl(out.AsRegister<CpuRegister>(),
3590 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3591 }
3592 break;
3593
3594 default:
3595 LOG(FATAL) << "Unexpected type conversion from " << input_type
3596 << " to " << result_type;
3597 }
3598 break;
3599
3600 case DataType::Type::kInt32:
3601 switch (input_type) {
3602 case DataType::Type::kInt64:
3603 if (in.IsRegister()) {
3604 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3605 } else if (in.IsDoubleStackSlot()) {
3606 __ movl(out.AsRegister<CpuRegister>(),
3607 Address(CpuRegister(RSP), in.GetStackIndex()));
3608 } else {
3609 DCHECK(in.IsConstant());
3610 DCHECK(in.GetConstant()->IsLongConstant());
3611 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3612 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3613 }
3614 break;
3615
3616 case DataType::Type::kFloat32: {
3617 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3618 CpuRegister output = out.AsRegister<CpuRegister>();
3619 NearLabel done, nan;
3620
3621 __ movl(output, Immediate(kPrimIntMax));
3622 // if input >= (float)INT_MAX goto done
3623 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3624 __ j(kAboveEqual, &done);
3625 // if input == NaN goto nan
3626 __ j(kUnordered, &nan);
3627 // output = float-to-int-truncate(input)
3628 __ cvttss2si(output, input, false);
3629 __ jmp(&done);
3630 __ Bind(&nan);
3631 // output = 0
3632 __ xorl(output, output);
3633 __ Bind(&done);
3634 break;
3635 }
3636
3637 case DataType::Type::kFloat64: {
3638 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3639 CpuRegister output = out.AsRegister<CpuRegister>();
3640 NearLabel done, nan;
3641
3642 __ movl(output, Immediate(kPrimIntMax));
3643 // if input >= (double)INT_MAX goto done
3644 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3645 __ j(kAboveEqual, &done);
3646 // if input == NaN goto nan
3647 __ j(kUnordered, &nan);
3648 // output = double-to-int-truncate(input)
3649 __ cvttsd2si(output, input);
3650 __ jmp(&done);
3651 __ Bind(&nan);
3652 // output = 0
3653 __ xorl(output, output);
3654 __ Bind(&done);
3655 break;
3656 }
3657
3658 default:
3659 LOG(FATAL) << "Unexpected type conversion from " << input_type
3660 << " to " << result_type;
3661 }
3662 break;
3663
3664 case DataType::Type::kInt64:
3665 DCHECK(out.IsRegister());
3666 switch (input_type) {
3667 case DataType::Type::kBool:
3668 case DataType::Type::kUint8:
3669 case DataType::Type::kInt8:
3670 case DataType::Type::kUint16:
3671 case DataType::Type::kInt16:
3672 case DataType::Type::kInt32:
3673 DCHECK(in.IsRegister());
3674 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3675 break;
3676
3677 case DataType::Type::kFloat32: {
3678 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3679 CpuRegister output = out.AsRegister<CpuRegister>();
3680 NearLabel done, nan;
3681
3682 codegen_->Load64BitValue(output, kPrimLongMax);
3683 // if input >= (float)LONG_MAX goto done
3684 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3685 __ j(kAboveEqual, &done);
3686 // if input == NaN goto nan
3687 __ j(kUnordered, &nan);
3688 // output = float-to-long-truncate(input)
3689 __ cvttss2si(output, input, true);
3690 __ jmp(&done);
3691 __ Bind(&nan);
3692 // output = 0
3693 __ xorl(output, output);
3694 __ Bind(&done);
3695 break;
3696 }
3697
3698 case DataType::Type::kFloat64: {
3699 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3700 CpuRegister output = out.AsRegister<CpuRegister>();
3701 NearLabel done, nan;
3702
3703 codegen_->Load64BitValue(output, kPrimLongMax);
3704 // if input >= (double)LONG_MAX goto done
3705 __ comisd(input, codegen_->LiteralDoubleAddress(
3706 static_cast<double>(kPrimLongMax)));
3707 __ j(kAboveEqual, &done);
3708 // if input == NaN goto nan
3709 __ j(kUnordered, &nan);
3710 // output = double-to-long-truncate(input)
3711 __ cvttsd2si(output, input, true);
3712 __ jmp(&done);
3713 __ Bind(&nan);
3714 // output = 0
3715 __ xorl(output, output);
3716 __ Bind(&done);
3717 break;
3718 }
3719
3720 default:
3721 LOG(FATAL) << "Unexpected type conversion from " << input_type
3722 << " to " << result_type;
3723 }
3724 break;
3725
3726 case DataType::Type::kFloat32:
3727 switch (input_type) {
3728 case DataType::Type::kBool:
3729 case DataType::Type::kUint8:
3730 case DataType::Type::kInt8:
3731 case DataType::Type::kUint16:
3732 case DataType::Type::kInt16:
3733 case DataType::Type::kInt32:
3734 if (in.IsRegister()) {
3735 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3736 } else if (in.IsConstant()) {
3737 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3738 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3739 codegen_->Load32BitValue(dest, static_cast<float>(v));
3740 } else {
3741 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3742 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3743 }
3744 break;
3745
3746 case DataType::Type::kInt64:
3747 if (in.IsRegister()) {
3748 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3749 } else if (in.IsConstant()) {
3750 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3751 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3752 codegen_->Load32BitValue(dest, static_cast<float>(v));
3753 } else {
3754 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3755 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3756 }
3757 break;
3758
3759 case DataType::Type::kFloat64:
3760 if (in.IsFpuRegister()) {
3761 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3762 } else if (in.IsConstant()) {
3763 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3764 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3765 codegen_->Load32BitValue(dest, static_cast<float>(v));
3766 } else {
3767 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3768 Address(CpuRegister(RSP), in.GetStackIndex()));
3769 }
3770 break;
3771
3772 default:
3773 LOG(FATAL) << "Unexpected type conversion from " << input_type
3774 << " to " << result_type;
3775 }
3776 break;
3777
3778 case DataType::Type::kFloat64:
3779 switch (input_type) {
3780 case DataType::Type::kBool:
3781 case DataType::Type::kUint8:
3782 case DataType::Type::kInt8:
3783 case DataType::Type::kUint16:
3784 case DataType::Type::kInt16:
3785 case DataType::Type::kInt32:
3786 if (in.IsRegister()) {
3787 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3788 } else if (in.IsConstant()) {
3789 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3790 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3791 codegen_->Load64BitValue(dest, static_cast<double>(v));
3792 } else {
3793 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3794 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3795 }
3796 break;
3797
3798 case DataType::Type::kInt64:
3799 if (in.IsRegister()) {
3800 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3801 } else if (in.IsConstant()) {
3802 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3803 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3804 codegen_->Load64BitValue(dest, static_cast<double>(v));
3805 } else {
3806 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3807 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3808 }
3809 break;
3810
3811 case DataType::Type::kFloat32:
3812 if (in.IsFpuRegister()) {
3813 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3814 } else if (in.IsConstant()) {
3815 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3816 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3817 codegen_->Load64BitValue(dest, static_cast<double>(v));
3818 } else {
3819 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3820 Address(CpuRegister(RSP), in.GetStackIndex()));
3821 }
3822 break;
3823
3824 default:
3825 LOG(FATAL) << "Unexpected type conversion from " << input_type
3826 << " to " << result_type;
3827 }
3828 break;
3829
3830 default:
3831 LOG(FATAL) << "Unexpected type conversion from " << input_type
3832 << " to " << result_type;
3833 }
3834 }
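
// Sketch (not ART code) of the float->int rule the comiss/cvttss2si sequence
// above implements: NaN maps to 0, values at or above (float)kPrimIntMax
// clamp to kPrimIntMax, and the remaining out-of-range negative inputs end up
// as INT32_MIN because cvttss2si produces the "integer indefinite" value.
#include <cmath>   // For std::isnan; keeps the sketch self-contained.
#include <limits>  // For std::numeric_limits.

[[maybe_unused]] static int32_t FloatToIntSketch(float input) {
  constexpr int32_t kMax = std::numeric_limits<int32_t>::max();
  constexpr int32_t kMin = std::numeric_limits<int32_t>::min();
  if (std::isnan(input)) {
    return 0;                                  // The "nan" label above.
  }
  if (input >= static_cast<float>(kMax)) {
    return kMax;                               // comiss + j(kAboveEqual, &done).
  }
  if (input <= static_cast<float>(kMin)) {
    return kMin;                               // cvttss2si's indefinite result.
  }
  return static_cast<int32_t>(input);          // Plain truncation toward zero.
}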
3835
3836 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3837 LocationSummary* locations =
3838 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3839 switch (add->GetResultType()) {
3840 case DataType::Type::kInt32: {
3841 locations->SetInAt(0, Location::RequiresRegister());
3842 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3843 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3844 break;
3845 }
3846
3847 case DataType::Type::kInt64: {
3848 locations->SetInAt(0, Location::RequiresRegister());
3849 // We can use a leaq or addq if the constant can fit in an immediate.
3850 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3851 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3852 break;
3853 }
3854
3855 case DataType::Type::kFloat64:
3856 case DataType::Type::kFloat32: {
3857 locations->SetInAt(0, Location::RequiresFpuRegister());
3858 locations->SetInAt(1, Location::Any());
3859 locations->SetOut(Location::SameAsFirstInput());
3860 break;
3861 }
3862
3863 default:
3864 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3865 }
3866 }
3867
3868 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3869 LocationSummary* locations = add->GetLocations();
3870 Location first = locations->InAt(0);
3871 Location second = locations->InAt(1);
3872 Location out = locations->Out();
3873
3874 switch (add->GetResultType()) {
3875 case DataType::Type::kInt32: {
3876 if (second.IsRegister()) {
3877 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3878 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3879 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3880 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3881 } else {
3882 __ leal(out.AsRegister<CpuRegister>(), Address(
3883 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3884 }
3885 } else if (second.IsConstant()) {
3886 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3887 __ addl(out.AsRegister<CpuRegister>(),
3888 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3889 } else {
3890 __ leal(out.AsRegister<CpuRegister>(), Address(
3891 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3892 }
3893 } else {
3894 DCHECK(first.Equals(locations->Out()));
3895 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3896 }
3897 break;
3898 }
3899
3900 case DataType::Type::kInt64: {
3901 if (second.IsRegister()) {
3902 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3903 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3904 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3905 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3906 } else {
3907 __ leaq(out.AsRegister<CpuRegister>(), Address(
3908 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3909 }
3910 } else {
3911 DCHECK(second.IsConstant());
3912 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3913 int32_t int32_value = Low32Bits(value);
3914 DCHECK_EQ(int32_value, value);
3915 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3916 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3917 } else {
3918 __ leaq(out.AsRegister<CpuRegister>(), Address(
3919 first.AsRegister<CpuRegister>(), int32_value));
3920 }
3921 }
3922 break;
3923 }
3924
3925 case DataType::Type::kFloat32: {
3926 if (second.IsFpuRegister()) {
3927 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3928 } else if (second.IsConstant()) {
3929 __ addss(first.AsFpuRegister<XmmRegister>(),
3930 codegen_->LiteralFloatAddress(
3931 second.GetConstant()->AsFloatConstant()->GetValue()));
3932 } else {
3933 DCHECK(second.IsStackSlot());
3934 __ addss(first.AsFpuRegister<XmmRegister>(),
3935 Address(CpuRegister(RSP), second.GetStackIndex()));
3936 }
3937 break;
3938 }
3939
3940 case DataType::Type::kFloat64: {
3941 if (second.IsFpuRegister()) {
3942 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3943 } else if (second.IsConstant()) {
3944 __ addsd(first.AsFpuRegister<XmmRegister>(),
3945 codegen_->LiteralDoubleAddress(
3946 second.GetConstant()->AsDoubleConstant()->GetValue()));
3947 } else {
3948 DCHECK(second.IsDoubleStackSlot());
3949 __ addsd(first.AsFpuRegister<XmmRegister>(),
3950 Address(CpuRegister(RSP), second.GetStackIndex()));
3951 }
3952 break;
3953 }
3954
3955 default:
3956 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3957 }
3958 }
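
// Sketch (not ART code) of why the generator above prefers leal/leaq when the
// output register is distinct from both inputs: LEA folds the addition into
// an address computation, so neither source register is clobbered, no extra
// mov is needed, and (unlike addl) EFLAGS is left untouched.
[[maybe_unused]] static int32_t AddViaLeaSketch(int32_t first, int32_t second) {
  // leal (%first, %second, TIMES_1), %out  ==>  out = first + second + 0
  int32_t out = first + second;
  return out;
}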
3959
3960 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3961 LocationSummary* locations =
3962 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3963 switch (sub->GetResultType()) {
3964 case DataType::Type::kInt32: {
3965 locations->SetInAt(0, Location::RequiresRegister());
3966 locations->SetInAt(1, Location::Any());
3967 locations->SetOut(Location::SameAsFirstInput());
3968 break;
3969 }
3970 case DataType::Type::kInt64: {
3971 locations->SetInAt(0, Location::RequiresRegister());
3972 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3973 locations->SetOut(Location::SameAsFirstInput());
3974 break;
3975 }
3976 case DataType::Type::kFloat32:
3977 case DataType::Type::kFloat64: {
3978 locations->SetInAt(0, Location::RequiresFpuRegister());
3979 locations->SetInAt(1, Location::Any());
3980 locations->SetOut(Location::SameAsFirstInput());
3981 break;
3982 }
3983 default:
3984 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3985 }
3986 }
3987
3988 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3989 LocationSummary* locations = sub->GetLocations();
3990 Location first = locations->InAt(0);
3991 Location second = locations->InAt(1);
3992 DCHECK(first.Equals(locations->Out()));
3993 switch (sub->GetResultType()) {
3994 case DataType::Type::kInt32: {
3995 if (second.IsRegister()) {
3996 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3997 } else if (second.IsConstant()) {
3998 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3999 __ subl(first.AsRegister<CpuRegister>(), imm);
4000 } else {
4001 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
4002 }
4003 break;
4004 }
4005 case DataType::Type::kInt64: {
4006 if (second.IsConstant()) {
4007 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
4008 DCHECK(IsInt<32>(value));
4009 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
4010 } else {
4011 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4012 }
4013 break;
4014 }
4015
4016 case DataType::Type::kFloat32: {
4017 if (second.IsFpuRegister()) {
4018 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4019 } else if (second.IsConstant()) {
4020 __ subss(first.AsFpuRegister<XmmRegister>(),
4021 codegen_->LiteralFloatAddress(
4022 second.GetConstant()->AsFloatConstant()->GetValue()));
4023 } else {
4024 DCHECK(second.IsStackSlot());
4025 __ subss(first.AsFpuRegister<XmmRegister>(),
4026 Address(CpuRegister(RSP), second.GetStackIndex()));
4027 }
4028 break;
4029 }
4030
4031 case DataType::Type::kFloat64: {
4032 if (second.IsFpuRegister()) {
4033 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4034 } else if (second.IsConstant()) {
4035 __ subsd(first.AsFpuRegister<XmmRegister>(),
4036 codegen_->LiteralDoubleAddress(
4037 second.GetConstant()->AsDoubleConstant()->GetValue()));
4038 } else {
4039 DCHECK(second.IsDoubleStackSlot());
4040 __ subsd(first.AsFpuRegister<XmmRegister>(),
4041 Address(CpuRegister(RSP), second.GetStackIndex()));
4042 }
4043 break;
4044 }
4045
4046 default:
4047 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4048 }
4049 }
4050
4051 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
4052 LocationSummary* locations =
4053 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4054 switch (mul->GetResultType()) {
4055 case DataType::Type::kInt32: {
4056 locations->SetInAt(0, Location::RequiresRegister());
4057 locations->SetInAt(1, Location::Any());
4058 if (mul->InputAt(1)->IsIntConstant()) {
4059 // Can use 3 operand multiply.
4060 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4061 } else {
4062 locations->SetOut(Location::SameAsFirstInput());
4063 }
4064 break;
4065 }
4066 case DataType::Type::kInt64: {
4067 locations->SetInAt(0, Location::RequiresRegister());
4068 locations->SetInAt(1, Location::Any());
4069 if (mul->InputAt(1)->IsLongConstant() &&
4070 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
4071 // Can use 3 operand multiply.
4072 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4073 } else {
4074 locations->SetOut(Location::SameAsFirstInput());
4075 }
4076 break;
4077 }
4078 case DataType::Type::kFloat32:
4079 case DataType::Type::kFloat64: {
4080 locations->SetInAt(0, Location::RequiresFpuRegister());
4081 locations->SetInAt(1, Location::Any());
4082 locations->SetOut(Location::SameAsFirstInput());
4083 break;
4084 }
4085
4086 default:
4087 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4088 }
4089 }
4090
4091 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
4092 LocationSummary* locations = mul->GetLocations();
4093 Location first = locations->InAt(0);
4094 Location second = locations->InAt(1);
4095 Location out = locations->Out();
4096 switch (mul->GetResultType()) {
4097 case DataType::Type::kInt32:
4098 // The constant may have ended up in a register, so test explicitly to avoid
4099 // problems where the output may not be the same as the first operand.
4100 if (mul->InputAt(1)->IsIntConstant()) {
4101 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
4102 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
4103 } else if (second.IsRegister()) {
4104 DCHECK(first.Equals(out));
4105 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4106 } else {
4107 DCHECK(first.Equals(out));
4108 DCHECK(second.IsStackSlot());
4109 __ imull(first.AsRegister<CpuRegister>(),
4110 Address(CpuRegister(RSP), second.GetStackIndex()));
4111 }
4112 break;
4113 case DataType::Type::kInt64: {
4114 // The constant may have ended up in a register, so test explicitly to avoid
4115 // problems where the output may not be the same as the first operand.
4116 if (mul->InputAt(1)->IsLongConstant()) {
4117 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
4118 if (IsInt<32>(value)) {
4119 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
4120 Immediate(static_cast<int32_t>(value)));
4121 } else {
4122 // Have to use the constant area.
4123 DCHECK(first.Equals(out));
4124 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
4125 }
4126 } else if (second.IsRegister()) {
4127 DCHECK(first.Equals(out));
4128 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4129 } else {
4130 DCHECK(second.IsDoubleStackSlot());
4131 DCHECK(first.Equals(out));
4132 __ imulq(first.AsRegister<CpuRegister>(),
4133 Address(CpuRegister(RSP), second.GetStackIndex()));
4134 }
4135 break;
4136 }
4137
4138 case DataType::Type::kFloat32: {
4139 DCHECK(first.Equals(out));
4140 if (second.IsFpuRegister()) {
4141 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4142 } else if (second.IsConstant()) {
4143 __ mulss(first.AsFpuRegister<XmmRegister>(),
4144 codegen_->LiteralFloatAddress(
4145 second.GetConstant()->AsFloatConstant()->GetValue()));
4146 } else {
4147 DCHECK(second.IsStackSlot());
4148 __ mulss(first.AsFpuRegister<XmmRegister>(),
4149 Address(CpuRegister(RSP), second.GetStackIndex()));
4150 }
4151 break;
4152 }
4153
4154 case DataType::Type::kFloat64: {
4155 DCHECK(first.Equals(out));
4156 if (second.IsFpuRegister()) {
4157 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4158 } else if (second.IsConstant()) {
4159 __ mulsd(first.AsFpuRegister<XmmRegister>(),
4160 codegen_->LiteralDoubleAddress(
4161 second.GetConstant()->AsDoubleConstant()->GetValue()));
4162 } else {
4163 DCHECK(second.IsDoubleStackSlot());
4164 __ mulsd(first.AsFpuRegister<XmmRegister>(),
4165 Address(CpuRegister(RSP), second.GetStackIndex()));
4166 }
4167 break;
4168 }
4169
4170 default:
4171 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4172 }
4173 }
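// Note on the imul forms used above: the constant cases rely on the three-operand form
// (imul dst, src, imm32), which is why VisitMul's builder can hand out a fresh output
// register with kNoOutputOverlap. The register/memory cases use the two-operand form that
// accumulates into its first operand, so there the output must alias the first input.
// For illustration (hypothetical operands), x * 5 with x in ECX can be emitted in AT&T
// syntax as "imull $5, %ecx, %eax" without clobbering ECX.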
4174
4175 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
4176 uint32_t stack_adjustment, bool is_float) {
4177 if (source.IsStackSlot()) {
4178 DCHECK(is_float);
4179 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4180 } else if (source.IsDoubleStackSlot()) {
4181 DCHECK(!is_float);
4182 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4183 } else {
4184 // Write the value to the temporary location on the stack and load to FP stack.
4185 if (is_float) {
4186 Location stack_temp = Location::StackSlot(temp_offset);
4187 codegen_->Move(stack_temp, source);
4188 __ flds(Address(CpuRegister(RSP), temp_offset));
4189 } else {
4190 Location stack_temp = Location::DoubleStackSlot(temp_offset);
4191 codegen_->Move(stack_temp, source);
4192 __ fldl(Address(CpuRegister(RSP), temp_offset));
4193 }
4194 }
4195 }
4196
4197 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
4198 DataType::Type type = rem->GetResultType();
4199 bool is_float = type == DataType::Type::kFloat32;
4200 size_t elem_size = DataType::Size(type);
4201 LocationSummary* locations = rem->GetLocations();
4202 Location first = locations->InAt(0);
4203 Location second = locations->InAt(1);
4204 Location out = locations->Out();
4205
4206 // Create stack space for 2 elements.
4207 // TODO: enhance register allocator to ask for stack temporaries.
4208 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
4209
4210 // Load the values to the FP stack in reverse order, using temporaries if needed.
4211 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
4212 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
4213
4214 // Loop doing FPREM until we stabilize.
4215 NearLabel retry;
4216 __ Bind(&retry);
4217 __ fprem();
4218
4219 // Move FP status to AX.
4220 __ fstsw();
4221
4222 // And see if the argument reduction is complete. This is signaled by the
4223 // C2 FPU flag bit set to 0.
4224 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
4225 __ j(kNotEqual, &retry);
4226
4227 // We have settled on the final value. Retrieve it into an XMM register.
4228 // Store FP top of stack to real stack.
4229 if (is_float) {
4230 __ fsts(Address(CpuRegister(RSP), 0));
4231 } else {
4232 __ fstl(Address(CpuRegister(RSP), 0));
4233 }
4234
4235 // Pop the 2 items from the FP stack.
4236 __ fucompp();
4237
4238 // Load the value from the stack into an XMM register.
4239 DCHECK(out.IsFpuRegister()) << out;
4240 if (is_float) {
4241 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4242 } else {
4243 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4244 }
4245
4246 // And remove the temporary stack space we allocated.
4247 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
4248 }
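// The fprem loop above implements fmod-style remainder (truncated quotient, result takes
// the sign of the dividend), which is what the Java % operator requires for floats and
// doubles. A rough equivalent, with arbitrarily chosen illustrative values:
//   x = 10.5, y = 3.0  =>  q = trunc(x / y) = 3.0,  r = x - q * y = 1.5
// fprem only performs a partial reduction when the operand exponents differ by more than
// 63, signalling the incomplete case through the C2 status bit, hence the retry loop.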
4249
4250 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4251 DCHECK(instruction->IsDiv() || instruction->IsRem());
4252
4253 LocationSummary* locations = instruction->GetLocations();
4254 Location second = locations->InAt(1);
4255 DCHECK(second.IsConstant());
4256
4257 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4258 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
4259 int64_t imm = Int64FromConstant(second.GetConstant());
4260
4261 DCHECK(imm == 1 || imm == -1);
4262
4263 switch (instruction->GetResultType()) {
4264 case DataType::Type::kInt32: {
4265 if (instruction->IsRem()) {
4266 __ xorl(output_register, output_register);
4267 } else {
4268 __ movl(output_register, input_register);
4269 if (imm == -1) {
4270 __ negl(output_register);
4271 }
4272 }
4273 break;
4274 }
4275
4276 case DataType::Type::kInt64: {
4277 if (instruction->IsRem()) {
4278 __ xorl(output_register, output_register);
4279 } else {
4280 __ movq(output_register, input_register);
4281 if (imm == -1) {
4282 __ negq(output_register);
4283 }
4284 }
4285 break;
4286 }
4287
4288 default:
4289 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4290 }
4291 }
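// The special cases above matter for correctness as well as speed: x % 1 and x % -1 are
// always 0, and x / -1 is plain negation, where negl/negq wrap the most negative value
// back onto itself exactly as the JLS requires; the generic idiv path would raise a #DE
// fault for that same input.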
4292 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4293 LocationSummary* locations = instruction->GetLocations();
4294 Location second = locations->InAt(1);
4295 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4296 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4297 int64_t imm = Int64FromConstant(second.GetConstant());
4298 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4299 uint64_t abs_imm = AbsOrMin(imm);
4300 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4301 if (instruction->GetResultType() == DataType::Type::kInt32) {
4302 NearLabel done;
4303 __ movl(out, numerator);
4304 __ andl(out, Immediate(abs_imm - 1));
4305 __ j(Condition::kZero, &done);
4306 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
4307 __ testl(numerator, numerator);
4308 __ cmov(Condition::kLess, out, tmp, false);
4309 __ Bind(&done);
4310
4311 } else {
4312 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4313 codegen_->Load64BitValue(tmp, abs_imm - 1);
4314 NearLabel done;
4315
4316 __ movq(out, numerator);
4317 __ andq(out, tmp);
4318 __ j(Condition::kZero, &done);
4319 __ movq(tmp, numerator);
4320 __ sarq(tmp, Immediate(63));
4321 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4322 __ orq(out, tmp);
4323 __ Bind(&done);
4324 }
4325 }
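// Worked example of the masking sequence above, using a hypothetical divisor of 8
// (abs_imm == 8, mask == 7):
//   26 % 8:  26 & 7 == 2; the numerator is non-negative, so 2 is the result.
//   -26 % 8: -26 & 7 == 6; the numerator is negative and the masked bits are non-zero,
//            so the fix-up produces 6 - 8 == -2, giving the remainder the sign of the
//            dividend as Java requires.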
4326 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4327 LocationSummary* locations = instruction->GetLocations();
4328 Location second = locations->InAt(1);
4329
4330 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4331 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4332
4333 int64_t imm = Int64FromConstant(second.GetConstant());
4334 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4335 uint64_t abs_imm = AbsOrMin(imm);
4336
4337 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4338
4339 if (instruction->GetResultType() == DataType::Type::kInt32) {
4340 // When the denominator is equal to 2, we can add the sign bit and the numerator to tmp.
4341 // Below we use an addl instruction instead of cmov, which gives us a one-cycle benefit.
4342 if (abs_imm == 2) {
4343 __ leal(tmp, Address(numerator, 0));
4344 __ shrl(tmp, Immediate(31));
4345 __ addl(tmp, numerator);
4346 } else {
4347 __ leal(tmp, Address(numerator, abs_imm - 1));
4348 __ testl(numerator, numerator);
4349 __ cmov(kGreaterEqual, tmp, numerator);
4350 }
4351 int shift = CTZ(imm);
4352 __ sarl(tmp, Immediate(shift));
4353
4354 if (imm < 0) {
4355 __ negl(tmp);
4356 }
4357
4358 __ movl(output_register, tmp);
4359 } else {
4360 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4361 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4362 if (abs_imm == 2) {
4363 __ movq(rdx, numerator);
4364 __ shrq(rdx, Immediate(63));
4365 __ addq(rdx, numerator);
4366 } else {
4367 codegen_->Load64BitValue(rdx, abs_imm - 1);
4368 __ addq(rdx, numerator);
4369 __ testq(numerator, numerator);
4370 __ cmov(kGreaterEqual, rdx, numerator);
4371 }
4372 int shift = CTZ(imm);
4373 __ sarq(rdx, Immediate(shift));
4374
4375 if (imm < 0) {
4376 __ negq(rdx);
4377 }
4378
4379 __ movq(output_register, rdx);
4380 }
4381 }
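// Worked example of the bias-and-shift sequence above, using a hypothetical divisor of 4
// (abs_imm == 4, shift == 2):
//   7 / 4:  the numerator is non-negative, so no bias is applied; 7 >> 2 == 1.
//   -7 / 4: the bias abs_imm - 1 == 3 is added first, giving -4; -4 >> 2 == -1, which is
//           the round-toward-zero result Java requires, whereas a bare arithmetic shift
//           of -7 would give the floor result -2.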
4382
4383 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4384 DCHECK(instruction->IsDiv() || instruction->IsRem());
4385
4386 LocationSummary* locations = instruction->GetLocations();
4387 Location second = locations->InAt(1);
4388
4389 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4390 : locations->GetTemp(0).AsRegister<CpuRegister>();
4391 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4392 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4393 : locations->Out().AsRegister<CpuRegister>();
4394 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4395
4396 DCHECK_EQ(RAX, eax.AsRegister());
4397 DCHECK_EQ(RDX, edx.AsRegister());
4398 if (instruction->IsDiv()) {
4399 DCHECK_EQ(RAX, out.AsRegister());
4400 } else {
4401 DCHECK_EQ(RDX, out.AsRegister());
4402 }
4403
4404 int64_t magic;
4405 int shift;
4406
4407 // TODO: can these branches be written as one?
4408 if (instruction->GetResultType() == DataType::Type::kInt32) {
4409 int imm = second.GetConstant()->AsIntConstant()->GetValue();
4410
4411 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4412
4413 __ movl(numerator, eax);
4414
4415 __ movl(eax, Immediate(magic));
4416 __ imull(numerator);
4417
4418 if (imm > 0 && magic < 0) {
4419 __ addl(edx, numerator);
4420 } else if (imm < 0 && magic > 0) {
4421 __ subl(edx, numerator);
4422 }
4423
4424 if (shift != 0) {
4425 __ sarl(edx, Immediate(shift));
4426 }
4427
4428 __ movl(eax, edx);
4429 __ shrl(edx, Immediate(31));
4430 __ addl(edx, eax);
4431
4432 if (instruction->IsRem()) {
4433 __ movl(eax, numerator);
4434 __ imull(edx, Immediate(imm));
4435 __ subl(eax, edx);
4436 __ movl(edx, eax);
4437 } else {
4438 __ movl(eax, edx);
4439 }
4440 } else {
4441 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4442
4443 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4444
4445 CpuRegister rax = eax;
4446 CpuRegister rdx = edx;
4447
4448 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4449
4450 // Save the numerator.
4451 __ movq(numerator, rax);
4452
4453 // RAX = magic
4454 codegen_->Load64BitValue(rax, magic);
4455
4456 // RDX:RAX = magic * numerator
4457 __ imulq(numerator);
4458
4459 if (imm > 0 && magic < 0) {
4460 // RDX += numerator
4461 __ addq(rdx, numerator);
4462 } else if (imm < 0 && magic > 0) {
4463 // RDX -= numerator
4464 __ subq(rdx, numerator);
4465 }
4466
4467 // Shift if needed.
4468 if (shift != 0) {
4469 __ sarq(rdx, Immediate(shift));
4470 }
4471
4472 // RDX += 1 if RDX < 0
4473 __ movq(rax, rdx);
4474 __ shrq(rdx, Immediate(63));
4475 __ addq(rdx, rax);
4476
4477 if (instruction->IsRem()) {
4478 __ movq(rax, numerator);
4479
4480 if (IsInt<32>(imm)) {
4481 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4482 } else {
4483 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4484 }
4485
4486 __ subq(rax, rdx);
4487 __ movq(rdx, rax);
4488 } else {
4489 __ movq(rax, rdx);
4490 }
4491 }
4492 }
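// Worked example of the magic-number sequence above, for a hypothetical divisor of 7.
// One valid magic/shift pair for 7 (the one the standard Hacker's Delight construction
// produces) is magic == 0x92492493 (negative as an int32) and shift == 2:
//   n == 21:  high32(magic * 21) == -9; magic < 0 and 7 > 0, so add n: -9 + 21 == 12;
//             12 >> 2 == 3, and the final sign-bit addition contributes 0, so q == 3.
//   n == -21: high32(magic * -21) == 8; add n: 8 - 21 == -13; -13 >> 2 == -4; adding the
//             sign bit gives q == -3. The remainder path then computes n - q * 7.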
4493
4494 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4495 DCHECK(instruction->IsDiv() || instruction->IsRem());
4496 DataType::Type type = instruction->GetResultType();
4497 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4498
4499 bool is_div = instruction->IsDiv();
4500 LocationSummary* locations = instruction->GetLocations();
4501
4502 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4503 Location second = locations->InAt(1);
4504
4505 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4506 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4507
4508 if (second.IsConstant()) {
4509 int64_t imm = Int64FromConstant(second.GetConstant());
4510
4511 if (imm == 0) {
4512 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4513 } else if (imm == 1 || imm == -1) {
4514 DivRemOneOrMinusOne(instruction);
4515 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4516 if (is_div) {
4517 DivByPowerOfTwo(instruction->AsDiv());
4518 } else {
4519 RemByPowerOfTwo(instruction->AsRem());
4520 }
4521 } else {
4522 DCHECK(imm <= -2 || imm >= 2);
4523 GenerateDivRemWithAnyConstant(instruction);
4524 }
4525 } else {
4526 SlowPathCode* slow_path =
4527 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4528 instruction, out.AsRegister(), type, is_div);
4529 codegen_->AddSlowPath(slow_path);
4530
4531 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4532 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4533 // Dividing by -1 is actually negation, and -0x80000000(00000000) == 0x80000000(00000000),
4534 // so it's safe to just use negl instead of more complex comparisons.
4535 if (type == DataType::Type::kInt32) {
4536 __ cmpl(second_reg, Immediate(-1));
4537 __ j(kEqual, slow_path->GetEntryLabel());
4538 // edx:eax <- sign-extended of eax
4539 __ cdq();
4540 // eax = quotient, edx = remainder
4541 __ idivl(second_reg);
4542 } else {
4543 __ cmpq(second_reg, Immediate(-1));
4544 __ j(kEqual, slow_path->GetEntryLabel());
4545 // rdx:rax <- sign-extended of rax
4546 __ cqo();
4547 // rax = quotient, rdx = remainder
4548 __ idivq(second_reg);
4549 }
4550 __ Bind(slow_path->GetExitLabel());
4551 }
4552 }
4553
4554 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4555 LocationSummary* locations =
4556 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4557 switch (div->GetResultType()) {
4558 case DataType::Type::kInt32:
4559 case DataType::Type::kInt64: {
4560 locations->SetInAt(0, Location::RegisterLocation(RAX));
4561 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4562 locations->SetOut(Location::SameAsFirstInput());
4563 // Intel uses edx:eax as the dividend.
4564 locations->AddTemp(Location::RegisterLocation(RDX));
4565 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4566 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4567 // output and request another temp.
4568 if (div->InputAt(1)->IsConstant()) {
4569 locations->AddTemp(Location::RequiresRegister());
4570 }
4571 break;
4572 }
4573
4574 case DataType::Type::kFloat32:
4575 case DataType::Type::kFloat64: {
4576 locations->SetInAt(0, Location::RequiresFpuRegister());
4577 locations->SetInAt(1, Location::Any());
4578 locations->SetOut(Location::SameAsFirstInput());
4579 break;
4580 }
4581
4582 default:
4583 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4584 }
4585 }
4586
4587 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4588 LocationSummary* locations = div->GetLocations();
4589 Location first = locations->InAt(0);
4590 Location second = locations->InAt(1);
4591 DCHECK(first.Equals(locations->Out()));
4592
4593 DataType::Type type = div->GetResultType();
4594 switch (type) {
4595 case DataType::Type::kInt32:
4596 case DataType::Type::kInt64: {
4597 GenerateDivRemIntegral(div);
4598 break;
4599 }
4600
4601 case DataType::Type::kFloat32: {
4602 if (second.IsFpuRegister()) {
4603 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4604 } else if (second.IsConstant()) {
4605 __ divss(first.AsFpuRegister<XmmRegister>(),
4606 codegen_->LiteralFloatAddress(
4607 second.GetConstant()->AsFloatConstant()->GetValue()));
4608 } else {
4609 DCHECK(second.IsStackSlot());
4610 __ divss(first.AsFpuRegister<XmmRegister>(),
4611 Address(CpuRegister(RSP), second.GetStackIndex()));
4612 }
4613 break;
4614 }
4615
4616 case DataType::Type::kFloat64: {
4617 if (second.IsFpuRegister()) {
4618 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4619 } else if (second.IsConstant()) {
4620 __ divsd(first.AsFpuRegister<XmmRegister>(),
4621 codegen_->LiteralDoubleAddress(
4622 second.GetConstant()->AsDoubleConstant()->GetValue()));
4623 } else {
4624 DCHECK(second.IsDoubleStackSlot());
4625 __ divsd(first.AsFpuRegister<XmmRegister>(),
4626 Address(CpuRegister(RSP), second.GetStackIndex()));
4627 }
4628 break;
4629 }
4630
4631 default:
4632 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4633 }
4634 }
4635
4636 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4637 DataType::Type type = rem->GetResultType();
4638 LocationSummary* locations =
4639 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4640
4641 switch (type) {
4642 case DataType::Type::kInt32:
4643 case DataType::Type::kInt64: {
4644 locations->SetInAt(0, Location::RegisterLocation(RAX));
4645 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4646 // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
4647 locations->SetOut(Location::RegisterLocation(RDX));
4648 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4649 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4650 // output and request another temp.
4651 if (rem->InputAt(1)->IsConstant()) {
4652 locations->AddTemp(Location::RequiresRegister());
4653 }
4654 break;
4655 }
4656
4657 case DataType::Type::kFloat32:
4658 case DataType::Type::kFloat64: {
4659 locations->SetInAt(0, Location::Any());
4660 locations->SetInAt(1, Location::Any());
4661 locations->SetOut(Location::RequiresFpuRegister());
4662 locations->AddTemp(Location::RegisterLocation(RAX));
4663 break;
4664 }
4665
4666 default:
4667 LOG(FATAL) << "Unexpected rem type " << type;
4668 }
4669 }
4670
4671 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4672 DataType::Type type = rem->GetResultType();
4673 switch (type) {
4674 case DataType::Type::kInt32:
4675 case DataType::Type::kInt64: {
4676 GenerateDivRemIntegral(rem);
4677 break;
4678 }
4679 case DataType::Type::kFloat32:
4680 case DataType::Type::kFloat64: {
4681 GenerateRemFP(rem);
4682 break;
4683 }
4684 default:
4685 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4686 }
4687 }
4688
4689 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4690 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4691 switch (minmax->GetResultType()) {
4692 case DataType::Type::kInt32:
4693 case DataType::Type::kInt64:
4694 locations->SetInAt(0, Location::RequiresRegister());
4695 locations->SetInAt(1, Location::RequiresRegister());
4696 locations->SetOut(Location::SameAsFirstInput());
4697 break;
4698 case DataType::Type::kFloat32:
4699 case DataType::Type::kFloat64:
4700 locations->SetInAt(0, Location::RequiresFpuRegister());
4701 locations->SetInAt(1, Location::RequiresFpuRegister());
4702 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
4703 // the second input to be the output (we can simply swap inputs).
4704 locations->SetOut(Location::SameAsFirstInput());
4705 break;
4706 default:
4707 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4708 }
4709 }
4710
4711 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4712 bool is_min,
4713 DataType::Type type) {
4714 Location op1_loc = locations->InAt(0);
4715 Location op2_loc = locations->InAt(1);
4716
4717 // Shortcut for same input locations.
4718 if (op1_loc.Equals(op2_loc)) {
4719 // Can return immediately, as op1_loc == out_loc.
4720 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4721 // a copy here.
4722 DCHECK(locations->Out().Equals(op1_loc));
4723 return;
4724 }
4725
4726 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4727 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4728
4729 // (out := op1)
4730 // out <=? op2
4731 // if out is min jmp done
4732 // out := op2
4733 // done:
4734
4735 if (type == DataType::Type::kInt64) {
4736 __ cmpq(out, op2);
4737 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4738 } else {
4739 DCHECK_EQ(type, DataType::Type::kInt32);
4740 __ cmpl(out, op2);
4741 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4742 }
4743 }
4744
4745 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4746 bool is_min,
4747 DataType::Type type) {
4748 Location op1_loc = locations->InAt(0);
4749 Location op2_loc = locations->InAt(1);
4750 Location out_loc = locations->Out();
4751 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4752
4753 // Shortcut for same input locations.
4754 if (op1_loc.Equals(op2_loc)) {
4755 DCHECK(out_loc.Equals(op1_loc));
4756 return;
4757 }
4758
4759 // (out := op1)
4760 // out <=? op2
4761 // if Nan jmp Nan_label
4762 // if out is min jmp done
4763 // if op2 is min jmp op2_label
4764 // handle -0/+0
4765 // jmp done
4766 // Nan_label:
4767 // out := NaN
4768 // op2_label:
4769 // out := op2
4770 // done:
4771 //
4772 // This removes one jmp, but needs to copy one input (op1) to out.
4773 //
4774 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4775
4776 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4777
4778 NearLabel nan, done, op2_label;
4779 if (type == DataType::Type::kFloat64) {
4780 __ ucomisd(out, op2);
4781 } else {
4782 DCHECK_EQ(type, DataType::Type::kFloat32);
4783 __ ucomiss(out, op2);
4784 }
4785
4786 __ j(Condition::kParityEven, &nan);
4787
4788 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4789 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4790
4791 // Handle 0.0/-0.0.
4792 if (is_min) {
4793 if (type == DataType::Type::kFloat64) {
4794 __ orpd(out, op2);
4795 } else {
4796 __ orps(out, op2);
4797 }
4798 } else {
4799 if (type == DataType::Type::kFloat64) {
4800 __ andpd(out, op2);
4801 } else {
4802 __ andps(out, op2);
4803 }
4804 }
4805 __ jmp(&done);
4806
4807 // NaN handling.
4808 __ Bind(&nan);
4809 if (type == DataType::Type::kFloat64) {
4810 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4811 } else {
4812 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4813 }
4814 __ jmp(&done);
4815
4816 // out := op2;
4817 __ Bind(&op2_label);
4818 if (type == DataType::Type::kFloat64) {
4819 __ movsd(out, op2);
4820 } else {
4821 __ movss(out, op2);
4822 }
4823
4824 // Done.
4825 __ Bind(&done);
4826 }
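// The 0.0/-0.0 handling above exploits the bit patterns of signed zero: +0.0 is all zero
// bits while -0.0 has only the sign bit set. When both inputs are zeros, orpd/orps keeps
// the sign bit so min(+0.0, -0.0) == -0.0, and andpd/andps clears it so
// max(+0.0, -0.0) == +0.0, matching Math.min/Math.max. NaN operands never reach that
// code: ucomisd/ucomiss reports them as unordered via the parity flag, and the
// kParityEven branch loads the canonical NaN constant instead.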
4827
4828 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4829 DataType::Type type = minmax->GetResultType();
4830 switch (type) {
4831 case DataType::Type::kInt32:
4832 case DataType::Type::kInt64:
4833 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4834 break;
4835 case DataType::Type::kFloat32:
4836 case DataType::Type::kFloat64:
4837 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4838 break;
4839 default:
4840 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4841 }
4842 }
4843
4844 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4845 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4846 }
4847
4848 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4849 GenerateMinMax(min, /*is_min*/ true);
4850 }
4851
4852 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4853 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4854 }
4855
4856 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4857 GenerateMinMax(max, /*is_min*/ false);
4858 }
4859
4860 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4861 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4862 switch (abs->GetResultType()) {
4863 case DataType::Type::kInt32:
4864 case DataType::Type::kInt64:
4865 locations->SetInAt(0, Location::RequiresRegister());
4866 locations->SetOut(Location::SameAsFirstInput());
4867 locations->AddTemp(Location::RequiresRegister());
4868 break;
4869 case DataType::Type::kFloat32:
4870 case DataType::Type::kFloat64:
4871 locations->SetInAt(0, Location::RequiresFpuRegister());
4872 locations->SetOut(Location::SameAsFirstInput());
4873 locations->AddTemp(Location::RequiresFpuRegister());
4874 break;
4875 default:
4876 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4877 }
4878 }
4879
4880 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4881 LocationSummary* locations = abs->GetLocations();
4882 switch (abs->GetResultType()) {
4883 case DataType::Type::kInt32: {
4884 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4885 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4886 // Create mask.
4887 __ movl(mask, out);
4888 __ sarl(mask, Immediate(31));
4889 // Add mask.
4890 __ addl(out, mask);
4891 __ xorl(out, mask);
4892 break;
4893 }
4894 case DataType::Type::kInt64: {
4895 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4896 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4897 // Create mask.
4898 __ movq(mask, out);
4899 __ sarq(mask, Immediate(63));
4900 // Add mask.
4901 __ addq(out, mask);
4902 __ xorq(out, mask);
4903 break;
4904 }
4905 case DataType::Type::kFloat32: {
4906 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4907 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4908 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4909 __ andps(out, mask);
4910 break;
4911 }
4912 case DataType::Type::kFloat64: {
4913 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4914 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4915 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4916 __ andpd(out, mask);
4917 break;
4918 }
4919 default:
4920 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4921 }
4922 }
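// Worked example of the integer abs sequence above. The mask is x >> 31 (or x >> 63),
// i.e. 0 for non-negative x and all ones for negative x, so (x + mask) ^ mask == |x|:
//   x == 5:  mask == 0;  (5 + 0) ^ 0 == 5.
//   x == -5: mask == -1; (-5 + -1) ^ -1 == -6 ^ -1 == 5.
// As with Math.abs, the most negative value maps onto itself. The floating-point cases
// simply clear the IEEE sign bit with an AND mask from the constant area.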
4923
4924 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4925 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4926 locations->SetInAt(0, Location::Any());
4927 }
4928
4929 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4930 SlowPathCode* slow_path =
4931 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4932 codegen_->AddSlowPath(slow_path);
4933
4934 LocationSummary* locations = instruction->GetLocations();
4935 Location value = locations->InAt(0);
4936
4937 switch (instruction->GetType()) {
4938 case DataType::Type::kBool:
4939 case DataType::Type::kUint8:
4940 case DataType::Type::kInt8:
4941 case DataType::Type::kUint16:
4942 case DataType::Type::kInt16:
4943 case DataType::Type::kInt32: {
4944 if (value.IsRegister()) {
4945 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4946 __ j(kEqual, slow_path->GetEntryLabel());
4947 } else if (value.IsStackSlot()) {
4948 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4949 __ j(kEqual, slow_path->GetEntryLabel());
4950 } else {
4951 DCHECK(value.IsConstant()) << value;
4952 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4953 __ jmp(slow_path->GetEntryLabel());
4954 }
4955 }
4956 break;
4957 }
4958 case DataType::Type::kInt64: {
4959 if (value.IsRegister()) {
4960 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4961 __ j(kEqual, slow_path->GetEntryLabel());
4962 } else if (value.IsDoubleStackSlot()) {
4963 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4964 __ j(kEqual, slow_path->GetEntryLabel());
4965 } else {
4966 DCHECK(value.IsConstant()) << value;
4967 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4968 __ jmp(slow_path->GetEntryLabel());
4969 }
4970 }
4971 break;
4972 }
4973 default:
4974 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4975 }
4976 }
4977
4978 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4979 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4980
4981 LocationSummary* locations =
4982 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4983
4984 switch (op->GetResultType()) {
4985 case DataType::Type::kInt32:
4986 case DataType::Type::kInt64: {
4987 locations->SetInAt(0, Location::RequiresRegister());
4988 // The shift count needs to be in CL.
4989 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4990 locations->SetOut(Location::SameAsFirstInput());
4991 break;
4992 }
4993 default:
4994 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4995 }
4996 }
4997
4998 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4999 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5000
5001 LocationSummary* locations = op->GetLocations();
5002 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
5003 Location second = locations->InAt(1);
5004
5005 switch (op->GetResultType()) {
5006 case DataType::Type::kInt32: {
5007 if (second.IsRegister()) {
5008 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5009 if (op->IsShl()) {
5010 __ shll(first_reg, second_reg);
5011 } else if (op->IsShr()) {
5012 __ sarl(first_reg, second_reg);
5013 } else {
5014 __ shrl(first_reg, second_reg);
5015 }
5016 } else {
5017 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5018 if (op->IsShl()) {
5019 __ shll(first_reg, imm);
5020 } else if (op->IsShr()) {
5021 __ sarl(first_reg, imm);
5022 } else {
5023 __ shrl(first_reg, imm);
5024 }
5025 }
5026 break;
5027 }
5028 case DataType::Type::kInt64: {
5029 if (second.IsRegister()) {
5030 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5031 if (op->IsShl()) {
5032 __ shlq(first_reg, second_reg);
5033 } else if (op->IsShr()) {
5034 __ sarq(first_reg, second_reg);
5035 } else {
5036 __ shrq(first_reg, second_reg);
5037 }
5038 } else {
5039 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5040 if (op->IsShl()) {
5041 __ shlq(first_reg, imm);
5042 } else if (op->IsShr()) {
5043 __ sarq(first_reg, imm);
5044 } else {
5045 __ shrq(first_reg, imm);
5046 }
5047 }
5048 break;
5049 }
5050 default:
5051 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5052 UNREACHABLE();
5053 }
5054 }
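// Constant shift counts are masked with kMaxIntShiftDistance / kMaxLongShiftDistance
// above because Java only uses the low 5 (int) or 6 (long) bits of the count; for
// example, (1 << 33) on an int is 1 << 1 == 2. No explicit masking is needed for a count
// in CL, since the x86-64 shift instructions truncate the count to 5 or 6 bits themselves.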
5055
5056 void LocationsBuilderX86_64::HandleRotate(HBinaryOperation* rotate) {
5057 LocationSummary* locations =
5058 new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5059
5060 switch (rotate->GetResultType()) {
5061 case DataType::Type::kInt32:
5062 case DataType::Type::kInt64: {
5063 locations->SetInAt(0, Location::RequiresRegister());
5064 // The shift count needs to be in CL (unless it is a constant).
5065 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, rotate->InputAt(1)));
5066 locations->SetOut(Location::SameAsFirstInput());
5067 break;
5068 }
5069 default:
5070 LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5071 UNREACHABLE();
5072 }
5073 }
5074
5075 void InstructionCodeGeneratorX86_64::HandleRotate(HBinaryOperation* rotate) {
5076 LocationSummary* locations = rotate->GetLocations();
5077 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
5078 Location second = locations->InAt(1);
5079
5080 switch (rotate->GetResultType()) {
5081 case DataType::Type::kInt32:
5082 if (second.IsRegister()) {
5083 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5084 if (rotate->IsRor()) {
5085 __ rorl(first_reg, second_reg);
5086 } else {
5087 DCHECK(rotate->IsRol());
5088 __ roll(first_reg, second_reg);
5089 }
5090 } else {
5091 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5092 if (rotate->IsRor()) {
5093 __ rorl(first_reg, imm);
5094 } else {
5095 DCHECK(rotate->IsRol());
5096 __ roll(first_reg, imm);
5097 }
5098 }
5099 break;
5100 case DataType::Type::kInt64:
5101 if (second.IsRegister()) {
5102 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5103 if (rotate->IsRor()) {
5104 __ rorq(first_reg, second_reg);
5105 } else {
5106 DCHECK(rotate->IsRol());
5107 __ rolq(first_reg, second_reg);
5108 }
5109 } else {
5110 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5111 if (rotate->IsRor()) {
5112 __ rorq(first_reg, imm);
5113 } else {
5114 DCHECK(rotate->IsRol());
5115 __ rolq(first_reg, imm);
5116 }
5117 }
5118 break;
5119 default:
5120 LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5121 UNREACHABLE();
5122 }
5123 }
5124
5125 void LocationsBuilderX86_64::VisitRol(HRol* rol) {
5126 HandleRotate(rol);
5127 }
5128
5129 void InstructionCodeGeneratorX86_64::VisitRol(HRol* rol) {
5130 HandleRotate(rol);
5131 }
5132
5133 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
5134 HandleRotate(ror);
5135 }
5136
5137 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
5138 HandleRotate(ror);
5139 }
5140
5141 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
5142 HandleShift(shl);
5143 }
5144
5145 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
5146 HandleShift(shl);
5147 }
5148
5149 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
5150 HandleShift(shr);
5151 }
5152
5153 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
5154 HandleShift(shr);
5155 }
5156
5157 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
5158 HandleShift(ushr);
5159 }
5160
5161 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
5162 HandleShift(ushr);
5163 }
5164
5165 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
5166 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5167 instruction, LocationSummary::kCallOnMainOnly);
5168 InvokeRuntimeCallingConvention calling_convention;
5169 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5170 locations->SetOut(Location::RegisterLocation(RAX));
5171 }
5172
5173 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
5174 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5175 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5176 DCHECK(!codegen_->IsLeafMethod());
5177 }
5178
5179 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
5180 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5181 instruction, LocationSummary::kCallOnMainOnly);
5182 InvokeRuntimeCallingConvention calling_convention;
5183 locations->SetOut(Location::RegisterLocation(RAX));
5184 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5185 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5186 }
5187
5188 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
5189 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5190 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5191 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5192 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5193 DCHECK(!codegen_->IsLeafMethod());
5194 }
5195
5196 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
5197 LocationSummary* locations =
5198 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5199 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5200 if (location.IsStackSlot()) {
5201 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5202 } else if (location.IsDoubleStackSlot()) {
5203 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5204 }
5205 locations->SetOut(location);
5206 }
5207
5208 void InstructionCodeGeneratorX86_64::VisitParameterValue(
5209 [[maybe_unused]] HParameterValue* instruction) {
5210 // Nothing to do, the parameter is already at its location.
5211 }
5212
5213 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
5214 LocationSummary* locations =
5215 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5216 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5217 }
5218
5219 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
5220 [[maybe_unused]] HCurrentMethod* instruction) {
5221 // Nothing to do, the method is already at its location.
5222 }
5223
5224 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5225 LocationSummary* locations =
5226 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5227 locations->SetInAt(0, Location::RequiresRegister());
5228 locations->SetOut(Location::RequiresRegister());
5229 }
5230
5231 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5232 LocationSummary* locations = instruction->GetLocations();
5233 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5234 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5235 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
5236 __ movq(locations->Out().AsRegister<CpuRegister>(),
5237 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
5238 } else {
5239 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5240 instruction->GetIndex(), kX86_64PointerSize));
5241 __ movq(locations->Out().AsRegister<CpuRegister>(),
5242 Address(locations->InAt(0).AsRegister<CpuRegister>(),
5243 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
5244 __ movq(locations->Out().AsRegister<CpuRegister>(),
5245 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
5246 }
5247 }
5248
5249 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
5250 LocationSummary* locations =
5251 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5252 locations->SetInAt(0, Location::RequiresRegister());
5253 locations->SetOut(Location::SameAsFirstInput());
5254 }
5255
5256 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
5257 LocationSummary* locations = not_->GetLocations();
5258 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5259 locations->Out().AsRegister<CpuRegister>().AsRegister());
5260 Location out = locations->Out();
5261 switch (not_->GetResultType()) {
5262 case DataType::Type::kInt32:
5263 __ notl(out.AsRegister<CpuRegister>());
5264 break;
5265
5266 case DataType::Type::kInt64:
5267 __ notq(out.AsRegister<CpuRegister>());
5268 break;
5269
5270 default:
5271 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5272 }
5273 }
5274
5275 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5276 LocationSummary* locations =
5277 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5278 locations->SetInAt(0, Location::RequiresRegister());
5279 locations->SetOut(Location::SameAsFirstInput());
5280 }
5281
5282 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5283 LocationSummary* locations = bool_not->GetLocations();
5284 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5285 locations->Out().AsRegister<CpuRegister>().AsRegister());
5286 Location out = locations->Out();
5287 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
5288 }
5289
5290 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
5291 LocationSummary* locations =
5292 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5293 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5294 locations->SetInAt(i, Location::Any());
5295 }
5296 locations->SetOut(Location::Any());
5297 }
5298
5299 void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) {
5300 LOG(FATAL) << "Unimplemented";
5301 }
5302
5303 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
5304 /*
5305 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
5306 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
5307 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5308 */
5309 switch (kind) {
5310 case MemBarrierKind::kAnyAny: {
5311 MemoryFence();
5312 break;
5313 }
5314 case MemBarrierKind::kAnyStore:
5315 case MemBarrierKind::kLoadAny:
5316 case MemBarrierKind::kStoreStore: {
5317 // nop
5318 break;
5319 }
5320 case MemBarrierKind::kNTStoreStore:
5321 // Non-Temporal Store/Store needs an explicit fence.
5322 MemoryFence(/* non-temporal= */ true);
5323 break;
5324 }
5325 }
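// Background for the mapping above: the x86-64 memory model already forbids load-load,
// load-store and store-store reordering, so only the store-load case (kAnyAny) needs a
// real fence; the other kinds only have to act as compiler scheduling barriers. For
// example, a volatile store followed by a volatile load of a different field needs the
// kAnyAny fence between them, while two back-to-back volatile loads need nothing extra.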
5326
5327 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5328 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5329
5330 bool object_field_get_with_read_barrier =
5331 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5332 LocationSummary* locations =
5333 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5334 object_field_get_with_read_barrier
5335 ? LocationSummary::kCallOnSlowPath
5336 : LocationSummary::kNoCall);
5337 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5338 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5339 }
5340 // receiver_input
5341 locations->SetInAt(0, Location::RequiresRegister());
5342 if (DataType::IsFloatingPointType(instruction->GetType())) {
5343 locations->SetOut(Location::RequiresFpuRegister());
5344 } else {
5345 // The output overlaps for an object field get when read barriers are
5346 // enabled: we do not want the move to overwrite the object's location, as
5347 // we need it to emit the read barrier. For predicated instructions we can
5348 // always overlap since the output is SameAsFirst and the default value.
5349 locations->SetOut(
5350 Location::RequiresRegister(),
5351 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5352 }
5353 }
5354
5355 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5356 const FieldInfo& field_info) {
5357 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5358
5359 LocationSummary* locations = instruction->GetLocations();
5360 Location base_loc = locations->InAt(0);
5361 CpuRegister base = base_loc.AsRegister<CpuRegister>();
5362 Location out = locations->Out();
5363 bool is_volatile = field_info.IsVolatile();
5364 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5365 DataType::Type load_type = instruction->GetType();
5366 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5367
5368 if (load_type == DataType::Type::kReference) {
5369 // /* HeapReference<Object> */ out = *(base + offset)
5370 if (codegen_->EmitBakerReadBarrier()) {
5371 // Note that a potential implicit null check is handled in this
5372 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5373 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5374 instruction, out, base, offset, /* needs_null_check= */ true);
5375 if (is_volatile) {
5376 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5377 }
5378 } else {
5379 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5380 codegen_->MaybeRecordImplicitNullCheck(instruction);
5381 if (is_volatile) {
5382 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5383 }
5384 // If read barriers are enabled, emit read barriers other than
5385 // Baker's using a slow path (and also unpoison the loaded
5386 // reference, if heap poisoning is enabled).
5387 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5388 }
5389 } else {
5390 codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5391 codegen_->MaybeRecordImplicitNullCheck(instruction);
5392 if (is_volatile) {
5393 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5394 }
5395 }
5396 }
5397
5398 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5399 const FieldInfo& field_info,
5400 WriteBarrierKind write_barrier_kind) {
5401 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5402
5403 LocationSummary* locations =
5404 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5405 DataType::Type field_type = field_info.GetFieldType();
5406 bool is_volatile = field_info.IsVolatile();
5407 bool needs_write_barrier =
5408 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5409 bool check_gc_card =
5410 codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5411
5412 locations->SetInAt(0, Location::RequiresRegister());
5413 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5414 if (is_volatile) {
5415 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5416 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5417 } else {
5418 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5419 }
5420 } else {
5421 if (is_volatile) {
5422 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5423 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5424 } else {
5425 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5426 }
5427 }
5428
5429 // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
5430 // InstructionCodeGeneratorX86_64::HandleFieldSet, GenerateVarHandleSet due to `extra_temp_index`.
5431 if (needs_write_barrier ||
5432 check_gc_card ||
5433 (kPoisonHeapReferences && field_type == DataType::Type::kReference)) {
5434 // Temporary registers for the write barrier / reference poisoning.
5435 locations->AddRegisterTemps(2);
5436 }
5437 }
5438
5439 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5440 DataType::Type type,
5441 CpuRegister* temp) {
5442 switch (type) {
5443 case DataType::Type::kInt16:
5444 // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
5445 __ bswapl(value.AsRegister<CpuRegister>());
5446 __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
5447 break;
5448 case DataType::Type::kUint16:
5449 // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5450 __ bswapl(value.AsRegister<CpuRegister>());
5451 __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5452 break;
5453 case DataType::Type::kInt32:
5454 case DataType::Type::kUint32:
5455 __ bswapl(value.AsRegister<CpuRegister>());
5456 break;
5457 case DataType::Type::kInt64:
5458 case DataType::Type::kUint64:
5459 __ bswapq(value.AsRegister<CpuRegister>());
5460 break;
5461 case DataType::Type::kFloat32: {
5462 DCHECK_NE(temp, nullptr);
5463 __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
5464 __ bswapl(*temp);
5465 __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
5466 break;
5467 }
5468 case DataType::Type::kFloat64: {
5469 DCHECK_NE(temp, nullptr);
5470 __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
5471 __ bswapq(*temp);
5472 __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
5473 break;
5474 }
5475 default:
5476 LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5477 UNREACHABLE();
5478 }
5479 }
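// Illustrative example of the kInt16 path above: for an input short 0x0080 the register
// holds 0x00000080, bswapl produces 0x80000000, and the arithmetic shift by 16 yields
// 0xFFFF8000, i.e. the byte-reversed value 0x8000 correctly sign-extended to -32768.
// The kUint16 path uses a logical shift instead, so its result stays zero-extended.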
5480
5481 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5482 uint32_t value_index,
5483 uint32_t extra_temp_index,
5484 DataType::Type field_type,
5485 Address field_addr,
5486 CpuRegister base,
5487 bool is_volatile,
5488 bool is_atomic,
5489 bool value_can_be_null,
5490 bool byte_swap,
5491 WriteBarrierKind write_barrier_kind) {
5492 LocationSummary* locations = instruction->GetLocations();
5493 Location value = locations->InAt(value_index);
5494
5495 if (is_volatile) {
5496 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5497 }
5498
5499 bool maybe_record_implicit_null_check_done = false;
5500
5501 if (value.IsConstant()) {
5502 switch (field_type) {
5503 case DataType::Type::kBool:
5504 case DataType::Type::kUint8:
5505 case DataType::Type::kInt8:
5506 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5507 break;
5508 case DataType::Type::kUint16:
5509 case DataType::Type::kInt16: {
5510 int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5511 if (byte_swap) {
5512 v = BSWAP(v);
5513 }
5514 __ movw(field_addr, Immediate(v));
5515 break;
5516 }
5517 case DataType::Type::kUint32:
5518 case DataType::Type::kInt32:
5519 case DataType::Type::kFloat32:
5520 case DataType::Type::kReference: {
5521 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5522 if (byte_swap) {
5523 v = BSWAP(v);
5524 }
5525 DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5526 // Note: if heap poisoning is enabled, no need to poison
5527 // (negate) `v` if it is a reference, as it would be null.
5528 __ movl(field_addr, Immediate(v));
5529 break;
5530 }
5531 case DataType::Type::kUint64:
5532 case DataType::Type::kInt64:
5533 case DataType::Type::kFloat64: {
5534 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5535 if (byte_swap) {
5536 v = BSWAP(v);
5537 }
5538 if (is_atomic) {
5539 // Move constant into a register, then atomically store the register to memory.
5540 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5541 __ movq(temp, Immediate(v));
5542 __ movq(field_addr, temp);
5543 } else {
5544 Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5545 codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5546 }
5547 maybe_record_implicit_null_check_done = true;
5548 break;
5549 }
5550 case DataType::Type::kVoid:
5551 LOG(FATAL) << "Unreachable type " << field_type;
5552 UNREACHABLE();
5553 }
5554 } else {
5555 if (byte_swap) {
5556 // Swap byte order in-place in the input register (we will restore it later).
5557 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5558 Bswap(value, field_type, &temp);
5559 }
5560
5561 switch (field_type) {
5562 case DataType::Type::kBool:
5563 case DataType::Type::kUint8:
5564 case DataType::Type::kInt8:
5565 __ movb(field_addr, value.AsRegister<CpuRegister>());
5566 break;
5567 case DataType::Type::kUint16:
5568 case DataType::Type::kInt16:
5569 __ movw(field_addr, value.AsRegister<CpuRegister>());
5570 break;
5571 case DataType::Type::kUint32:
5572 case DataType::Type::kInt32:
5573 case DataType::Type::kReference:
5574 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5575 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5576 __ movl(temp, value.AsRegister<CpuRegister>());
5577 __ PoisonHeapReference(temp);
5578 __ movl(field_addr, temp);
5579 } else {
5580 __ movl(field_addr, value.AsRegister<CpuRegister>());
5581 }
5582 break;
5583 case DataType::Type::kUint64:
5584 case DataType::Type::kInt64:
5585 __ movq(field_addr, value.AsRegister<CpuRegister>());
5586 break;
5587 case DataType::Type::kFloat32:
5588 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5589 break;
5590 case DataType::Type::kFloat64:
5591 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5592 break;
5593 case DataType::Type::kVoid:
5594 LOG(FATAL) << "Unreachable type " << field_type;
5595 UNREACHABLE();
5596 }
5597
5598 if (byte_swap) {
5599 // Restore byte order.
5600 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5601 Bswap(value, field_type, &temp);
5602 }
5603 }
5604
5605 if (!maybe_record_implicit_null_check_done) {
5606 codegen_->MaybeRecordImplicitNullCheck(instruction);
5607 }
5608
5609 bool needs_write_barrier =
5610 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5611 if (needs_write_barrier) {
5612 if (value.IsConstant()) {
5613 DCHECK(value.GetConstant()->IsNullConstant());
5614 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5615 DCHECK_NE(extra_temp_index, 0u);
5616 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5617 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5618 codegen_->MarkGCCard(temp, card, base);
5619 }
5620 } else {
5621 DCHECK_NE(extra_temp_index, 0u);
5622 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5623 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5624 codegen_->MaybeMarkGCCard(
5625 temp,
5626 card,
5627 base,
5628 value.AsRegister<CpuRegister>(),
5629 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
5630 }
5631 } else if (codegen_->ShouldCheckGCCard(
5632 field_type, instruction->InputAt(value_index), write_barrier_kind)) {
5633 DCHECK_NE(extra_temp_index, 0u);
5634 DCHECK(value.IsRegister());
5635 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5636 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5637 codegen_->CheckGCCardIsValid(temp, card, base);
5638 }
5639
5640 if (is_volatile) {
5641 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5642 }
5643 }
5644
5645 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5646 const FieldInfo& field_info,
5647 bool value_can_be_null,
5648 WriteBarrierKind write_barrier_kind) {
5649 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5650
5651 LocationSummary* locations = instruction->GetLocations();
5652 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5653 bool is_volatile = field_info.IsVolatile();
5654 DataType::Type field_type = field_info.GetFieldType();
5655 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5656
5657 HandleFieldSet(instruction,
5658 /*value_index=*/ 1,
5659 /*extra_temp_index=*/ 1,
5660 field_type,
5661 Address(base, offset),
5662 base,
5663 is_volatile,
5664 /*is_atomic=*/ false,
5665 value_can_be_null,
5666 /*byte_swap=*/ false,
5667 write_barrier_kind);
5668 }
5669
5670 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5671 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5672 }
5673
5674 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5675 HandleFieldSet(instruction,
5676 instruction->GetFieldInfo(),
5677 instruction->GetValueCanBeNull(),
5678 instruction->GetWriteBarrierKind());
5679 }
5680
5681 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5682 HandleFieldGet(instruction);
5683 }
5684
5685 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5686 HandleFieldGet(instruction, instruction->GetFieldInfo());
5687 }
5688
5689 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5690 HandleFieldGet(instruction);
5691 }
5692
5693 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5694 HandleFieldGet(instruction, instruction->GetFieldInfo());
5695 }
5696
5697 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5698 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5699 }
5700
5701 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5702 HandleFieldSet(instruction,
5703 instruction->GetFieldInfo(),
5704 instruction->GetValueCanBeNull(),
5705 instruction->GetWriteBarrierKind());
5706 }
5707
5708 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5709 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5710 }
5711
5712 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5713 __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5714 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5715 }
5716
5717 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5718 HUnresolvedInstanceFieldGet* instruction) {
5719 FieldAccessCallingConventionX86_64 calling_convention;
5720 codegen_->CreateUnresolvedFieldLocationSummary(
5721 instruction, instruction->GetFieldType(), calling_convention);
5722 }
5723
5724 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5725 HUnresolvedInstanceFieldGet* instruction) {
5726 FieldAccessCallingConventionX86_64 calling_convention;
5727 codegen_->GenerateUnresolvedFieldAccess(instruction,
5728 instruction->GetFieldType(),
5729 instruction->GetFieldIndex(),
5730 instruction->GetDexPc(),
5731 calling_convention);
5732 }
5733
5734 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5735 HUnresolvedInstanceFieldSet* instruction) {
5736 FieldAccessCallingConventionX86_64 calling_convention;
5737 codegen_->CreateUnresolvedFieldLocationSummary(
5738 instruction, instruction->GetFieldType(), calling_convention);
5739 }
5740
5741 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5742 HUnresolvedInstanceFieldSet* instruction) {
5743 FieldAccessCallingConventionX86_64 calling_convention;
5744 codegen_->GenerateUnresolvedFieldAccess(instruction,
5745 instruction->GetFieldType(),
5746 instruction->GetFieldIndex(),
5747 instruction->GetDexPc(),
5748 calling_convention);
5749 }
5750
5751 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5752 HUnresolvedStaticFieldGet* instruction) {
5753 FieldAccessCallingConventionX86_64 calling_convention;
5754 codegen_->CreateUnresolvedFieldLocationSummary(
5755 instruction, instruction->GetFieldType(), calling_convention);
5756 }
5757
5758 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5759 HUnresolvedStaticFieldGet* instruction) {
5760 FieldAccessCallingConventionX86_64 calling_convention;
5761 codegen_->GenerateUnresolvedFieldAccess(instruction,
5762 instruction->GetFieldType(),
5763 instruction->GetFieldIndex(),
5764 instruction->GetDexPc(),
5765 calling_convention);
5766 }
5767
5768 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5769 HUnresolvedStaticFieldSet* instruction) {
5770 FieldAccessCallingConventionX86_64 calling_convention;
5771 codegen_->CreateUnresolvedFieldLocationSummary(
5772 instruction, instruction->GetFieldType(), calling_convention);
5773 }
5774
5775 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5776 HUnresolvedStaticFieldSet* instruction) {
5777 FieldAccessCallingConventionX86_64 calling_convention;
5778 codegen_->GenerateUnresolvedFieldAccess(instruction,
5779 instruction->GetFieldType(),
5780 instruction->GetFieldIndex(),
5781 instruction->GetDexPc(),
5782 calling_convention);
5783 }
5784
5785 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5786 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5787 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5788 ? Location::RequiresRegister()
5789 : Location::Any();
5790 locations->SetInAt(0, loc);
5791 }
5792
5793 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5794 if (CanMoveNullCheckToUser(instruction)) {
5795 return;
5796 }
5797 LocationSummary* locations = instruction->GetLocations();
5798 Location obj = locations->InAt(0);
5799
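  // A single load from the object's address: if `obj` is null this faults and the
  // fault handler raises the NullPointerException at the recorded PC. RAX only
  // serves to form a valid TEST instruction; the flags result is ignored.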
5800 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5801 RecordPcInfo(instruction, instruction->GetDexPc());
5802 }
5803
5804 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5805 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5806 AddSlowPath(slow_path);
5807
5808 LocationSummary* locations = instruction->GetLocations();
5809 Location obj = locations->InAt(0);
5810
5811 if (obj.IsRegister()) {
5812 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5813 } else if (obj.IsStackSlot()) {
5814 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5815 } else {
5816 DCHECK(obj.IsConstant()) << obj;
5817 DCHECK(obj.GetConstant()->IsNullConstant());
5818 __ jmp(slow_path->GetEntryLabel());
5819 return;
5820 }
5821 __ j(kEqual, slow_path->GetEntryLabel());
5822 }
5823
5824 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5825 codegen_->GenerateNullCheck(instruction);
5826 }
5827
5828 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5829 bool object_array_get_with_read_barrier =
5830 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5831 LocationSummary* locations =
5832 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5833 object_array_get_with_read_barrier
5834 ? LocationSummary::kCallOnSlowPath
5835 : LocationSummary::kNoCall);
5836 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5837 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5838 }
5839 locations->SetInAt(0, Location::RequiresRegister());
5840 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5841 if (DataType::IsFloatingPointType(instruction->GetType())) {
5842 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5843 } else {
5844 // The output overlaps for an object array get when read barriers
5845 // are enabled: we do not want the move to overwrite the array's
5846 // location, as we need it to emit the read barrier.
5847 locations->SetOut(
5848 Location::RequiresRegister(),
5849 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5850 }
5851 }
5852
5853 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5854 LocationSummary* locations = instruction->GetLocations();
5855 Location obj_loc = locations->InAt(0);
5856 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5857 Location index = locations->InAt(1);
5858 Location out_loc = locations->Out();
5859 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5860
5861 DataType::Type type = instruction->GetType();
5862 if (type == DataType::Type::kReference) {
5863 static_assert(
5864 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5865 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5866 // /* HeapReference<Object> */ out =
5867 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5868 if (codegen_->EmitBakerReadBarrier()) {
5869 // Note that a potential implicit null check is handled in this
5870 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5871 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5872 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5873 } else {
5874 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5875 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5876 codegen_->MaybeRecordImplicitNullCheck(instruction);
5877 // If read barriers are enabled, emit read barriers other than
5878 // Baker's using a slow path (and also unpoison the loaded
5879 // reference, if heap poisoning is enabled).
5880 if (index.IsConstant()) {
5881 uint32_t offset =
5882 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5883 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5884 } else {
5885 codegen_->MaybeGenerateReadBarrierSlow(
5886 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5887 }
5888 }
5889 } else {
5890 if (type == DataType::Type::kUint16
5891 && mirror::kUseStringCompression
5892 && instruction->IsStringCharAt()) {
5893 // Branch into the compressed and uncompressed cases, loading a byte or a 16-bit char accordingly.
5894 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5895 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5896 NearLabel done, not_compressed;
5897 __ testb(Address(obj, count_offset), Immediate(1));
5898 codegen_->MaybeRecordImplicitNullCheck(instruction);
5899 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5900 "Expecting 0=compressed, 1=uncompressed");
5901 __ j(kNotZero, ¬_compressed);
5902 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5903 __ jmp(&done);
5904 __ Bind(¬_compressed);
5905 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5906 __ Bind(&done);
5907 } else {
5908 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5909 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5910 codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5911 }
5912 codegen_->MaybeRecordImplicitNullCheck(instruction);
5913 }
5914 }
5915
5916 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5917 DataType::Type value_type = instruction->GetComponentType();
5918
5919 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5920 bool needs_write_barrier =
5921 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5922 bool check_gc_card =
5923 codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
5924 bool needs_type_check = instruction->NeedsTypeCheck();
5925
5926 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5927 instruction,
5928 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5929
5930 locations->SetInAt(0, Location::RequiresRegister());
5931 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5932 if (DataType::IsFloatingPointType(value_type)) {
5933 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5934 } else {
5935 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5936 }
5937
5938 if (needs_write_barrier || check_gc_card) {
5939 // Used by reference poisoning, type checking, emitting write barrier, or checking write
5940 // barrier.
5941 locations->AddTemp(Location::RequiresRegister());
5942 // Only used when emitting a write barrier, or when checking for the card table.
5943 locations->AddTemp(Location::RequiresRegister());
5944 } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
5945 instruction->NeedsTypeCheck()) {
5946 // Used for poisoning or type checking.
5947 locations->AddTemp(Location::RequiresRegister());
5948 }
5949 }
5950
5951 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5952 LocationSummary* locations = instruction->GetLocations();
5953 Location array_loc = locations->InAt(0);
5954 CpuRegister array = array_loc.AsRegister<CpuRegister>();
5955 Location index = locations->InAt(1);
5956 Location value = locations->InAt(2);
5957 DataType::Type value_type = instruction->GetComponentType();
5958 bool needs_type_check = instruction->NeedsTypeCheck();
5959 const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5960 bool needs_write_barrier =
5961 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5962
5963 switch (value_type) {
5964 case DataType::Type::kBool:
5965 case DataType::Type::kUint8:
5966 case DataType::Type::kInt8: {
5967 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5968 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5969 if (value.IsRegister()) {
5970 __ movb(address, value.AsRegister<CpuRegister>());
5971 } else {
5972 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5973 }
5974 codegen_->MaybeRecordImplicitNullCheck(instruction);
5975 break;
5976 }
5977
5978 case DataType::Type::kUint16:
5979 case DataType::Type::kInt16: {
5980 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5981 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5982 if (value.IsRegister()) {
5983 __ movw(address, value.AsRegister<CpuRegister>());
5984 } else {
5985 DCHECK(value.IsConstant()) << value;
5986 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5987 }
5988 codegen_->MaybeRecordImplicitNullCheck(instruction);
5989 break;
5990 }
5991
5992 case DataType::Type::kReference: {
5993 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5994 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5995
5996 if (!value.IsRegister()) {
5997 // Just setting null.
5998 DCHECK(instruction->InputAt(2)->IsNullConstant());
5999 DCHECK(value.IsConstant()) << value;
6000 __ movl(address, Immediate(0));
6001 codegen_->MaybeRecordImplicitNullCheck(instruction);
6002 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6003 // We need to set a write barrier here even though we are writing null, since this write
6004 // barrier is being relied on.
6005 DCHECK(needs_write_barrier);
6006 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6007 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6008 codegen_->MarkGCCard(temp, card, array);
6009 }
6010 DCHECK(!needs_type_check);
6011 break;
6012 }
6013
6014 CpuRegister register_value = value.AsRegister<CpuRegister>();
6015 const bool can_value_be_null = instruction->GetValueCanBeNull();
6016 // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
6017 // value is null (without an extra CompareAndBranchIfZero since we already checked if the
6018 // value is null for the type check).
6019 const bool skip_marking_gc_card =
6020 can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
6021 NearLabel do_store;
6022 NearLabel skip_writing_card;
6023 if (can_value_be_null) {
6024 __ testl(register_value, register_value);
6025 if (skip_marking_gc_card) {
6026 __ j(kEqual, &skip_writing_card);
6027 } else {
6028 __ j(kEqual, &do_store);
6029 }
6030 }
6031
6032 SlowPathCode* slow_path = nullptr;
6033 if (needs_type_check) {
6034 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
6035 codegen_->AddSlowPath(slow_path);
6036
6037 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6038 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6039 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6040
6041 // Note that when Baker read barriers are enabled, the type
6042 // checks are performed without read barriers. This is fine,
6043 // even in the case where a class object is in the from-space
6044 // after the flip, as a comparison involving such a type would
6045 // not produce a false positive; it may of course produce a
6046 // false negative, in which case we would take the ArraySet
6047 // slow path.
6048
6049 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6050 // /* HeapReference<Class> */ temp = array->klass_
6051 __ movl(temp, Address(array, class_offset));
6052 codegen_->MaybeRecordImplicitNullCheck(instruction);
6053 __ MaybeUnpoisonHeapReference(temp);
6054
6055 // /* HeapReference<Class> */ temp = temp->component_type_
6056 __ movl(temp, Address(temp, component_offset));
6057 // If heap poisoning is enabled, no need to unpoison `temp`
6058 // nor the object reference in `register_value->klass`, as
6059 // we are comparing two poisoned references.
6060 __ cmpl(temp, Address(register_value, class_offset));
6061
6062 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6063 NearLabel do_put;
6064 __ j(kEqual, &do_put);
6065 // If heap poisoning is enabled, the `temp` reference has
6066 // not been unpoisoned yet; unpoison it now.
6067 __ MaybeUnpoisonHeapReference(temp);
6068
6069 // If heap poisoning is enabled, no need to unpoison the
6070 // heap reference loaded below, as it is only used for a
6071 // comparison with null.
6072 __ cmpl(Address(temp, super_offset), Immediate(0));
6073 __ j(kNotEqual, slow_path->GetEntryLabel());
6074 __ Bind(&do_put);
6075 } else {
6076 __ j(kNotEqual, slow_path->GetEntryLabel());
6077 }
6078 }
6079
6080 if (can_value_be_null && !skip_marking_gc_card) {
6081 DCHECK(do_store.IsLinked());
6082 __ Bind(&do_store);
6083 }
6084
6085 if (needs_write_barrier) {
6086 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6087 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6088 codegen_->MarkGCCard(temp, card, array);
6089 } else if (codegen_->ShouldCheckGCCard(
6090 value_type, instruction->GetValue(), write_barrier_kind)) {
6091 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6092 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6093 codegen_->CheckGCCardIsValid(temp, card, array);
6094 }
6095
6096 if (skip_marking_gc_card) {
6097 // Note that we don't check that the GC card is valid here, as the card can legitimately be clean.
6098 DCHECK(skip_writing_card.IsLinked());
6099 __ Bind(&skip_writing_card);
6100 }
6101
6102 Location source = value;
6103 if (kPoisonHeapReferences) {
6104 Location temp_loc = locations->GetTemp(0);
6105 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6106 __ movl(temp, register_value);
6107 __ PoisonHeapReference(temp);
6108 source = temp_loc;
6109 }
6110
6111 __ movl(address, source.AsRegister<CpuRegister>());
6112
6113 if (can_value_be_null || !needs_type_check) {
6114 codegen_->MaybeRecordImplicitNullCheck(instruction);
6115 }
6116
6117 if (slow_path != nullptr) {
6118 __ Bind(slow_path->GetExitLabel());
6119 }
6120
6121 break;
6122 }
6123
6124 case DataType::Type::kInt32: {
6125 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6126 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6127 if (value.IsRegister()) {
6128 __ movl(address, value.AsRegister<CpuRegister>());
6129 } else {
6130 DCHECK(value.IsConstant()) << value;
6131 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6132 __ movl(address, Immediate(v));
6133 }
6134 codegen_->MaybeRecordImplicitNullCheck(instruction);
6135 break;
6136 }
6137
6138 case DataType::Type::kInt64: {
6139 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6140 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6141 if (value.IsRegister()) {
6142 __ movq(address, value.AsRegister<CpuRegister>());
6143 codegen_->MaybeRecordImplicitNullCheck(instruction);
6144 } else {
6145 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
6146 Address address_high =
6147 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6148 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6149 }
6150 break;
6151 }
6152
6153 case DataType::Type::kFloat32: {
6154 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6155 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6156 if (value.IsFpuRegister()) {
6157 __ movss(address, value.AsFpuRegister<XmmRegister>());
6158 } else {
6159 DCHECK(value.IsConstant());
6160 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6161 __ movl(address, Immediate(v));
6162 }
6163 codegen_->MaybeRecordImplicitNullCheck(instruction);
6164 break;
6165 }
6166
6167 case DataType::Type::kFloat64: {
6168 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6169 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6170 if (value.IsFpuRegister()) {
6171 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6172 codegen_->MaybeRecordImplicitNullCheck(instruction);
6173 } else {
6174 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6175 Address address_high =
6176 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6177 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6178 }
6179 break;
6180 }
6181
6182 case DataType::Type::kUint32:
6183 case DataType::Type::kUint64:
6184 case DataType::Type::kVoid:
6185 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6186 UNREACHABLE();
6187 }
6188 }
6189
6190 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
6191 LocationSummary* locations =
6192 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6193 locations->SetInAt(0, Location::RequiresRegister());
6194 if (!instruction->IsEmittedAtUseSite()) {
6195 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6196 }
6197 }
6198
6199 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
6200 if (instruction->IsEmittedAtUseSite()) {
6201 return;
6202 }
6203
6204 LocationSummary* locations = instruction->GetLocations();
6205 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6206 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
6207 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
6208 __ movl(out, Address(obj, offset));
6209 codegen_->MaybeRecordImplicitNullCheck(instruction);
6210 // Shift out the compression flag bit in case the array is String's array of char.
6211 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6212 __ shrl(out, Immediate(1));
6213 }
6214 }
6215
6216 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6217 RegisterSet caller_saves = RegisterSet::Empty();
6218 InvokeRuntimeCallingConvention calling_convention;
6219 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6220 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6221 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6222 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6223 HInstruction* length = instruction->InputAt(1);
6224 if (!length->IsEmittedAtUseSite()) {
6225 locations->SetInAt(1, Location::RegisterOrConstant(length));
6226 }
6227 }
6228
6229 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6230 LocationSummary* locations = instruction->GetLocations();
6231 Location index_loc = locations->InAt(0);
6232 Location length_loc = locations->InAt(1);
6233 SlowPathCode* slow_path =
6234 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
6235
6236 if (length_loc.IsConstant()) {
6237 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6238 if (index_loc.IsConstant()) {
6239 // BCE will remove the bounds check if we are guaranteed to pass.
6240 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6241 if (index < 0 || index >= length) {
6242 codegen_->AddSlowPath(slow_path);
6243 __ jmp(slow_path->GetEntryLabel());
6244 } else {
6245 // Some optimization after BCE may have generated this, and we should not
6246 // generate a bounds check if it is a valid range.
6247 }
6248 return;
6249 }
6250
6251 // We have to reverse the jump condition because the length is the constant.
6252 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
6253 __ cmpl(index_reg, Immediate(length));
6254 codegen_->AddSlowPath(slow_path);
6255 __ j(kAboveEqual, slow_path->GetEntryLabel());
6256 } else {
6257 HInstruction* array_length = instruction->InputAt(1);
6258 if (array_length->IsEmittedAtUseSite()) {
6259 // Address the length field in the array.
6260 DCHECK(array_length->IsArrayLength());
6261 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6262 Location array_loc = array_length->GetLocations()->InAt(0);
6263 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
6264 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6265 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6266 // the string compression flag) with the in-memory length and avoid the temporary.
6267 CpuRegister length_reg = CpuRegister(TMP);
6268 __ movl(length_reg, array_len);
6269 codegen_->MaybeRecordImplicitNullCheck(array_length);
6270 __ shrl(length_reg, Immediate(1));
6271 codegen_->GenerateIntCompare(length_reg, index_loc);
6272 } else {
6273 // Checking the bound for the general case:
6274 // an array of char, or a String's char array when the compression feature is off.
6275 if (index_loc.IsConstant()) {
6276 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6277 __ cmpl(array_len, Immediate(value));
6278 } else {
6279 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
6280 }
6281 codegen_->MaybeRecordImplicitNullCheck(array_length);
6282 }
6283 } else {
6284 codegen_->GenerateIntCompare(length_loc, index_loc);
6285 }
6286 codegen_->AddSlowPath(slow_path);
6287 __ j(kBelowEqual, slow_path->GetEntryLabel());
6288 }
6289 }
6290
6291 void CodeGeneratorX86_64::MaybeMarkGCCard(CpuRegister temp,
6292 CpuRegister card,
6293 CpuRegister object,
6294 CpuRegister value,
6295 bool emit_null_check) {
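  // Storing null creates no reference to remember, so the card mark can be
  // skipped entirely when the value is null.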
6296 NearLabel is_null;
6297 if (emit_null_check) {
6298 __ testl(value, value);
6299 __ j(kEqual, &is_null);
6300 }
6301 MarkGCCard(temp, card, object);
6302 if (emit_null_check) {
6303 __ Bind(&is_null);
6304 }
6305 }
6306
6307 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object) {
6308 // Load the address of the card table into `card`.
6309 __ gs()->movq(card,
6310 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6311 /* no_rip= */ true));
6312 // Calculate the offset (in the card table) of the card corresponding to `object`.
6313 __ movq(temp, object);
6314 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6315 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
6316 // `object`'s card.
6317 //
6318 // Register `card` contains the address of the card table. Note that the card
6319 // table's base is biased during its creation so that it always starts at an
6320 // address whose least-significant byte is equal to `kCardDirty` (see
6321 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6322 // below writes the `kCardDirty` (byte) value into the `object`'s card
6323 // (located at `card + object >> kCardShift`).
6324 //
6325 // This dual use of the value in register `card` (1. to calculate the location
6326 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6327 // (no need to explicitly load `kCardDirty` as an immediate value).
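  // In effect: card_table[object >> kCardShift] = kCardDirty, where the byte
  // written is the low byte of `card` itself (== kCardDirty by construction).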
6328 __ movb(Address(temp, card, TIMES_1, 0), card);
6329 }
6330
6331 void CodeGeneratorX86_64::CheckGCCardIsValid(CpuRegister temp,
6332 CpuRegister card,
6333 CpuRegister object) {
6334 NearLabel done;
6335 // Load the address of the card table into `card`.
6336 __ gs()->movq(card,
6337 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6338 /* no_rip= */ true));
6339 // Calculate the offset (in the card table) of the card corresponding to `object`.
6340 __ movq(temp, object);
6341 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6342 // assert (!clean || !self->is_gc_marking)
6343 __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6344 __ j(kNotEqual, &done);
6345 __ gs()->cmpl(
6346 Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
6347 Immediate(0));
6348 __ j(kEqual, &done);
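  // Reaching here means the card is clean while the GC is marking, contradicting
  // the assumption that allowed the write barrier to be elided; trap so the bug
  // is caught immediately.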
6349 __ int3();
6350 __ Bind(&done);
6351 }
6352
6353 void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6354 LOG(FATAL) << "Unimplemented";
6355 }
6356
6357 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
6358 if (instruction->GetNext()->IsSuspendCheck() &&
6359 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6360 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6361 // The back edge will generate the suspend check.
6362 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6363 }
6364
6365 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6366 }
6367
6368 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6369 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6370 instruction, LocationSummary::kCallOnSlowPath);
6371 // In suspend check slow path, usually there are no caller-save registers at all.
6372 // If SIMD instructions are present, however, we force spilling all live SIMD
6373 // registers in full width (since the runtime only saves/restores lower part).
6374 locations->SetCustomSlowPathCallerSaves(
6375 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6376 }
6377
6378 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6379 HBasicBlock* block = instruction->GetBlock();
6380 if (block->GetLoopInformation() != nullptr) {
6381 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6382 // The back edge will generate the suspend check.
6383 return;
6384 }
6385 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6386 // The goto will generate the suspend check.
6387 return;
6388 }
6389 GenerateSuspendCheck(instruction, nullptr);
6390 }
6391
6392 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6393 HBasicBlock* successor) {
6394 SuspendCheckSlowPathX86_64* slow_path =
6395 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6396 if (slow_path == nullptr) {
6397 slow_path =
6398 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6399 instruction->SetSlowPath(slow_path);
6400 codegen_->AddSlowPath(slow_path);
6401 if (successor != nullptr) {
6402 DCHECK(successor->IsLoopHeader());
6403 }
6404 } else {
6405 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6406 }
6407
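  // Test the thread's flag word; any pending suspend or checkpoint request bit
  // sends execution to the slow path.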
6408 __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6409 /* no_rip= */ true),
6410 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6411 if (successor == nullptr) {
6412 __ j(kNotZero, slow_path->GetEntryLabel());
6413 __ Bind(slow_path->GetReturnLabel());
6414 } else {
6415 __ j(kZero, codegen_->GetLabelOf(successor));
6416 __ jmp(slow_path->GetEntryLabel());
6417 }
6418 }
6419
6420 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6421 return codegen_->GetAssembler();
6422 }
6423
6424 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6425 MoveOperands* move = moves_[index];
6426 Location source = move->GetSource();
6427 Location destination = move->GetDestination();
6428
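  // Dispatch on the kinds of the source and destination locations; memory-to-memory
  // moves go through the reserved TMP register.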
6429 if (source.IsRegister()) {
6430 if (destination.IsRegister()) {
6431 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6432 } else if (destination.IsStackSlot()) {
6433 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6434 source.AsRegister<CpuRegister>());
6435 } else {
6436 DCHECK(destination.IsDoubleStackSlot());
6437 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6438 source.AsRegister<CpuRegister>());
6439 }
6440 } else if (source.IsStackSlot()) {
6441 if (destination.IsRegister()) {
6442 __ movl(destination.AsRegister<CpuRegister>(),
6443 Address(CpuRegister(RSP), source.GetStackIndex()));
6444 } else if (destination.IsFpuRegister()) {
6445 __ movss(destination.AsFpuRegister<XmmRegister>(),
6446 Address(CpuRegister(RSP), source.GetStackIndex()));
6447 } else {
6448 DCHECK(destination.IsStackSlot());
6449 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6450 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6451 }
6452 } else if (source.IsDoubleStackSlot()) {
6453 if (destination.IsRegister()) {
6454 __ movq(destination.AsRegister<CpuRegister>(),
6455 Address(CpuRegister(RSP), source.GetStackIndex()));
6456 } else if (destination.IsFpuRegister()) {
6457 __ movsd(destination.AsFpuRegister<XmmRegister>(),
6458 Address(CpuRegister(RSP), source.GetStackIndex()));
6459 } else {
6460 DCHECK(destination.IsDoubleStackSlot()) << destination;
6461 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6462 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6463 }
6464 } else if (source.IsSIMDStackSlot()) {
6465 if (destination.IsFpuRegister()) {
6466 __ movups(destination.AsFpuRegister<XmmRegister>(),
6467 Address(CpuRegister(RSP), source.GetStackIndex()));
6468 } else {
6469 DCHECK(destination.IsSIMDStackSlot());
6470 size_t high = kX86_64WordSize;
6471 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6472 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6473 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6474 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6475 }
6476 } else if (source.IsConstant()) {
6477 HConstant* constant = source.GetConstant();
6478 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6479 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6480 if (destination.IsRegister()) {
6481 if (value == 0) {
6482 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6483 } else {
6484 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6485 }
6486 } else {
6487 DCHECK(destination.IsStackSlot()) << destination;
6488 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6489 }
6490 } else if (constant->IsLongConstant()) {
6491 int64_t value = constant->AsLongConstant()->GetValue();
6492 if (destination.IsRegister()) {
6493 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6494 } else {
6495 DCHECK(destination.IsDoubleStackSlot()) << destination;
6496 codegen_->Store64BitValueToStack(destination, value);
6497 }
6498 } else if (constant->IsFloatConstant()) {
6499 float fp_value = constant->AsFloatConstant()->GetValue();
6500 if (destination.IsFpuRegister()) {
6501 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6502 codegen_->Load32BitValue(dest, fp_value);
6503 } else {
6504 DCHECK(destination.IsStackSlot()) << destination;
6505 Immediate imm(bit_cast<int32_t, float>(fp_value));
6506 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6507 }
6508 } else {
6509 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6510 double fp_value = constant->AsDoubleConstant()->GetValue();
6511 int64_t value = bit_cast<int64_t, double>(fp_value);
6512 if (destination.IsFpuRegister()) {
6513 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6514 codegen_->Load64BitValue(dest, fp_value);
6515 } else {
6516 DCHECK(destination.IsDoubleStackSlot()) << destination;
6517 codegen_->Store64BitValueToStack(destination, value);
6518 }
6519 }
6520 } else if (source.IsFpuRegister()) {
6521 if (destination.IsFpuRegister()) {
6522 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6523 } else if (destination.IsStackSlot()) {
6524 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6525 source.AsFpuRegister<XmmRegister>());
6526 } else if (destination.IsDoubleStackSlot()) {
6527 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6528 source.AsFpuRegister<XmmRegister>());
6529 } else {
6530 DCHECK(destination.IsSIMDStackSlot());
6531 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6532 source.AsFpuRegister<XmmRegister>());
6533 }
6534 }
6535 }
6536
6537 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6538 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6539 __ movl(Address(CpuRegister(RSP), mem), reg);
6540 __ movl(reg, CpuRegister(TMP));
6541 }
6542
6543 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6544 __ movq(CpuRegister(TMP), reg1);
6545 __ movq(reg1, reg2);
6546 __ movq(reg2, CpuRegister(TMP));
6547 }
6548
6549 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6550 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6551 __ movq(Address(CpuRegister(RSP), mem), reg);
6552 __ movq(reg, CpuRegister(TMP));
6553 }
6554
6555 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6556 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6557 __ movss(Address(CpuRegister(RSP), mem), reg);
6558 __ movd(reg, CpuRegister(TMP));
6559 }
6560
6561 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6562 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6563 __ movsd(Address(CpuRegister(RSP), mem), reg);
6564 __ movd(reg, CpuRegister(TMP));
6565 }
6566
6567 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
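  // Spill the 128-bit register to a fresh stack slot, swap the two memory
  // locations, then reload the register from the slot.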
6568 size_t extra_slot = 2 * kX86_64WordSize;
6569 __ subq(CpuRegister(RSP), Immediate(extra_slot));
6570 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6571 ExchangeMemory64(0, mem + extra_slot, 2);
6572 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6573 __ addq(CpuRegister(RSP), Immediate(extra_slot));
6574 }
6575
6576 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6577 ScratchRegisterScope ensure_scratch(
6578 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6579
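  // If the scratch register had to be spilled, SpillScratch pushed it and moved
  // RSP down by one word; compensate the memory offsets accordingly.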
6580 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6581 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6582 __ movl(CpuRegister(ensure_scratch.GetRegister()),
6583 Address(CpuRegister(RSP), mem2 + stack_offset));
6584 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6585 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6586 CpuRegister(ensure_scratch.GetRegister()));
6587 }
6588
6589 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6590 ScratchRegisterScope ensure_scratch(
6591 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6592
6593 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6594
6595 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6596 for (int i = 0; i < num_of_qwords; i++) {
6597 __ movq(CpuRegister(TMP),
6598 Address(CpuRegister(RSP), mem1 + stack_offset));
6599 __ movq(CpuRegister(ensure_scratch.GetRegister()),
6600 Address(CpuRegister(RSP), mem2 + stack_offset));
6601 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6602 CpuRegister(TMP));
6603 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6604 CpuRegister(ensure_scratch.GetRegister()));
6605 stack_offset += kX86_64WordSize;
6606 }
6607 }
6608
6609 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6610 MoveOperands* move = moves_[index];
6611 Location source = move->GetSource();
6612 Location destination = move->GetDestination();
6613
6614 if (source.IsRegister() && destination.IsRegister()) {
6615 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6616 } else if (source.IsRegister() && destination.IsStackSlot()) {
6617 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6618 } else if (source.IsStackSlot() && destination.IsRegister()) {
6619 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6620 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6621 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6622 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6623 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6624 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6625 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6626 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6627 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6628 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6629 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6630 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6631 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6632 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6633 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6634 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6635 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6636 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6637 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6638 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6639 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6640 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6641 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6642 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6643 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6644 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6645 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6646 } else {
6647 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6648 }
6649 }
6650
6651
6652 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6653 __ pushq(CpuRegister(reg));
6654 }
6655
6656
6657 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6658 __ popq(CpuRegister(reg));
6659 }
6660
6661 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6662 SlowPathCode* slow_path, CpuRegister class_reg) {
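  // A single unsigned byte comparison against the shifted 'visibly initialized'
  // status: classes at or above that status need no initialization check;
  // anything below takes the slow path.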
6663 __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
6664 __ j(kBelow, slow_path->GetEntryLabel());
6665 __ Bind(slow_path->GetExitLabel());
6666 }
6667
6668 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6669 CpuRegister temp) {
6670 uint32_t path_to_root = check->GetBitstringPathToRoot();
6671 uint32_t mask = check->GetBitstringMask();
6672 DCHECK(IsPowerOfTwo(mask + 1));
6673 size_t mask_bits = WhichPowerOf2(mask + 1);
6674
6675 if (mask_bits == 16u) {
6676 // Compare the bitstring in memory.
6677 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6678 } else {
6679 // /* uint32_t */ temp = temp->status_
6680 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6681 // Compare the bitstring bits using SUB.
6682 __ subl(temp, Immediate(path_to_root));
6683 // Shift out bits that do not contribute to the comparison.
6684 __ shll(temp, Immediate(32u - mask_bits));
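    // The zero flag is now set iff the low `mask_bits` bits of the status word
    // matched `path_to_root`.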
6685 }
6686 }
6687
6688 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6689 HLoadClass::LoadKind desired_class_load_kind) {
6690 switch (desired_class_load_kind) {
6691 case HLoadClass::LoadKind::kInvalid:
6692 LOG(FATAL) << "UNREACHABLE";
6693 UNREACHABLE();
6694 case HLoadClass::LoadKind::kReferrersClass:
6695 break;
6696 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6697 case HLoadClass::LoadKind::kBootImageRelRo:
6698 case HLoadClass::LoadKind::kAppImageRelRo:
6699 case HLoadClass::LoadKind::kBssEntry:
6700 case HLoadClass::LoadKind::kBssEntryPublic:
6701 case HLoadClass::LoadKind::kBssEntryPackage:
6702 DCHECK(!GetCompilerOptions().IsJitCompiler());
6703 break;
6704 case HLoadClass::LoadKind::kJitBootImageAddress:
6705 case HLoadClass::LoadKind::kJitTableAddress:
6706 DCHECK(GetCompilerOptions().IsJitCompiler());
6707 break;
6708 case HLoadClass::LoadKind::kRuntimeCall:
6709 break;
6710 }
6711 return desired_class_load_kind;
6712 }
6713
6714 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6715 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6716 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6717 // Custom calling convention: RAX serves as both input and output.
6718 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6719 cls,
6720 Location::RegisterLocation(RAX),
6721 Location::RegisterLocation(RAX));
6722 return;
6723 }
6724 DCHECK_EQ(cls->NeedsAccessCheck(),
6725 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6726 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6727
6728 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
6729 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6730 ? LocationSummary::kCallOnSlowPath
6731 : LocationSummary::kNoCall;
6732 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6733 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6734 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6735 }
6736
6737 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6738 locations->SetInAt(0, Location::RequiresRegister());
6739 }
6740 locations->SetOut(Location::RequiresRegister());
6741 if (load_kind == HLoadClass::LoadKind::kBssEntry ||
6742 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6743 load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
6744 if (codegen_->EmitNonBakerReadBarrier()) {
6745 // For non-Baker read barrier we have a temp-clobbering call.
6746 } else {
6747 // Rely on the type resolution and/or initialization to save everything.
6748 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6749 }
6750 }
6751 }
6752
6753 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6754 dex::TypeIndex type_index,
6755 Handle<mirror::Class> handle) {
6756 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6757 // Add a patch entry and return the label.
6758 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6759 PatchInfo<Label>* info = &jit_class_patches_.back();
6760 return &info->label;
6761 }
6762
6763 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6764 // move.
6765 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6766 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6767 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6768 codegen_->GenerateLoadClassRuntimeCall(cls);
6769 return;
6770 }
6771 DCHECK_EQ(cls->NeedsAccessCheck(),
6772 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6773 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6774
6775 LocationSummary* locations = cls->GetLocations();
6776 Location out_loc = locations->Out();
6777 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6778
6779 const ReadBarrierOption read_barrier_option =
6780 cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
6781 bool generate_null_check = false;
6782 switch (load_kind) {
6783 case HLoadClass::LoadKind::kReferrersClass: {
6784 DCHECK(!cls->CanCallRuntime());
6785 DCHECK(!cls->MustGenerateClinitCheck());
6786 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6787 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6788 GenerateGcRootFieldLoad(
6789 cls,
6790 out_loc,
6791 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6792 /* fixup_label= */ nullptr,
6793 read_barrier_option);
6794 break;
6795 }
6796 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6797 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6798 codegen_->GetCompilerOptions().IsBootImageExtension());
6799 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6800 __ leal(out,
6801 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6802 codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6803 break;
6804 case HLoadClass::LoadKind::kBootImageRelRo: {
6805 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6806 __ movl(out,
6807 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6808 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6809 break;
6810 }
6811 case HLoadClass::LoadKind::kAppImageRelRo: {
6812 DCHECK(codegen_->GetCompilerOptions().IsAppImage());
6813 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6814 __ movl(out,
6815 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6816 codegen_->RecordAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6817 break;
6818 }
6819 case HLoadClass::LoadKind::kBssEntry:
6820 case HLoadClass::LoadKind::kBssEntryPublic:
6821 case HLoadClass::LoadKind::kBssEntryPackage: {
6822 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6823 /* no_rip= */ false);
6824 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6825 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6826 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6827 // No need for memory fence, thanks to the x86-64 memory model.
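// The .bss slot for the type starts out null and is only filled once the class has been
// resolved, so a null value loaded here routes execution to the resolution slow path below.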
6828 generate_null_check = true;
6829 break;
6830 }
6831 case HLoadClass::LoadKind::kJitBootImageAddress: {
6832 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6833 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6834 DCHECK_NE(address, 0u);
6835 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
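// A 32-bit address is sufficient here: JIT boot image objects are expected to be mapped in the
// low 4GiB (reinterpret_cast32 relies on this), and `movl` zero-extends the immediate into the
// full 64-bit register.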
6836 break;
6837 }
6838 case HLoadClass::LoadKind::kJitTableAddress: {
6839 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6840 /* no_rip= */ true);
6841 Label* fixup_label =
6842 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6843 // /* GcRoot<mirror::Class> */ out = *address
6844 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6845 break;
6846 }
6847 default:
6848 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6849 UNREACHABLE();
6850 }
6851
6852 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6853 DCHECK(cls->CanCallRuntime());
6854 SlowPathCode* slow_path =
6855 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6856 codegen_->AddSlowPath(slow_path);
6857 if (generate_null_check) {
6858 __ testl(out, out);
6859 __ j(kEqual, slow_path->GetEntryLabel());
6860 }
6861 if (cls->MustGenerateClinitCheck()) {
6862 GenerateClassInitializationCheck(slow_path, out);
6863 } else {
6864 __ Bind(slow_path->GetExitLabel());
6865 }
6866 }
6867 }
6868
6869 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6870 LocationSummary* locations =
6871 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6872 locations->SetInAt(0, Location::RequiresRegister());
6873 if (check->HasUses()) {
6874 locations->SetOut(Location::SameAsFirstInput());
6875 }
6876 // Rely on the type initialization to save everything we need.
6877 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6878 }
6879
6880 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6881 // Custom calling convention: RAX serves as both input and output.
6882 Location location = Location::RegisterLocation(RAX);
6883 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6884 }
6885
6886 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6887 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6888 }
6889
6890 Label* CodeGeneratorX86_64::NewJitRootMethodTypePatch(const DexFile& dex_file,
6891 dex::ProtoIndex proto_index,
6892 Handle<mirror::MethodType> handle) {
6893 ReserveJitMethodTypeRoot(ProtoReference(&dex_file, proto_index), handle);
6894 // Add a patch entry and return the label.
6895 jit_method_type_patches_.emplace_back(&dex_file, proto_index.index_);
6896 PatchInfo<Label>* info = &jit_method_type_patches_.back();
6897 return &info->label;
6898 }
6899
6900 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6901 LocationSummary* locations =
6902 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
6903 if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) {
6904 Location location = Location::RegisterLocation(RAX);
6905 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6906 } else {
6907 locations->SetOut(Location::RequiresRegister());
6908 if (load->GetLoadKind() == HLoadMethodType::LoadKind::kBssEntry) {
6909 if (codegen_->EmitNonBakerReadBarrier()) {
6910 // For non-Baker read barrier we have a temp-clobbering call.
6911 } else {
6912 // Rely on the pResolveMethodType to save everything.
6913 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6914 }
6915 }
6916 }
6917 }
6918
6919 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6920 LocationSummary* locations = load->GetLocations();
6921 Location out_loc = locations->Out();
6922 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6923
6924 switch (load->GetLoadKind()) {
6925 case HLoadMethodType::LoadKind::kBssEntry: {
6926 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6927 /* no_rip= */ false);
6928 Label* fixup_label = codegen_->NewMethodTypeBssEntryPatch(load);
6929 // /* GcRoot<mirror::MethodType> */ out = *address /* PC-relative */
6930 GenerateGcRootFieldLoad(
6931 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6932 // No need for memory fence, thanks to the x86-64 memory model.
6933 SlowPathCode* slow_path =
6934 new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathX86_64(load);
6935 codegen_->AddSlowPath(slow_path);
6936 __ testl(out, out);
6937 __ j(kEqual, slow_path->GetEntryLabel());
6938 __ Bind(slow_path->GetExitLabel());
6939 return;
6940 }
6941 case HLoadMethodType::LoadKind::kJitTableAddress: {
6942 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6943 /* no_rip= */ true);
6944 Handle<mirror::MethodType> method_type = load->GetMethodType();
6945 DCHECK(method_type != nullptr);
6946 Label* fixup_label = codegen_->NewJitRootMethodTypePatch(
6947 load->GetDexFile(), load->GetProtoIndex(), method_type);
6948 GenerateGcRootFieldLoad(
6949 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6950 return;
6951 }
6952 default:
6953 DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall);
6954 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6955 break;
6956 }
6957 }
6958
6959 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6960 // We assume the class is not null.
6961 SlowPathCode* slow_path =
6962 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6963 codegen_->AddSlowPath(slow_path);
6964 GenerateClassInitializationCheck(slow_path,
6965 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6966 }
6967
6968 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6969 HLoadString::LoadKind desired_string_load_kind) {
6970 switch (desired_string_load_kind) {
6971 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6972 case HLoadString::LoadKind::kBootImageRelRo:
6973 case HLoadString::LoadKind::kBssEntry:
6974 DCHECK(!GetCompilerOptions().IsJitCompiler());
6975 break;
6976 case HLoadString::LoadKind::kJitBootImageAddress:
6977 case HLoadString::LoadKind::kJitTableAddress:
6978 DCHECK(GetCompilerOptions().IsJitCompiler());
6979 break;
6980 case HLoadString::LoadKind::kRuntimeCall:
6981 break;
6982 }
6983 return desired_string_load_kind;
6984 }
6985
6986 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6987 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
6988 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6989 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6990 locations->SetOut(Location::RegisterLocation(RAX));
6991 } else {
6992 locations->SetOut(Location::RequiresRegister());
6993 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6994 if (codegen_->EmitNonBakerReadBarrier()) {
6995 // For non-Baker read barrier we have a temp-clobbering call.
6996 } else {
6997 // Rely on the pResolveString to save everything.
6998 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6999 }
7000 }
7001 }
7002 }
7003
7004 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
7005 dex::StringIndex string_index,
7006 Handle<mirror::String> handle) {
7007 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7008 // Add a patch entry and return the label.
7009 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7010 PatchInfo<Label>* info = &jit_string_patches_.back();
7011 return &info->label;
7012 }
7013
7014 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7015 // move.
7016 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7017 LocationSummary* locations = load->GetLocations();
7018 Location out_loc = locations->Out();
7019 CpuRegister out = out_loc.AsRegister<CpuRegister>();
7020
7021 switch (load->GetLoadKind()) {
7022 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7023 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7024 codegen_->GetCompilerOptions().IsBootImageExtension());
7025 __ leal(out,
7026 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
7027 codegen_->RecordBootImageStringPatch(load);
7028 return;
7029 }
7030 case HLoadString::LoadKind::kBootImageRelRo: {
7031 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7032 __ movl(out,
7033 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
7034 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
7035 return;
7036 }
7037 case HLoadString::LoadKind::kBssEntry: {
7038 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
7039 /* no_rip= */ false);
7040 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7041 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7042 GenerateGcRootFieldLoad(
7043 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7044 // No need for memory fence, thanks to the x86-64 memory model.
7045 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
7046 codegen_->AddSlowPath(slow_path);
7047 __ testl(out, out);
7048 __ j(kEqual, slow_path->GetEntryLabel());
7049 __ Bind(slow_path->GetExitLabel());
7050 return;
7051 }
7052 case HLoadString::LoadKind::kJitBootImageAddress: {
7053 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7054 DCHECK_NE(address, 0u);
7055 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
7056 return;
7057 }
7058 case HLoadString::LoadKind::kJitTableAddress: {
7059 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
7060 /* no_rip= */ true);
7061 Label* fixup_label = codegen_->NewJitRootStringPatch(
7062 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7063 // /* GcRoot<mirror::String> */ out = *address
7064 GenerateGcRootFieldLoad(
7065 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7066 return;
7067 }
7068 default:
7069 break;
7070 }
7071
7072 // Custom calling convention: RAX serves as both input and output.
7073 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
7074 codegen_->InvokeRuntime(kQuickResolveString,
7075 load,
7076 load->GetDexPc());
7077 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7078 }
7079
7080 static Address GetExceptionTlsAddress() {
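// Address of the thread-local exception field. Callers dereference it with a GS segment
// override, since Thread::Current() is reachable through GS on x86-64.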
7081 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
7082 /* no_rip= */ true);
7083 }
7084
7085 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
7086 LocationSummary* locations =
7087 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7088 locations->SetOut(Location::RequiresRegister());
7089 }
7090
7091 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
7092 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
7093 }
7094
7095 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
7096 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7097 }
7098
7099 void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) {
7100 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
7101 }
7102
7103 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
7104 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7105 instruction, LocationSummary::kCallOnMainOnly);
7106 InvokeRuntimeCallingConvention calling_convention;
7107 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7108 }
7109
7110 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
7111 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7112 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7113 }
7114
7115 // Temp is used for read barrier.
7116 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7117 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7118 return 1;
7119 }
7120 if (emit_read_barrier &&
7121 !kUseBakerReadBarrier &&
7122 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7123 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7124 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7125 return 1;
7126 }
7127 return 0;
7128 }
7129
7130 // The interface case has 2 temps: one for holding the number of interfaces and one for the
7131 // current interface pointer; the current interface is compared in memory.
7132 // The other checks have one temp for loading the object's class.
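// For example, a kInterfaceCheck CheckCast uses 1 + 1 = 2 temps (the object's class plus the
// iftable cursor), while a kExactCheck without read barriers needs only the single class temp.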
7133 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7134 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7135 }
7136
7137 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7138 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7139 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7140 bool baker_read_barrier_slow_path = false;
7141 switch (type_check_kind) {
7142 case TypeCheckKind::kExactCheck:
7143 case TypeCheckKind::kAbstractClassCheck:
7144 case TypeCheckKind::kClassHierarchyCheck:
7145 case TypeCheckKind::kArrayObjectCheck:
7146 case TypeCheckKind::kInterfaceCheck: {
7147 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7148 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7149 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7150 (type_check_kind != TypeCheckKind::kInterfaceCheck);
7151 break;
7152 }
7153 case TypeCheckKind::kArrayCheck:
7154 case TypeCheckKind::kUnresolvedCheck:
7155 call_kind = LocationSummary::kCallOnSlowPath;
7156 break;
7157 case TypeCheckKind::kBitstringCheck:
7158 break;
7159 }
7160
7161 LocationSummary* locations =
7162 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7163 if (baker_read_barrier_slow_path) {
7164 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7165 }
7166 locations->SetInAt(0, Location::RequiresRegister());
7167 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7168 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7169 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7170 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7171 } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7172 locations->SetInAt(1, Location::RequiresRegister());
7173 } else {
7174 locations->SetInAt(1, Location::Any());
7175 }
7176 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
7177 locations->SetOut(Location::RequiresRegister());
7178 locations->AddRegisterTemps(
7179 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7180 }
7181
7182 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7183 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7184 LocationSummary* locations = instruction->GetLocations();
7185 Location obj_loc = locations->InAt(0);
7186 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7187 Location cls = locations->InAt(1);
7188 Location out_loc = locations->Out();
7189 CpuRegister out = out_loc.AsRegister<CpuRegister>();
7190 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7191 DCHECK_LE(num_temps, 1u);
7192 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
7193 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7194 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7195 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7196 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7197 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7198 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7199 const uint32_t object_array_data_offset =
7200 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7201 SlowPathCode* slow_path = nullptr;
7202 NearLabel done, zero;
7203
7204 // Return 0 if `obj` is null.
7205 // Avoid null check if we know obj is not null.
7206 if (instruction->MustDoNullCheck()) {
7207 __ testl(obj, obj);
7208 __ j(kEqual, &zero);
7209 }
7210
7211 switch (type_check_kind) {
7212 case TypeCheckKind::kExactCheck: {
7213 ReadBarrierOption read_barrier_option =
7214 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7215 // /* HeapReference<Class> */ out = obj->klass_
7216 GenerateReferenceLoadTwoRegisters(instruction,
7217 out_loc,
7218 obj_loc,
7219 class_offset,
7220 read_barrier_option);
7221 if (cls.IsRegister()) {
7222 __ cmpl(out, cls.AsRegister<CpuRegister>());
7223 } else {
7224 DCHECK(cls.IsStackSlot()) << cls;
7225 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7226 }
7227 if (zero.IsLinked()) {
7228 // Classes must be equal for the instanceof to succeed.
7229 __ j(kNotEqual, &zero);
7230 __ movl(out, Immediate(1));
7231 __ jmp(&done);
7232 } else {
7233 __ setcc(kEqual, out);
7234 // setcc only sets the low byte.
7235 __ andl(out, Immediate(1));
7236 }
7237 break;
7238 }
7239
7240 case TypeCheckKind::kAbstractClassCheck: {
7241 ReadBarrierOption read_barrier_option =
7242 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7243 // /* HeapReference<Class> */ out = obj->klass_
7244 GenerateReferenceLoadTwoRegisters(instruction,
7245 out_loc,
7246 obj_loc,
7247 class_offset,
7248 read_barrier_option);
7249 // If the class is abstract, we eagerly fetch the super class of the
7250 // object to avoid doing a comparison we know will fail.
7251 NearLabel loop, success;
7252 __ Bind(&loop);
7253 // /* HeapReference<Class> */ out = out->super_class_
7254 GenerateReferenceLoadOneRegister(instruction,
7255 out_loc,
7256 super_offset,
7257 maybe_temp_loc,
7258 read_barrier_option);
7259 __ testl(out, out);
7260 // If `out` is null, we use it for the result, and jump to `done`.
7261 __ j(kEqual, &done);
7262 if (cls.IsRegister()) {
7263 __ cmpl(out, cls.AsRegister<CpuRegister>());
7264 } else {
7265 DCHECK(cls.IsStackSlot()) << cls;
7266 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7267 }
7268 __ j(kNotEqual, &loop);
7269 __ movl(out, Immediate(1));
7270 if (zero.IsLinked()) {
7271 __ jmp(&done);
7272 }
7273 break;
7274 }
7275
7276 case TypeCheckKind::kClassHierarchyCheck: {
7277 ReadBarrierOption read_barrier_option =
7278 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7279 // /* HeapReference<Class> */ out = obj->klass_
7280 GenerateReferenceLoadTwoRegisters(instruction,
7281 out_loc,
7282 obj_loc,
7283 class_offset,
7284 read_barrier_option);
7285 // Walk over the class hierarchy to find a match.
7286 NearLabel loop, success;
7287 __ Bind(&loop);
7288 if (cls.IsRegister()) {
7289 __ cmpl(out, cls.AsRegister<CpuRegister>());
7290 } else {
7291 DCHECK(cls.IsStackSlot()) << cls;
7292 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7293 }
7294 __ j(kEqual, &success);
7295 // /* HeapReference<Class> */ out = out->super_class_
7296 GenerateReferenceLoadOneRegister(instruction,
7297 out_loc,
7298 super_offset,
7299 maybe_temp_loc,
7300 read_barrier_option);
7301 __ testl(out, out);
7302 __ j(kNotEqual, &loop);
7303 // If `out` is null, we use it for the result, and jump to `done`.
7304 __ jmp(&done);
7305 __ Bind(&success);
7306 __ movl(out, Immediate(1));
7307 if (zero.IsLinked()) {
7308 __ jmp(&done);
7309 }
7310 break;
7311 }
7312
7313 case TypeCheckKind::kArrayObjectCheck: {
7314 ReadBarrierOption read_barrier_option =
7315 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7316 // /* HeapReference<Class> */ out = obj->klass_
7317 GenerateReferenceLoadTwoRegisters(instruction,
7318 out_loc,
7319 obj_loc,
7320 class_offset,
7321 read_barrier_option);
7322 // Do an exact check.
7323 NearLabel exact_check;
7324 if (cls.IsRegister()) {
7325 __ cmpl(out, cls.AsRegister<CpuRegister>());
7326 } else {
7327 DCHECK(cls.IsStackSlot()) << cls;
7328 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7329 }
7330 __ j(kEqual, &exact_check);
7331 // Otherwise, we need to check that the object's class is a non-primitive array.
7332 // /* HeapReference<Class> */ out = out->component_type_
7333 GenerateReferenceLoadOneRegister(instruction,
7334 out_loc,
7335 component_offset,
7336 maybe_temp_loc,
7337 read_barrier_option);
7338 __ testl(out, out);
7339 // If `out` is null, we use it for the result, and jump to `done`.
7340 __ j(kEqual, &done);
7341 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7342 __ j(kNotEqual, &zero);
7343 __ Bind(&exact_check);
7344 __ movl(out, Immediate(1));
7345 __ jmp(&done);
7346 break;
7347 }
7348
7349 case TypeCheckKind::kArrayCheck: {
7350 // No read barrier since the slow path will retry upon failure.
7351 // /* HeapReference<Class> */ out = obj->klass_
7352 GenerateReferenceLoadTwoRegisters(instruction,
7353 out_loc,
7354 obj_loc,
7355 class_offset,
7356 kWithoutReadBarrier);
7357 if (cls.IsRegister()) {
7358 __ cmpl(out, cls.AsRegister<CpuRegister>());
7359 } else {
7360 DCHECK(cls.IsStackSlot()) << cls;
7361 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7362 }
7363 DCHECK(locations->OnlyCallsOnSlowPath());
7364 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7365 instruction, /* is_fatal= */ false);
7366 codegen_->AddSlowPath(slow_path);
7367 __ j(kNotEqual, slow_path->GetEntryLabel());
7368 __ movl(out, Immediate(1));
7369 if (zero.IsLinked()) {
7370 __ jmp(&done);
7371 }
7372 break;
7373 }
7374
7375 case TypeCheckKind::kInterfaceCheck: {
7376 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
7377 DCHECK(locations->OnlyCallsOnSlowPath());
7378 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7379 instruction, /* is_fatal= */ false);
7380 codegen_->AddSlowPath(slow_path);
7381 if (codegen_->EmitNonBakerReadBarrier()) {
7382 __ jmp(slow_path->GetEntryLabel());
7383 break;
7384 }
7385 // For Baker read barrier, take the slow path while marking.
7386 __ gs()->cmpl(
7387 Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
7388 Immediate(0));
7389 __ j(kNotEqual, slow_path->GetEntryLabel());
7390 }
7391
7392 // Fast-path without read barriers.
7393 CpuRegister temp = maybe_temp_loc.AsRegister<CpuRegister>();
7394 // /* HeapReference<Class> */ temp = obj->klass_
7395 __ movl(temp, Address(obj, class_offset));
7396 __ MaybeUnpoisonHeapReference(temp);
7397 // /* HeapReference<Class> */ temp = temp->iftable_
7398 __ movl(temp, Address(temp, iftable_offset));
7399 __ MaybeUnpoisonHeapReference(temp);
7400 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7401 __ movl(out, Address(temp, array_length_offset));
7402 // Maybe poison the `cls` for direct comparison with memory.
7403 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7404 // Loop through the iftable and check if any class matches.
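// Each IfTable entry spans two reference slots (the interface class and its method array),
// hence the loop below steps the index by 2 and compares only the class slot.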
7405 NearLabel loop, end;
7406 __ Bind(&loop);
7407 // Check if we still have an entry to compare.
7408 __ subl(out, Immediate(2));
7409 __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
7410 // Go to next interface if the classes do not match.
7411 __ cmpl(cls.AsRegister<CpuRegister>(),
7412 CodeGeneratorX86_64::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
7413 __ j(kNotEqual, &loop);
7414 if (zero.IsLinked()) {
7415 __ movl(out, Immediate(1));
7416 // If `cls` was poisoned above, unpoison it.
7417 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7418 __ jmp(&done);
7419 if (kPoisonHeapReferences) {
7420 // The false case needs to unpoison the class before jumping to `zero`.
7421 __ Bind(&end);
7422 __ UnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7423 __ jmp(&zero);
7424 }
7425 } else {
7426 // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
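// On a match we fall out of the loop, the movl below sets `out` to -1, and the addl at `end`
// yields 1. On a mismatch the subl above left -2 in `out` when branching to `end`, so the
// addl yields 0.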
7427 __ movl(out, Immediate(-1));
7428 __ Bind(&end);
7429 __ addl(out, Immediate(2));
7430 // If `cls` was poisoned above, unpoison it.
7431 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7432 }
7433 break;
7434 }
7435
7436 case TypeCheckKind::kUnresolvedCheck: {
7437 // Note that we indeed only call on slow path, but we always go
7438 // into the slow path for the unresolved check case.
7439 //
7440 // We cannot directly call the InstanceofNonTrivial runtime
7441 // entry point without resorting to a type checking slow path
7442 // here (i.e. by calling InvokeRuntime directly), as it would
7443 // require assigning fixed registers for the inputs of this
7444 // HInstanceOf instruction (following the runtime calling
7445 // convention), which might be cluttered by the potential first
7446 // read barrier emission at the beginning of this method.
7447 //
7448 // TODO: Introduce a new runtime entry point taking the object
7449 // to test (instead of its class) as argument, and let it deal
7450 // with the read barrier issues. This will let us refactor this
7451 // case of the `switch` code as it was previously (with a direct
7452 // call to the runtime not using a type checking slow path).
7453 // This should also be beneficial for the other cases above.
7454 DCHECK(locations->OnlyCallsOnSlowPath());
7455 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7456 instruction, /* is_fatal= */ false);
7457 codegen_->AddSlowPath(slow_path);
7458 __ jmp(slow_path->GetEntryLabel());
7459 break;
7460 }
7461
7462 case TypeCheckKind::kBitstringCheck: {
7463 // /* HeapReference<Class> */ out = obj->klass_
7464 GenerateReferenceLoadTwoRegisters(instruction,
7465 out_loc,
7466 obj_loc,
7467 class_offset,
7468 kWithoutReadBarrier);
7469
7470 GenerateBitstringTypeCheckCompare(instruction, out);
7471 if (zero.IsLinked()) {
7472 __ j(kNotEqual, &zero);
7473 __ movl(out, Immediate(1));
7474 __ jmp(&done);
7475 } else {
7476 __ setcc(kEqual, out);
7477 // setcc only sets the low byte.
7478 __ andl(out, Immediate(1));
7479 }
7480 break;
7481 }
7482 }
7483
7484 if (zero.IsLinked()) {
7485 __ Bind(&zero);
7486 __ xorl(out, out);
7487 }
7488
7489 if (done.IsLinked()) {
7490 __ Bind(&done);
7491 }
7492
7493 if (slow_path != nullptr) {
7494 __ Bind(slow_path->GetExitLabel());
7495 }
7496 }
7497
7498 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
7499 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7500 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
7501 LocationSummary* locations =
7502 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7503 locations->SetInAt(0, Location::RequiresRegister());
7504 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7505 // Require a register for the interface check since there is a loop that compares the class to
7506 // a memory address.
7507 locations->SetInAt(1, Location::RequiresRegister());
7508 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7509 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7510 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7511 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7512 } else {
7513 locations->SetInAt(1, Location::Any());
7514 }
7515 locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
7516 }
7517
7518 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7519 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7520 LocationSummary* locations = instruction->GetLocations();
7521 Location obj_loc = locations->InAt(0);
7522 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7523 Location cls = locations->InAt(1);
7524 Location temp_loc = locations->GetTemp(0);
7525 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7526 const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
7527 DCHECK_GE(num_temps, 1u);
7528 DCHECK_LE(num_temps, 2u);
7529 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7530 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7531 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7532 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7533 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7534 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7535 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7536 const uint32_t object_array_data_offset =
7537 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7538
7539 bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
7540 SlowPathCode* type_check_slow_path =
7541 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7542 instruction, is_type_check_slow_path_fatal);
7543 codegen_->AddSlowPath(type_check_slow_path);
7544
7545
7546 NearLabel done;
7547 // Avoid null check if we know obj is not null.
7548 if (instruction->MustDoNullCheck()) {
7549 __ testl(obj, obj);
7550 __ j(kEqual, &done);
7551 }
7552
7553 switch (type_check_kind) {
7554 case TypeCheckKind::kExactCheck:
7555 case TypeCheckKind::kArrayCheck: {
7556 // /* HeapReference<Class> */ temp = obj->klass_
7557 GenerateReferenceLoadTwoRegisters(instruction,
7558 temp_loc,
7559 obj_loc,
7560 class_offset,
7561 kWithoutReadBarrier);
7562 if (cls.IsRegister()) {
7563 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7564 } else {
7565 DCHECK(cls.IsStackSlot()) << cls;
7566 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7567 }
7568 // Jump to slow path for throwing the exception or doing a
7569 // more involved array check.
7570 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7571 break;
7572 }
7573
7574 case TypeCheckKind::kAbstractClassCheck: {
7575 // /* HeapReference<Class> */ temp = obj->klass_
7576 GenerateReferenceLoadTwoRegisters(instruction,
7577 temp_loc,
7578 obj_loc,
7579 class_offset,
7580 kWithoutReadBarrier);
7581 // If the class is abstract, we eagerly fetch the super class of the
7582 // object to avoid doing a comparison we know will fail.
7583 NearLabel loop;
7584 __ Bind(&loop);
7585 // /* HeapReference<Class> */ temp = temp->super_class_
7586 GenerateReferenceLoadOneRegister(instruction,
7587 temp_loc,
7588 super_offset,
7589 maybe_temp2_loc,
7590 kWithoutReadBarrier);
7591
7592 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7593 // exception.
7594 __ testl(temp, temp);
7595 // Otherwise, compare the classes.
7596 __ j(kZero, type_check_slow_path->GetEntryLabel());
7597 if (cls.IsRegister()) {
7598 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7599 } else {
7600 DCHECK(cls.IsStackSlot()) << cls;
7601 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7602 }
7603 __ j(kNotEqual, &loop);
7604 break;
7605 }
7606
7607 case TypeCheckKind::kClassHierarchyCheck: {
7608 // /* HeapReference<Class> */ temp = obj->klass_
7609 GenerateReferenceLoadTwoRegisters(instruction,
7610 temp_loc,
7611 obj_loc,
7612 class_offset,
7613 kWithoutReadBarrier);
7614 // Walk over the class hierarchy to find a match.
7615 NearLabel loop;
7616 __ Bind(&loop);
7617 if (cls.IsRegister()) {
7618 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7619 } else {
7620 DCHECK(cls.IsStackSlot()) << cls;
7621 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7622 }
7623 __ j(kEqual, &done);
7624
7625 // /* HeapReference<Class> */ temp = temp->super_class_
7626 GenerateReferenceLoadOneRegister(instruction,
7627 temp_loc,
7628 super_offset,
7629 maybe_temp2_loc,
7630 kWithoutReadBarrier);
7631
7632 // If the class reference currently in `temp` is not null, jump
7633 // back at the beginning of the loop.
7634 __ testl(temp, temp);
7635 __ j(kNotZero, &loop);
7636 // Otherwise, jump to the slow path to throw the exception.
7637 __ jmp(type_check_slow_path->GetEntryLabel());
7638 break;
7639 }
7640
7641 case TypeCheckKind::kArrayObjectCheck: {
7642 // /* HeapReference<Class> */ temp = obj->klass_
7643 GenerateReferenceLoadTwoRegisters(instruction,
7644 temp_loc,
7645 obj_loc,
7646 class_offset,
7647 kWithoutReadBarrier);
7648 // Do an exact check.
7649 NearLabel check_non_primitive_component_type;
7650 if (cls.IsRegister()) {
7651 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7652 } else {
7653 DCHECK(cls.IsStackSlot()) << cls;
7654 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7655 }
7656 __ j(kEqual, &done);
7657
7658 // Otherwise, we need to check that the object's class is a non-primitive array.
7659 // /* HeapReference<Class> */ temp = temp->component_type_
7660 GenerateReferenceLoadOneRegister(instruction,
7661 temp_loc,
7662 component_offset,
7663 maybe_temp2_loc,
7664 kWithoutReadBarrier);
7665
7666 // If the component type is not null (i.e. the object is indeed
7667 // an array), jump to label `check_non_primitive_component_type`
7668 // to further check that this component type is not a primitive
7669 // type.
7670 __ testl(temp, temp);
7671 // Otherwise, jump to the slow path to throw the exception.
7672 __ j(kZero, type_check_slow_path->GetEntryLabel());
7673 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7674 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7675 break;
7676 }
7677
7678 case TypeCheckKind::kUnresolvedCheck: {
7679 // We always go into the type check slow path for the unresolved case.
7680 //
7681 // We cannot directly call the CheckCast runtime entry point
7682 // without resorting to a type checking slow path here (i.e. by
7683 // calling InvokeRuntime directly), as it would require
7684 // assigning fixed registers for the inputs of this HInstanceOf
7685 // instruction (following the runtime calling convention), which
7686 // might be cluttered by the potential first read barrier
7687 // emission at the beginning of this method.
7688 __ jmp(type_check_slow_path->GetEntryLabel());
7689 break;
7690 }
7691
7692 case TypeCheckKind::kInterfaceCheck: {
7693 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7694 // We cannot get false positives by doing this.
7695 // /* HeapReference<Class> */ temp = obj->klass_
7696 GenerateReferenceLoadTwoRegisters(instruction,
7697 temp_loc,
7698 obj_loc,
7699 class_offset,
7700 kWithoutReadBarrier);
7701
7702 // /* HeapReference<Class> */ temp = temp->iftable_
7703 GenerateReferenceLoadOneRegister(instruction,
7704 temp_loc,
7705 iftable_offset,
7706 maybe_temp2_loc,
7707 kWithoutReadBarrier);
7708 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7709 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7710 // Maybe poison the `cls` for direct comparison with memory.
7711 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7712 // Loop through the iftable and check if any class matches.
7713 NearLabel start_loop;
7714 __ Bind(&start_loop);
7715 // Check if we still have an entry to compare.
7716 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7717 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7718 // Go to next interface if the classes do not match.
7719 __ cmpl(cls.AsRegister<CpuRegister>(),
7720 CodeGeneratorX86_64::ArrayAddress(temp,
7721 maybe_temp2_loc,
7722 TIMES_4,
7723 object_array_data_offset));
7724 __ j(kNotEqual, &start_loop); // Fall through when the classes match.
7725 // If `cls` was poisoned above, unpoison it.
7726 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7727 break;
7728 }
7729
7730 case TypeCheckKind::kBitstringCheck: {
7731 // /* HeapReference<Class> */ temp = obj->klass_
7732 GenerateReferenceLoadTwoRegisters(instruction,
7733 temp_loc,
7734 obj_loc,
7735 class_offset,
7736 kWithoutReadBarrier);
7737
7738 GenerateBitstringTypeCheckCompare(instruction, temp);
7739 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7740 break;
7741 }
7742 }
7743
7744 if (done.IsLinked()) {
7745 __ Bind(&done);
7746 }
7747
7748 __ Bind(type_check_slow_path->GetExitLabel());
7749 }
7750
7751 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7752 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7753 instruction, LocationSummary::kCallOnMainOnly);
7754 InvokeRuntimeCallingConvention calling_convention;
7755 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7756 }
7757
7758 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7759 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7760 instruction,
7761 instruction->GetDexPc());
7762 if (instruction->IsEnter()) {
7763 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7764 } else {
7765 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7766 }
7767 }
7768
7769 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7770 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7771 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7772 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7773 locations->SetInAt(0, Location::RequiresRegister());
7774 // There is no immediate variant of negated bitwise and in X86.
7775 locations->SetInAt(1, Location::RequiresRegister());
7776 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7777 }
7778
7779 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7780 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7781 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7782 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7783 locations->SetInAt(0, Location::RequiresRegister());
7784 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7785 }
7786
7787 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7788 LocationSummary* locations = instruction->GetLocations();
7789 Location first = locations->InAt(0);
7790 Location second = locations->InAt(1);
7791 Location dest = locations->Out();
7792 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7793 }
7794
7795 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7796 LocationSummary* locations = instruction->GetLocations();
7797 Location src = locations->InAt(0);
7798 Location dest = locations->Out();
7799 switch (instruction->GetOpKind()) {
7800 case HInstruction::kAnd:
7801 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7802 break;
7803 case HInstruction::kXor:
7804 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7805 break;
7806 default:
7807 LOG(FATAL) << "Unreachable";
7808 }
7809 }
7810
7811 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7812 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7813 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7814
7815 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7816 LocationSummary* locations =
7817 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7818 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7819 || instruction->GetResultType() == DataType::Type::kInt64);
7820 locations->SetInAt(0, Location::RequiresRegister());
7821 locations->SetInAt(1, Location::Any());
7822 locations->SetOut(Location::SameAsFirstInput());
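// x86-64 AND/OR/XOR are two-operand instructions that overwrite their left operand, so the
// output must alias the first input.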
7823 }
7824
7825 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7826 HandleBitwiseOperation(instruction);
7827 }
7828
7829 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7830 HandleBitwiseOperation(instruction);
7831 }
7832
7833 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7834 HandleBitwiseOperation(instruction);
7835 }
7836
7837 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7838 LocationSummary* locations = instruction->GetLocations();
7839 Location first = locations->InAt(0);
7840 Location second = locations->InAt(1);
7841 DCHECK(first.Equals(locations->Out()));
7842
7843 if (instruction->GetResultType() == DataType::Type::kInt32) {
7844 if (second.IsRegister()) {
7845 if (instruction->IsAnd()) {
7846 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7847 } else if (instruction->IsOr()) {
7848 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7849 } else {
7850 DCHECK(instruction->IsXor());
7851 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7852 }
7853 } else if (second.IsConstant()) {
7854 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7855 if (instruction->IsAnd()) {
7856 __ andl(first.AsRegister<CpuRegister>(), imm);
7857 } else if (instruction->IsOr()) {
7858 __ orl(first.AsRegister<CpuRegister>(), imm);
7859 } else {
7860 DCHECK(instruction->IsXor());
7861 __ xorl(first.AsRegister<CpuRegister>(), imm);
7862 }
7863 } else {
7864 Address address(CpuRegister(RSP), second.GetStackIndex());
7865 if (instruction->IsAnd()) {
7866 __ andl(first.AsRegister<CpuRegister>(), address);
7867 } else if (instruction->IsOr()) {
7868 __ orl(first.AsRegister<CpuRegister>(), address);
7869 } else {
7870 DCHECK(instruction->IsXor());
7871 __ xorl(first.AsRegister<CpuRegister>(), address);
7872 }
7873 }
7874 } else {
7875 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7876 CpuRegister first_reg = first.AsRegister<CpuRegister>();
7877 bool second_is_constant = false;
7878 int64_t value = 0;
7879 if (second.IsConstant()) {
7880 second_is_constant = true;
7881 value = second.GetConstant()->AsLongConstant()->GetValue();
7882 }
7883 bool is_int32_value = IsInt<32>(value);
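// The 64-bit ALU forms only accept a sign-extended 32-bit immediate; a constant such as
// 2^31 (0x80000000) cannot be encoded directly and is instead materialized below via a
// RIP-relative 64-bit literal in the constant area.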
7884
7885 if (instruction->IsAnd()) {
7886 if (second_is_constant) {
7887 if (is_int32_value) {
7888 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7889 } else {
7890 __ andq(first_reg, codegen_->LiteralInt64Address(value));
7891 }
7892 } else if (second.IsDoubleStackSlot()) {
7893 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7894 } else {
7895 __ andq(first_reg, second.AsRegister<CpuRegister>());
7896 }
7897 } else if (instruction->IsOr()) {
7898 if (second_is_constant) {
7899 if (is_int32_value) {
7900 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7901 } else {
7902 __ orq(first_reg, codegen_->LiteralInt64Address(value));
7903 }
7904 } else if (second.IsDoubleStackSlot()) {
7905 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7906 } else {
7907 __ orq(first_reg, second.AsRegister<CpuRegister>());
7908 }
7909 } else {
7910 DCHECK(instruction->IsXor());
7911 if (second_is_constant) {
7912 if (is_int32_value) {
7913 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7914 } else {
7915 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7916 }
7917 } else if (second.IsDoubleStackSlot()) {
7918 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7919 } else {
7920 __ xorq(first_reg, second.AsRegister<CpuRegister>());
7921 }
7922 }
7923 }
7924 }
7925
7926 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7927 HInstruction* instruction,
7928 Location out,
7929 uint32_t offset,
7930 Location maybe_temp,
7931 ReadBarrierOption read_barrier_option) {
7932 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7933 if (read_barrier_option == kWithReadBarrier) {
7934 DCHECK(codegen_->EmitReadBarrier());
7935 if (kUseBakerReadBarrier) {
7936 // Load with fast path based Baker's read barrier.
7937 // /* HeapReference<Object> */ out = *(out + offset)
7938 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7939 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7940 } else {
7941 // Load with slow path based read barrier.
7942 // Save the value of `out` into `maybe_temp` before overwriting it
7943 // in the following move operation, as we will need it for the
7944 // read barrier below.
7945 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7946 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7947 // /* HeapReference<Object> */ out = *(out + offset)
7948 __ movl(out_reg, Address(out_reg, offset));
7949 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7950 }
7951 } else {
7952 // Plain load with no read barrier.
7953 // /* HeapReference<Object> */ out = *(out + offset)
7954 __ movl(out_reg, Address(out_reg, offset));
7955 __ MaybeUnpoisonHeapReference(out_reg);
7956 }
7957 }
7958
7959 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7960 HInstruction* instruction,
7961 Location out,
7962 Location obj,
7963 uint32_t offset,
7964 ReadBarrierOption read_barrier_option) {
7965 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7966 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7967 if (read_barrier_option == kWithReadBarrier) {
7968 DCHECK(codegen_->EmitReadBarrier());
7969 if (kUseBakerReadBarrier) {
7970 // Load with fast path based Baker's read barrier.
7971 // /* HeapReference<Object> */ out = *(obj + offset)
7972 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7973 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7974 } else {
7975 // Load with slow path based read barrier.
7976 // /* HeapReference<Object> */ out = *(obj + offset)
7977 __ movl(out_reg, Address(obj_reg, offset));
7978 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7979 }
7980 } else {
7981 // Plain load with no read barrier.
7982 // /* HeapReference<Object> */ out = *(obj + offset)
7983 __ movl(out_reg, Address(obj_reg, offset));
7984 __ MaybeUnpoisonHeapReference(out_reg);
7985 }
7986 }
7987
7988 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7989 HInstruction* instruction,
7990 Location root,
7991 const Address& address,
7992 Label* fixup_label,
7993 ReadBarrierOption read_barrier_option) {
7994 CpuRegister root_reg = root.AsRegister<CpuRegister>();
7995 if (read_barrier_option == kWithReadBarrier) {
7996 DCHECK(codegen_->EmitReadBarrier());
7997 if (kUseBakerReadBarrier) {
7998 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7999 // Baker's read barriers are used:
8000 //
8001 // root = obj.field;
8002 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8003 // if (temp != null) {
8004 // root = temp(root)
8005 // }
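// The per-register mark entrypoint doubles as the is-marking flag: it is non-null only while
// the GC is marking, so in the common case the check below is a single gs-relative compare
// and no call is made.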
8006
8007 // /* GcRoot<mirror::Object> */ root = *address
8008 __ movl(root_reg, address);
8009 if (fixup_label != nullptr) {
8010 __ Bind(fixup_label);
8011 }
8012 static_assert(
8013 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8014 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8015 "have different sizes.");
8016 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8017 "art::mirror::CompressedReference<mirror::Object> and int32_t "
8018 "have different sizes.");
8019
8020 // Slow path marking the GC root `root`.
8021 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
8022 instruction, root, /* unpoison_ref_before_marking= */ false);
8023 codegen_->AddSlowPath(slow_path);
8024
8025 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
8026 const int32_t entry_point_offset =
8027 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
8028 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
8029 // The entrypoint is null when the GC is not marking.
8030 __ j(kNotEqual, slow_path->GetEntryLabel());
8031 __ Bind(slow_path->GetExitLabel());
8032 } else {
8033 // GC root loaded through a slow path for read barriers other
8034 // than Baker's.
8035 // /* GcRoot<mirror::Object>* */ root = address
8036 __ leaq(root_reg, address);
8037 if (fixup_label != nullptr) {
8038 __ Bind(fixup_label);
8039 }
8040 // /* mirror::Object* */ root = root->Read()
8041 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8042 }
8043 } else {
8044 // Plain GC root load with no read barrier.
8045 // /* GcRoot<mirror::Object> */ root = *address
8046 __ movl(root_reg, address);
8047 if (fixup_label != nullptr) {
8048 __ Bind(fixup_label);
8049 }
8050 // Note that GC roots are not affected by heap poisoning, thus we
8051 // do not have to unpoison `root_reg` here.
8052 }
8053 }
8054
8055 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8056 Location ref,
8057 CpuRegister obj,
8058 uint32_t offset,
8059 bool needs_null_check) {
8060 DCHECK(EmitBakerReadBarrier());
8061
8062 // /* HeapReference<Object> */ ref = *(obj + offset)
8063 Address src(obj, offset);
8064 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8065 }
8066
8067 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8068 Location ref,
8069 CpuRegister obj,
8070 uint32_t data_offset,
8071 Location index,
8072 bool needs_null_check) {
8073 DCHECK(EmitBakerReadBarrier());
8074
8075 static_assert(
8076 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8077 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8078 // /* HeapReference<Object> */ ref =
8079 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
8080 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
8081 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8082 }
8083
8084 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8085 Location ref,
8086 CpuRegister obj,
8087 const Address& src,
8088 bool needs_null_check,
8089 bool always_update_field,
8090 CpuRegister* temp1,
8091 CpuRegister* temp2) {
8092 DCHECK(EmitBakerReadBarrier());
8093
8094 // In slow path based read barriers, the read barrier call is
8095 // inserted after the original load. However, in fast path based
8096 // Baker's read barriers, we need to perform the load of
8097 // mirror::Object::monitor_ *before* the original reference load.
8098 // This load-load ordering is required by the read barrier.
8099 // The fast path/slow path (for Baker's algorithm) should look like:
8100 //
8101 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8102 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
8103 // HeapReference<Object> ref = *src; // Original reference load.
8104 // bool is_gray = (rb_state == ReadBarrier::GrayState());
8105 // if (is_gray) {
8106 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
8107 // }
8108 //
8109 // Note: the original implementation in ReadBarrier::Barrier is
8110 // slightly more complex as:
8111 // - it implements the load-load fence using a data dependency on
8112 // the high-bits of rb_state, which are expected to be all zeroes
8113 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
8114 // here, which is a no-op thanks to the x86-64 memory model);
8115 // - it performs additional checks that we do not do here for
8116 // performance reasons.
8117
8118 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
8119 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8120
8121 // Given the numeric representation, it's enough to check the low bit of the rb_state.
8122 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8123 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8124 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8125 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8126 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
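  // Splitting the shift into a byte index and a bit-within-byte allows the gray bit to be checked
  // with a single-byte `testb` below, with an immediate that fits in an int8_t.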
8127
8128 // if (rb_state == ReadBarrier::GrayState())
8129 // ref = ReadBarrier::Mark(ref);
8130 // At this point, just do the "if" and make sure that flags are preserved until the branch.
8131 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8132 if (needs_null_check) {
8133 MaybeRecordImplicitNullCheck(instruction);
8134 }
8135
8136 // Load fence to prevent load-load reordering.
8137 // Note that this is a no-op, thanks to the x86-64 memory model.
8138 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8139
8140 // The actual reference load.
8141 // /* HeapReference<Object> */ ref = *src
8142 __ movl(ref_reg, src); // Flags are unaffected.
8143
8144 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8145 // Slow path marking the object `ref` when it is gray.
8146 SlowPathCode* slow_path;
8147 if (always_update_field) {
8148 DCHECK(temp1 != nullptr);
8149 DCHECK(temp2 != nullptr);
8150 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
8151 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
8152 } else {
8153 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
8154 instruction, ref, /* unpoison_ref_before_marking= */ true);
8155 }
8156 AddSlowPath(slow_path);
8157
8158 // We have done the "if" of the gray bit check above, now branch based on the flags.
8159 __ j(kNotZero, slow_path->GetEntryLabel());
8160
8161 // Object* ref = ref_addr->AsMirrorPtr()
8162 __ MaybeUnpoisonHeapReference(ref_reg);
8163
8164 __ Bind(slow_path->GetExitLabel());
8165 }
8166
8167 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
8168 Location out,
8169 Location ref,
8170 Location obj,
8171 uint32_t offset,
8172 Location index) {
8173 DCHECK(EmitReadBarrier());
8174
8175 // Insert a slow path based read barrier *after* the reference load.
8176 //
8177 // If heap poisoning is enabled, the unpoisoning of the loaded
8178 // reference will be carried out by the runtime within the slow
8179 // path.
8180 //
8181 // Note that `ref` currently does not get unpoisoned (when heap
8182 // poisoning is enabled), which is alright as the `ref` argument is
8183 // not used by the artReadBarrierSlow entry point.
8184 //
8185 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8186 SlowPathCode* slow_path = new (GetScopedAllocator())
8187 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
8188 AddSlowPath(slow_path);
8189
8190 __ jmp(slow_path->GetEntryLabel());
8191 __ Bind(slow_path->GetExitLabel());
8192 }
8193
8194 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8195 Location out,
8196 Location ref,
8197 Location obj,
8198 uint32_t offset,
8199 Location index) {
8200 if (EmitReadBarrier()) {
8201 // Baker's read barriers shall be handled by the fast path
8202 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
8203 DCHECK(!kUseBakerReadBarrier);
8204 // If heap poisoning is enabled, unpoisoning will be taken care of
8205 // by the runtime within the slow path.
8206 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8207 } else if (kPoisonHeapReferences) {
8208 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
8209 }
8210 }
8211
8212 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8213 Location out,
8214 Location root) {
8215 DCHECK(EmitReadBarrier());
8216
8217 // Insert a slow path based read barrier *after* the GC root load.
8218 //
8219 // Note that GC roots are not affected by heap poisoning, so we do
8220 // not need to do anything special for this here.
8221 SlowPathCode* slow_path =
8222 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
8223 AddSlowPath(slow_path);
8224
8225 __ jmp(slow_path->GetEntryLabel());
8226 __ Bind(slow_path->GetExitLabel());
8227 }
8228
8229 void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8230 // Nothing to do, this should be removed during prepare for register allocator.
8231 LOG(FATAL) << "Unreachable";
8232 }
8233
8234 void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8235 // Nothing to do, this should be removed during prepare for register allocator.
8236 LOG(FATAL) << "Unreachable";
8237 }
8238
8239 // Simple implementation of packed switch - generate cascaded compare/jumps.
8240 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8241 LocationSummary* locations =
8242 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8243 locations->SetInAt(0, Location::RequiresRegister());
8244 locations->AddRegisterTemps(2);
8245 }
8246
8247 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8248 int32_t lower_bound = switch_instr->GetStartValue();
8249 uint32_t num_entries = switch_instr->GetNumEntries();
8250 LocationSummary* locations = switch_instr->GetLocations();
8251 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
8252 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
8253 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
8254 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8255
8256 // Should we generate smaller inline compare/jumps?
8257 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8258 // Figure out the correct compare values and jump conditions.
8259 // Handle the first compare/branch as a special case because it might
8260 // jump to the default case.
8261 DCHECK_GT(num_entries, 2u);
8262 Condition first_condition;
8263 uint32_t index;
8264 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
8265 if (lower_bound != 0) {
8266 first_condition = kLess;
8267 __ cmpl(value_reg_in, Immediate(lower_bound));
8268 __ j(first_condition, codegen_->GetLabelOf(default_block));
8269 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8270
8271 index = 1;
8272 } else {
8273 // Handle all the compare/jumps below.
8274 first_condition = kBelow;
8275 index = 0;
8276 }
8277
8278 // Handle the rest of the compare/jumps.
8279 for (; index + 1 < num_entries; index += 2) {
8280 int32_t compare_to_value = lower_bound + index + 1;
8281 __ cmpl(value_reg_in, Immediate(compare_to_value));
8282 // Jump to successors[index] if value < case_value[index].
8283 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8284 // Jump to successors[index + 1] if value == case_value[index + 1].
8285 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8286 }
8287
8288 if (index != num_entries) {
8289 // There are an odd number of entries. Handle the last one.
8290 DCHECK_EQ(index + 1, num_entries);
8291 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
8292 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8293 }
8294
8295 // And the default for any other value.
8296 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
8297 __ jmp(codegen_->GetLabelOf(default_block));
8298 }
8299 return;
8300 }
8301
8302 // Remove the bias, if needed.
8303 Register value_reg_out = value_reg_in.AsRegister();
8304 if (lower_bound != 0) {
8305 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
8306 value_reg_out = temp_reg.AsRegister();
8307 }
8308 CpuRegister value_reg(value_reg_out);
8309
8310 // Is the value in range?
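  // Note: after the bias removal, values below `lower_bound` have wrapped around to large unsigned
  // numbers, so a single unsigned comparison (kAbove) also rejects them.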
8311 __ cmpl(value_reg, Immediate(num_entries - 1));
8312 __ j(kAbove, codegen_->GetLabelOf(default_block));
8313
8314 // We are in the range of the table.
8315 // Load the address of the jump table in the constant area.
8316 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
8317
8318 // Load the (signed) offset from the jump table.
8319 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
8320
8321 // Add the offset to the address of the table base.
8322 __ addq(temp_reg, base_reg);
8323
8324 // And jump.
8325 __ jmp(temp_reg);
8326 }
8327
8328 void LocationsBuilderX86_64::VisitIntermediateAddress(
8329 [[maybe_unused]] HIntermediateAddress* instruction) {
8330 LOG(FATAL) << "Unreachable";
8331 }
8332
8333 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(
8334 [[maybe_unused]] HIntermediateAddress* instruction) {
8335 LOG(FATAL) << "Unreachable";
8336 }
8337
8338 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
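  // Note: xorl is used as the zeroing idiom; it is shorter than a movl with a zero immediate.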
8339 if (value == 0) {
8340 __ xorl(dest, dest);
8341 } else {
8342 __ movl(dest, Immediate(value));
8343 }
8344 }
8345
8346 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
8347 if (value == 0) {
8348 // Clears upper bits too.
8349 __ xorl(dest, dest);
8350 } else if (IsUint<32>(value)) {
8351 // We can use a 32 bit move, as it will zero-extend and is shorter.
8352 __ movl(dest, Immediate(static_cast<int32_t>(value)));
8353 } else {
8354 __ movq(dest, Immediate(value));
8355 }
8356 }
8357
8358 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
8359 if (value == 0) {
8360 __ xorps(dest, dest);
8361 } else {
8362 __ movss(dest, LiteralInt32Address(value));
8363 }
8364 }
8365
8366 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
8367 if (value == 0) {
8368 __ xorpd(dest, dest);
8369 } else {
8370 __ movsd(dest, LiteralInt64Address(value));
8371 }
8372 }
8373
8374 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
8375 Load32BitValue(dest, bit_cast<int32_t, float>(value));
8376 }
8377
8378 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
8379 Load64BitValue(dest, bit_cast<int64_t, double>(value));
8380 }
8381
8382 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
8383 if (value == 0) {
8384 __ testl(dest, dest);
8385 } else {
8386 __ cmpl(dest, Immediate(value));
8387 }
8388 }
8389
8390 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
8391 if (IsInt<32>(value)) {
8392 if (value == 0) {
8393 __ testq(dest, dest);
8394 } else {
8395 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
8396 }
8397 } else {
8398 // Value won't fit in a 32-bit immediate.
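  // cmpq has no 64-bit immediate form, so compare against a RIP-relative literal in the constant area.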
8399 __ cmpq(dest, LiteralInt64Address(value));
8400 }
8401 }
8402
8403 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
8404 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8405 GenerateIntCompare(lhs_reg, rhs);
8406 }
8407
8408 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
8409 if (rhs.IsConstant()) {
8410 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8411 Compare32BitValue(lhs, value);
8412 } else if (rhs.IsStackSlot()) {
8413 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8414 } else {
8415 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
8416 }
8417 }
8418
8419 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
8420 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8421 if (rhs.IsConstant()) {
8422 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
8423 Compare64BitValue(lhs_reg, value);
8424 } else if (rhs.IsDoubleStackSlot()) {
8425 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8426 } else {
8427 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
8428 }
8429 }
8430
8431 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
8432 Location index,
8433 ScaleFactor scale,
8434 uint32_t data_offset) {
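  // A constant index is folded into the displacement; otherwise a scaled-index addressing mode is used.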
8435 return index.IsConstant()
8436 ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
8437 : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
8438 }
8439
8440 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
8441 DCHECK(dest.IsDoubleStackSlot());
8442 if (IsInt<32>(value)) {
8443 // Can store directly: the 32-bit immediate is sign-extended to 64 bits.
8444 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
8445 Immediate(static_cast<int32_t>(value)));
8446 } else {
8447 Load64BitValue(CpuRegister(TMP), value);
8448 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
8449 }
8450 }
8451
8452 /**
8453 * Class to handle late fixup of offsets into constant area.
8454 */
8455 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8456 public:
8457 RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
8458 : codegen_(&codegen), offset_into_constant_area_(offset) {}
8459
8460 protected:
8461 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8462
8463 CodeGeneratorX86_64* codegen_;
8464
8465 private:
8466 void Process(const MemoryRegion& region, int pos) override {
8467 // Patch the correct offset for the instruction. We use the address of the
8468 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
8469 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8470 int32_t relative_position = constant_offset - pos;
8471
8472 // Patch in the right value.
8473 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8474 }
8475
8476 // Location in constant area that the fixup refers to.
8477 size_t offset_into_constant_area_;
8478 };
8479
8480 /**
8481 * Class to handle late fixup of offsets to a jump table that will be created in the
8482 * constant area.
8483 */
8484 class JumpTableRIPFixup : public RIPFixup {
8485 public:
8486 JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
8487 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
8488
8489 void CreateJumpTable() {
8490 X86_64Assembler* assembler = codegen_->GetAssembler();
8491
8492 // Ensure that the reference to the jump table has the correct offset.
8493 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8494 SetOffset(offset_in_constant_table);
8495
8496 // Compute the offset from the start of the function to this jump table.
8497 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
8498
8499 // Populate the jump table with the offset of each case's target block.
8500 int32_t num_entries = switch_instr_->GetNumEntries();
8501 HBasicBlock* block = switch_instr_->GetBlock();
8502 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8503 // The value that we want is the target offset - the position of the table.
8504 for (int32_t i = 0; i < num_entries; i++) {
8505 HBasicBlock* b = successors[i];
8506 Label* l = codegen_->GetLabelOf(b);
8507 DCHECK(l->IsBound());
8508 int32_t offset_to_block = l->Position() - current_table_offset;
8509 assembler->AppendInt32(offset_to_block);
8510 }
8511 }
8512
8513 private:
8514 const HPackedSwitch* switch_instr_;
8515 };
8516
8517 void CodeGeneratorX86_64::Finalize() {
8518 // Generate the constant area if needed.
8519 X86_64Assembler* assembler = GetAssembler();
8520 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8521 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
8522 assembler->Align(4, 0);
8523 constant_area_start_ = assembler->CodeSize();
8524
8525 // Populate any jump tables.
8526 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8527 jump_table->CreateJumpTable();
8528 }
8529
8530 // And now add the constant area to the generated code.
8531 assembler->AddConstantArea();
8532 }
8533
8534 // And finish up.
8535 CodeGenerator::Finalize();
8536 }
8537
8538 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
8539 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
8540 return Address::RIP(fixup);
8541 }
8542
8543 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
8544 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
8545 return Address::RIP(fixup);
8546 }
8547
8548 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
8549 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
8550 return Address::RIP(fixup);
8551 }
8552
8553 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
8554 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
8555 return Address::RIP(fixup);
8556 }
8557
8558 // TODO: trg as memory.
8559 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
8560 if (!trg.IsValid()) {
8561 DCHECK_EQ(type, DataType::Type::kVoid);
8562 return;
8563 }
8564
8565 DCHECK_NE(type, DataType::Type::kVoid);
8566
8567 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
8568 if (trg.Equals(return_loc)) {
8569 return;
8570 }
8571
8572 // Let the parallel move resolver take care of all of this.
8573 HParallelMove parallel_move(GetGraph()->GetAllocator());
8574 parallel_move.AddMove(return_loc, trg, type, nullptr);
8575 GetMoveResolver()->EmitNativeCode(&parallel_move);
8576 }
8577
8578 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
8579 // Create a fixup to be used to create and address the jump table.
8580 JumpTableRIPFixup* table_fixup =
8581 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8582
8583 // We have to populate the jump tables.
8584 fixups_to_jump_tables_.push_back(table_fixup);
8585 return Address::RIP(table_fixup);
8586 }
8587
8588 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
8589 const Address& addr_high,
8590 int64_t v,
8591 HInstruction* instruction) {
8592 if (IsInt<32>(v)) {
8593 int32_t v_32 = v;
8594 __ movq(addr_low, Immediate(v_32));
8595 MaybeRecordImplicitNullCheck(instruction);
8596 } else {
8597 // Didn't fit in a 32-bit immediate. Store it in two 32-bit halves.
8598 int32_t low_v = Low32Bits(v);
8599 int32_t high_v = High32Bits(v);
8600 __ movl(addr_low, Immediate(low_v));
8601 MaybeRecordImplicitNullCheck(instruction);
8602 __ movl(addr_high, Immediate(high_v));
8603 }
8604 }
8605
8606 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
8607 const uint8_t* roots_data,
8608 const PatchInfo<Label>& info,
8609 uint64_t index_in_table) const {
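  // Back up from the bound label to the 4-byte immediate of the root load and patch it with the
  // address of the root's entry in the JIT roots table.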
8610 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8611 uintptr_t address =
8612 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8613 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8614 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8615 dchecked_integral_cast<uint32_t>(address);
8616 }
8617
8618 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
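  // Resolve every recorded JIT root use (strings, classes and method types) to the address of its
  // entry in the JIT roots table.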
8619 for (const PatchInfo<Label>& info : jit_string_patches_) {
8620 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8621 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8622 PatchJitRootUse(code, roots_data, info, index_in_table);
8623 }
8624
8625 for (const PatchInfo<Label>& info : jit_class_patches_) {
8626 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8627 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8628 PatchJitRootUse(code, roots_data, info, index_in_table);
8629 }
8630
8631 for (const PatchInfo<Label>& info : jit_method_type_patches_) {
8632 ProtoReference proto_reference(info.target_dex_file, dex::ProtoIndex(info.offset_or_index));
8633 uint64_t index_in_table = GetJitMethodTypeRootIndex(proto_reference);
8634 PatchJitRootUse(code, roots_data, info, index_in_table);
8635 }
8636 }
8637
8638 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
8639 return codegen_->GetInstructionSetFeatures().HasAVX();
8640 }
8641
8642 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
8643 return codegen_->GetInstructionSetFeatures().HasAVX2();
8644 }
8645
8646 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
8647 return codegen_->GetInstructionSetFeatures().HasAVX();
8648 }
8649
8650 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
8651 return codegen_->GetInstructionSetFeatures().HasAVX2();
8652 }
8653
8654 void LocationsBuilderX86_64::VisitBitwiseNegatedRight(
8655 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
8656 LOG(FATAL) << "Unimplemented";
8657 }
8658
8659 void InstructionCodeGeneratorX86_64::VisitBitwiseNegatedRight(
8660 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
8661 LOG(FATAL) << "Unimplemented";
8662 }
8663
8664 #undef __
8665
8666 } // namespace x86_64
8667 } // namespace art
8668