xref: /aosp_15_r20/art/compiler/optimizing/code_generator_x86_64.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_root-inl.h"
22 #include "class_table.h"
23 #include "code_generator_utils.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_list.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86_64.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/method_type.h"
39 #include "mirror/object_reference.h"
40 #include "mirror/var_handle.h"
41 #include "optimizing/nodes.h"
42 #include "profiling_info_builder.h"
43 #include "scoped_thread_state_change-inl.h"
44 #include "thread.h"
45 #include "trace.h"
46 #include "utils/assembler.h"
47 #include "utils/stack_checks.h"
48 #include "utils/x86_64/assembler_x86_64.h"
49 #include "utils/x86_64/constants_x86_64.h"
50 #include "utils/x86_64/managed_register_x86_64.h"
51 
52 namespace art HIDDEN {
53 
54 template<class MirrorType>
55 class GcRoot;
56 
57 namespace x86_64 {
58 
59 static constexpr int kCurrentMethodStackOffset = 0;
60 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
61 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
62 // generates less code/data with a small num_entries.
63 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
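// Illustrative arithmetic for the threshold above: at 5 entries the compare/jump sequence costs
// roughly 1.5 * 5 = 7.5 instructions and no literal data, while a jump table costs 7 instructions
// plus 5 literals, so for larger switches the table quickly becomes the smaller encoding.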
64 
65 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
66 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
67 
68 static constexpr int kC2ConditionMask = 0x400;
69 
70 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
71   // Custom calling convention: RAX serves as both input and output.
72   RegisterSet caller_saves = RegisterSet::Empty();
73   caller_saves.Add(Location::RegisterLocation(RAX));
74   return caller_saves;
75 }
76 
77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
78 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
79 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
80 
81 class NullCheckSlowPathX86_64 : public SlowPathCode {
82  public:
83   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
84 
85   void EmitNativeCode(CodeGenerator* codegen) override {
86     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
87     __ Bind(GetEntryLabel());
88     if (instruction_->CanThrowIntoCatchBlock()) {
89       // Live registers will be restored in the catch block if caught.
90       SaveLiveRegisters(codegen, instruction_->GetLocations());
91     }
92     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
93                                   instruction_,
94                                   instruction_->GetDexPc(),
95                                   this);
96     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
97   }
98 
99   bool IsFatal() const override { return true; }
100 
101   const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
102 
103  private:
104   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
105 };
106 
107 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
108  public:
109   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
110 
111   void EmitNativeCode(CodeGenerator* codegen) override {
112     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
113     __ Bind(GetEntryLabel());
114     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
115     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
116   }
117 
118   bool IsFatal() const override { return true; }
119 
120   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
121 
122  private:
123   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
124 };
125 
126 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
127  public:
128   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
129       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
130 
131   void EmitNativeCode(CodeGenerator* codegen) override {
132     __ Bind(GetEntryLabel());
133     if (type_ == DataType::Type::kInt32) {
134       if (is_div_) {
135         __ negl(cpu_reg_);
136       } else {
137         __ xorl(cpu_reg_, cpu_reg_);
138       }
139 
140     } else {
141       DCHECK_EQ(DataType::Type::kInt64, type_);
142       if (is_div_) {
143         __ negq(cpu_reg_);
144       } else {
145         __ xorl(cpu_reg_, cpu_reg_);
146       }
147     }
148     __ jmp(GetExitLabel());
149   }
150 
151   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
152 
153  private:
154   const CpuRegister cpu_reg_;
155   const DataType::Type type_;
156   const bool is_div_;
157   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
158 };
159 
160 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
161  public:
162   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
163       : SlowPathCode(instruction), successor_(successor) {}
164 
165   void EmitNativeCode(CodeGenerator* codegen) override {
166     LocationSummary* locations = instruction_->GetLocations();
167     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
168     __ Bind(GetEntryLabel());
169     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
170     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
171     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
172     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
173     if (successor_ == nullptr) {
174       __ jmp(GetReturnLabel());
175     } else {
176       __ jmp(x86_64_codegen->GetLabelOf(successor_));
177     }
178   }
179 
180   Label* GetReturnLabel() {
181     DCHECK(successor_ == nullptr);
182     return &return_label_;
183   }
184 
185   HBasicBlock* GetSuccessor() const {
186     return successor_;
187   }
188 
189   const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
190 
191  private:
192   HBasicBlock* const successor_;
193   Label return_label_;
194 
195   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
196 };
197 
198 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
199  public:
200   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
201     : SlowPathCode(instruction) {}
202 
203   void EmitNativeCode(CodeGenerator* codegen) override {
204     LocationSummary* locations = instruction_->GetLocations();
205     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
206     __ Bind(GetEntryLabel());
207     if (instruction_->CanThrowIntoCatchBlock()) {
208       // Live registers will be restored in the catch block if caught.
209       SaveLiveRegisters(codegen, locations);
210     }
211 
212     Location index_loc = locations->InAt(0);
213     Location length_loc = locations->InAt(1);
214     InvokeRuntimeCallingConvention calling_convention;
215     Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
216     Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
217 
218     // Are we using an array length from memory?
219     if (!length_loc.IsValid()) {
220       DCHECK(instruction_->InputAt(1)->IsArrayLength());
221       HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
222       DCHECK(array_length->IsEmittedAtUseSite());
223       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
224       Location array_loc = array_length->GetLocations()->InAt(0);
225       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
226       if (!index_loc.Equals(length_arg)) {
227         // The index is not clobbered by loading the length directly to `length_arg`.
228         __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
229         x86_64_codegen->Move(index_arg, index_loc);
230       } else if (!array_loc.Equals(index_arg)) {
231         // The array reference is not clobbered by the index move.
232         x86_64_codegen->Move(index_arg, index_loc);
233         __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
234       } else {
235         // Load the array length into `TMP`.
236         DCHECK(codegen->IsBlockedCoreRegister(TMP));
237         __ movl(CpuRegister(TMP), array_len);
238         // Single move to CPU register does not clobber `TMP`.
239         x86_64_codegen->Move(index_arg, index_loc);
240         __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
241       }
242       if (mirror::kUseStringCompression && array_length->IsStringLength()) {
243         __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
244       }
245     } else {
246       // We're moving two locations to locations that could overlap,
247       // so we need a parallel move resolver.
248       codegen->EmitParallelMoves(
249           index_loc,
250           index_arg,
251           DataType::Type::kInt32,
252           length_loc,
253           length_arg,
254           DataType::Type::kInt32);
255     }
256 
257     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
258         ? kQuickThrowStringBounds
259         : kQuickThrowArrayBounds;
260     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
261     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
262     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
263   }
264 
265   bool IsFatal() const override { return true; }
266 
267   const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
268 
269  private:
270   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
271 };
272 
273 class LoadMethodTypeSlowPathX86_64 : public SlowPathCode {
274  public:
275   explicit LoadMethodTypeSlowPathX86_64(HLoadMethodType* mt) : SlowPathCode(mt) {}
276 
277   void EmitNativeCode(CodeGenerator* codegen) override {
278     LocationSummary* locations = instruction_->GetLocations();
279     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
280 
281     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
282     __ Bind(GetEntryLabel());
283     SaveLiveRegisters(codegen, locations);
284 
285     const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex();
286     // Custom calling convention: RAX serves as both input and output.
287     __ movl(CpuRegister(RAX), Immediate(proto_index.index_));
288     x86_64_codegen->InvokeRuntime(kQuickResolveMethodType,
289                                   instruction_,
290                                   instruction_->GetDexPc(),
291                                   this);
292     CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
293     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
294     RestoreLiveRegisters(codegen, locations);
295 
296     __ jmp(GetExitLabel());
297   }
298 
299   const char* GetDescription() const override { return "LoadMethodTypeSlowPathX86_64"; }
300 
301  private:
302   DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathX86_64);
303 };
304 
305 class LoadClassSlowPathX86_64 : public SlowPathCode {
306  public:
307   LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
308       : SlowPathCode(at), cls_(cls) {
309     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
310     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
311   }
312 
313   void EmitNativeCode(CodeGenerator* codegen) override {
314     LocationSummary* locations = instruction_->GetLocations();
315     Location out = locations->Out();
316     const uint32_t dex_pc = instruction_->GetDexPc();
317     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
318     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
319 
320     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
321     __ Bind(GetEntryLabel());
322     SaveLiveRegisters(codegen, locations);
323 
324     // Custom calling convention: RAX serves as both input and output.
325     if (must_resolve_type) {
326       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
327              x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
328              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
329                              &cls_->GetDexFile()));
330       dex::TypeIndex type_index = cls_->GetTypeIndex();
331       __ movl(CpuRegister(RAX), Immediate(type_index.index_));
332       if (cls_->NeedsAccessCheck()) {
333         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
334         x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
335       } else {
336         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
337         x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
338       }
339       // If we also must_do_clinit, the resolved type is now in the correct register.
340     } else {
341       DCHECK(must_do_clinit);
342       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
343       x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
344     }
345     if (must_do_clinit) {
346       x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
347       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
348     }
349 
350     // Move the class to the desired location.
351     if (out.IsValid()) {
352       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
353       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
354     }
355 
356     RestoreLiveRegisters(codegen, locations);
357     __ jmp(GetExitLabel());
358   }
359 
360   const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
361 
362  private:
363   // The class this slow path will load.
364   HLoadClass* const cls_;
365 
366   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
367 };
368 
369 class LoadStringSlowPathX86_64 : public SlowPathCode {
370  public:
371   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
372 
373   void EmitNativeCode(CodeGenerator* codegen) override {
374     LocationSummary* locations = instruction_->GetLocations();
375     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
376 
377     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
378     __ Bind(GetEntryLabel());
379     SaveLiveRegisters(codegen, locations);
380 
381     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
382     // Custom calling convention: RAX serves as both input and output.
383     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
384     x86_64_codegen->InvokeRuntime(kQuickResolveString,
385                                   instruction_,
386                                   instruction_->GetDexPc(),
387                                   this);
388     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
389     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
390     RestoreLiveRegisters(codegen, locations);
391 
392     __ jmp(GetExitLabel());
393   }
394 
395   const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
396 
397  private:
398   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
399 };
400 
401 class TypeCheckSlowPathX86_64 : public SlowPathCode {
402  public:
403   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
404       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
405 
406   void EmitNativeCode(CodeGenerator* codegen) override {
407     LocationSummary* locations = instruction_->GetLocations();
408     uint32_t dex_pc = instruction_->GetDexPc();
409     DCHECK(instruction_->IsCheckCast()
410            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
411 
412     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
413     __ Bind(GetEntryLabel());
414 
415     if (kPoisonHeapReferences &&
416         instruction_->IsCheckCast() &&
417         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
418       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
419       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
420     }
421 
422     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
423       SaveLiveRegisters(codegen, locations);
424     }
425 
426     // We're moving two locations to locations that could overlap, so we need a parallel
427     // move resolver.
428     InvokeRuntimeCallingConvention calling_convention;
429     codegen->EmitParallelMoves(locations->InAt(0),
430                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
431                                DataType::Type::kReference,
432                                locations->InAt(1),
433                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
434                                DataType::Type::kReference);
435     if (instruction_->IsInstanceOf()) {
436       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
437       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
438     } else {
439       DCHECK(instruction_->IsCheckCast());
440       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
441       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
442     }
443 
444     if (!is_fatal_) {
445       if (instruction_->IsInstanceOf()) {
446         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
447       }
448 
449       RestoreLiveRegisters(codegen, locations);
450       __ jmp(GetExitLabel());
451     }
452   }
453 
454   const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
455 
456   bool IsFatal() const override { return is_fatal_; }
457 
458  private:
459   const bool is_fatal_;
460 
461   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
462 };
463 
464 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
465  public:
466   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
467       : SlowPathCode(instruction) {}
468 
469   void EmitNativeCode(CodeGenerator* codegen) override {
470     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
471     __ Bind(GetEntryLabel());
472     LocationSummary* locations = instruction_->GetLocations();
473     SaveLiveRegisters(codegen, locations);
474     InvokeRuntimeCallingConvention calling_convention;
475     x86_64_codegen->Load32BitValue(
476         CpuRegister(calling_convention.GetRegisterAt(0)),
477         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
478     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
479     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
480   }
481 
482   const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
483 
484  private:
485   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
486 };
487 
488 class ArraySetSlowPathX86_64 : public SlowPathCode {
489  public:
490   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
491 
492   void EmitNativeCode(CodeGenerator* codegen) override {
493     LocationSummary* locations = instruction_->GetLocations();
494     __ Bind(GetEntryLabel());
495     SaveLiveRegisters(codegen, locations);
496 
497     InvokeRuntimeCallingConvention calling_convention;
498     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
499     parallel_move.AddMove(
500         locations->InAt(0),
501         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
502         DataType::Type::kReference,
503         nullptr);
504     parallel_move.AddMove(
505         locations->InAt(1),
506         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
507         DataType::Type::kInt32,
508         nullptr);
509     parallel_move.AddMove(
510         locations->InAt(2),
511         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
512         DataType::Type::kReference,
513         nullptr);
514     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
515 
516     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
517     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
518     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
519     RestoreLiveRegisters(codegen, locations);
520     __ jmp(GetExitLabel());
521   }
522 
523   const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
524 
525  private:
526   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
527 };
528 
529 // Slow path marking an object reference `ref` during a read
530 // barrier. The field `obj.field` in the object `obj` holding this
531 // reference does not get updated by this slow path after marking (see
532 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
533 //
534 // This means that after the execution of this slow path, `ref` will
535 // always be up-to-date, but `obj.field` may not; i.e., after the
536 // flip, `ref` will be a to-space reference, but `obj.field` will
537 // probably still be a from-space reference (unless it gets updated by
538 // another thread, or if another thread installed another object
539 // reference (different from `ref`) in `obj.field`).
540 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
541  public:
542   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
543                                 Location ref,
544                                 bool unpoison_ref_before_marking)
545       : SlowPathCode(instruction),
546         ref_(ref),
547         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
548   }
549 
550   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
551 
552   void EmitNativeCode(CodeGenerator* codegen) override {
553     DCHECK(codegen->EmitReadBarrier());
554     LocationSummary* locations = instruction_->GetLocations();
555     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
556     Register ref_reg = ref_cpu_reg.AsRegister();
557     DCHECK(locations->CanCall());
558     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
559     DCHECK(instruction_->IsInstanceFieldGet() ||
560            instruction_->IsStaticFieldGet() ||
561            instruction_->IsArrayGet() ||
562            instruction_->IsArraySet() ||
563            instruction_->IsLoadClass() ||
564            instruction_->IsLoadMethodType() ||
565            instruction_->IsLoadString() ||
566            instruction_->IsInstanceOf() ||
567            instruction_->IsCheckCast() ||
568            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
569         << "Unexpected instruction in read barrier marking slow path: "
570         << instruction_->DebugName();
571 
572     __ Bind(GetEntryLabel());
573     if (unpoison_ref_before_marking_) {
574       // Object* ref = ref_addr->AsMirrorPtr()
575       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
576     }
577     // No need to save live registers; it's taken care of by the
578     // entrypoint. Also, there is no need to update the stack mask,
579     // as this runtime call will not trigger a garbage collection.
580     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
581     DCHECK_NE(ref_reg, RSP);
582     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
583     // "Compact" slow path, saving two moves.
584     //
585     // Instead of using the standard runtime calling convention (input
586     // in RDI and output in RAX):
587     //
588     //   RDI <- ref
589     //   RAX <- ReadBarrierMark(RDI)
590     //   ref <- RAX
591     //
592     // we just use rX (the register containing `ref`) as input and output
593     // of a dedicated entrypoint:
594     //
595     //   rX <- ReadBarrierMarkRegX(rX)
596     //
597     int32_t entry_point_offset =
598         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
599     // This runtime call does not require a stack map.
600     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
601     __ jmp(GetExitLabel());
602   }
603 
604  private:
605   // The location (register) of the marked object reference.
606   const Location ref_;
607   // Should the reference in `ref_` be unpoisoned prior to marking it?
608   const bool unpoison_ref_before_marking_;
609 
610   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
611 };
612 
613 // Slow path marking an object reference `ref` during a read barrier,
614 // and if needed, atomically updating the field `obj.field` in the
615 // object `obj` holding this reference after marking (contrary to
616 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
617 // `obj.field`).
618 //
619 // This means that after the execution of this slow path, both `ref`
620 // and `obj.field` will be up-to-date; i.e., after the flip, both will
621 // hold the same to-space reference (unless another thread installed
622 // another object reference (different from `ref`) in `obj.field`).
623 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
624  public:
625   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
626                                               Location ref,
627                                               CpuRegister obj,
628                                               const Address& field_addr,
629                                               bool unpoison_ref_before_marking,
630                                               CpuRegister temp1,
631                                               CpuRegister temp2)
632       : SlowPathCode(instruction),
633         ref_(ref),
634         obj_(obj),
635         field_addr_(field_addr),
636         unpoison_ref_before_marking_(unpoison_ref_before_marking),
637         temp1_(temp1),
638         temp2_(temp2) {
639   }
640 
641   const char* GetDescription() const override {
642     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
643   }
644 
645   void EmitNativeCode(CodeGenerator* codegen) override {
646     DCHECK(codegen->EmitReadBarrier());
647     LocationSummary* locations = instruction_->GetLocations();
648     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
649     Register ref_reg = ref_cpu_reg.AsRegister();
650     DCHECK(locations->CanCall());
651     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
652     DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
653         << "Unexpected instruction in read barrier marking and field updating slow path: "
654         << instruction_->DebugName();
655     HInvoke* invoke = instruction_->AsInvoke();
656     DCHECK(IsUnsafeCASReference(invoke) ||
657            IsUnsafeGetAndSetReference(invoke) ||
658            IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
659 
660     __ Bind(GetEntryLabel());
661     if (unpoison_ref_before_marking_) {
662       // Object* ref = ref_addr->AsMirrorPtr()
663       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
664     }
665 
666     // Save the old (unpoisoned) reference.
667     __ movl(temp1_, ref_cpu_reg);
668 
669     // No need to save live registers; it's taken care of by the
670     // entrypoint. Also, there is no need to update the stack mask,
671     // as this runtime call will not trigger a garbage collection.
672     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
673     DCHECK_NE(ref_reg, RSP);
674     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
675     // "Compact" slow path, saving two moves.
676     //
677     // Instead of using the standard runtime calling convention (input
678     // in RDI and output in RAX):
679     //
680     //   RDI <- ref
681     //   RAX <- ReadBarrierMark(RDI)
682     //   ref <- RAX
683     //
684     // we just use rX (the register containing `ref`) as input and output
685     // of a dedicated entrypoint:
686     //
687     //   rX <- ReadBarrierMarkRegX(rX)
688     //
689     int32_t entry_point_offset =
690         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
691     // This runtime call does not require a stack map.
692     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
693 
694     // If the new reference is different from the old reference,
695     // update the field in the holder (`*field_addr`).
696     //
697     // Note that this field could also hold a different object, if
698     // another thread had concurrently changed it. In that case, the
699     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
700     // operation below would abort the CAS, leaving the field as-is.
701     NearLabel done;
702     __ cmpl(temp1_, ref_cpu_reg);
703     __ j(kEqual, &done);
704 
705     // Update the holder's field atomically.  This may fail if
706     // the mutator updates before us, but it's OK.  This is achieved
707     // using a strong compare-and-set (CAS) operation with relaxed
708     // memory synchronization ordering, where the expected value is
709     // the old reference and the desired value is the new reference.
710     // This operation is implemented with a 32-bit LOCK CMPXCHG
711     // instruction, which requires the expected value (the old
712     // reference) to be in EAX.  Save RAX beforehand, and move the
713     // expected value (stored in `temp1_`) into EAX.
714     __ movq(temp2_, CpuRegister(RAX));
715     __ movl(CpuRegister(RAX), temp1_);
716 
717     // Convenience aliases.
718     CpuRegister base = obj_;
719     CpuRegister expected = CpuRegister(RAX);
720     CpuRegister value = ref_cpu_reg;
721 
722     bool base_equals_value = (base.AsRegister() == value.AsRegister());
723     Register value_reg = ref_reg;
724     if (kPoisonHeapReferences) {
725       if (base_equals_value) {
726         // If `base` and `value` are the same register location, move
727         // `value_reg` to a temporary register.  This way, poisoning
728         // `value_reg` won't invalidate `base`.
729         value_reg = temp1_.AsRegister();
730         __ movl(CpuRegister(value_reg), base);
731       }
732 
733       // Check that the register allocator did not assign the location
734       // of `expected` (RAX) to `value` nor to `base`, so that heap
735       // poisoning (when enabled) works as intended below.
736       // - If `value` were equal to `expected`, both references would
737       //   be poisoned twice, meaning they would not be poisoned at
738       //   all, as heap poisoning uses address negation.
739       // - If `base` were equal to `expected`, poisoning `expected`
740       //   would invalidate `base`.
741       DCHECK_NE(value_reg, expected.AsRegister());
742       DCHECK_NE(base.AsRegister(), expected.AsRegister());
743 
744       __ PoisonHeapReference(expected);
745       __ PoisonHeapReference(CpuRegister(value_reg));
746     }
747 
748     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
749 
750     // If heap poisoning is enabled, we need to unpoison the values
751     // that were poisoned earlier.
752     if (kPoisonHeapReferences) {
753       if (base_equals_value) {
754         // `value_reg` has been moved to a temporary register, no need
755         // to unpoison it.
756       } else {
757         __ UnpoisonHeapReference(CpuRegister(value_reg));
758       }
759       // No need to unpoison `expected` (RAX), as it will be overwritten below.
760     }
761 
762     // Restore RAX.
763     __ movq(CpuRegister(RAX), temp2_);
764 
765     __ Bind(&done);
766     __ jmp(GetExitLabel());
767   }
768 
769  private:
770   // The location (register) of the marked object reference.
771   const Location ref_;
772   // The register containing the object holding the marked object reference field.
773   const CpuRegister obj_;
774   // The address of the marked reference field.  The base of this address must be `obj_`.
775   const Address field_addr_;
776 
777   // Should the reference in `ref_` be unpoisoned prior to marking it?
778   const bool unpoison_ref_before_marking_;
779 
780   const CpuRegister temp1_;
781   const CpuRegister temp2_;
782 
783   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
784 };
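// A rough pseudo-C++ sketch (illustration only, not emitted code) of the update performed by the
// slow path above; `old_ref` stands for the value saved in `temp1_`, and `compare_and_swap` stands
// for the 32-bit LOCK CMPXCHG sequence emitted in EmitNativeCode():
//
//   old_ref = ref;                           // save the reference before marking
//   ref = ReadBarrierMarkRegX(ref);          // dedicated per-register entrypoint
//   if (ref != old_ref) {
//     // Only update the field if it still holds `old_ref`; a concurrent update by
//     // another thread simply makes the CAS fail, which is fine.
//     compare_and_swap(field_addr, /*expected=*/ old_ref, /*desired=*/ ref);
//   }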
785 
786 // Slow path generating a read barrier for a heap reference.
787 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
788  public:
789   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
790                                             Location out,
791                                             Location ref,
792                                             Location obj,
793                                             uint32_t offset,
794                                             Location index)
795       : SlowPathCode(instruction),
796         out_(out),
797         ref_(ref),
798         obj_(obj),
799         offset_(offset),
800         index_(index) {
801     // If `obj` is equal to `out` or `ref`, it means the initial
802     // object has been overwritten by (or after) the heap object
803     // reference load to be instrumented, e.g.:
804     //
805     //   __ movl(out, Address(out, offset));
806     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
807     //
808     // In that case, we have lost the information about the original
809     // object, and the emitted read barrier cannot work properly.
810     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
811     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
812   }
813 
814   void EmitNativeCode(CodeGenerator* codegen) override {
815     DCHECK(codegen->EmitReadBarrier());
816     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
817     LocationSummary* locations = instruction_->GetLocations();
818     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
819     DCHECK(locations->CanCall());
820     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
821     DCHECK(instruction_->IsInstanceFieldGet() ||
822            instruction_->IsStaticFieldGet() ||
823            instruction_->IsArrayGet() ||
824            instruction_->IsInstanceOf() ||
825            instruction_->IsCheckCast() ||
826            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
827         << "Unexpected instruction in read barrier for heap reference slow path: "
828         << instruction_->DebugName();
829 
830     __ Bind(GetEntryLabel());
831     SaveLiveRegisters(codegen, locations);
832 
833     // We may have to change the index's value, but as `index_` is a
834     // constant member (like other "inputs" of this slow path),
835     // introduce a copy of it, `index`.
836     Location index = index_;
837     if (index_.IsValid()) {
838       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
839       if (instruction_->IsArrayGet()) {
840         // Compute real offset and store it in index_.
841         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
842         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
843         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
844           // We are about to change the value of `index_reg` (see the
845           // calls to art::x86_64::X86_64Assembler::shll and
846           // art::x86_64::X86_64Assembler::AddImmediate below), but it
847           // has not been saved by the previous call to
848           // art::SlowPathCode::SaveLiveRegisters, as it is a
849           // callee-save register --
850           // art::SlowPathCode::SaveLiveRegisters does not consider
851           // callee-save registers, as it has been designed with the
852           // assumption that callee-save registers are supposed to be
853           // handled by the called function.  So, as a callee-save
854           // register, `index_reg` _would_ eventually be saved onto
855           // the stack, but it would be too late: we would have
856           // changed its value earlier.  Therefore, we manually save
857           // it here into another freely available register,
858           // `free_reg`, chosen of course among the caller-save
859           // registers (as a callee-save `free_reg` register would
860           // exhibit the same problem).
861           //
862           // Note we could have requested a temporary register from
863           // the register allocator instead; but we prefer not to, as
864           // this is a slow path, and we know we can find a
865           // caller-save register that is available.
866           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
867           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
868           index_reg = free_reg;
869           index = Location::RegisterLocation(index_reg);
870         } else {
871           // The initial register stored in `index_` has already been
872           // saved in the call to art::SlowPathCode::SaveLiveRegisters
873           // (as it is not a callee-save register), so we can freely
874           // use it.
875         }
876         // Shifting the index value contained in `index_reg` by the
877         // scale factor (2) cannot overflow in practice, as the
878         // runtime is unable to allocate object arrays with a size
879         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
880         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
881         static_assert(
882             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
883             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
884         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
885       } else {
886         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
887         // intrinsics, `index_` is not shifted by a scale factor of 2
888         // (as in the case of ArrayGet), as it is actually an offset
889         // to an object field within an object.
890         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
891         DCHECK(instruction_->GetLocations()->Intrinsified());
892         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
893                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
894                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
895                (instruction_->AsInvoke()->GetIntrinsic() ==
896                     Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
897                (instruction_->AsInvoke()->GetIntrinsic() ==
898                     Intrinsics::kJdkUnsafeGetReferenceAcquire))
899             << instruction_->AsInvoke()->GetIntrinsic();
900         DCHECK_EQ(offset_, 0U);
901         DCHECK(index_.IsRegister());
902       }
903     }
904 
905     // We're moving two or three locations to locations that could
906     // overlap, so we need a parallel move resolver.
907     InvokeRuntimeCallingConvention calling_convention;
908     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
909     parallel_move.AddMove(ref_,
910                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
911                           DataType::Type::kReference,
912                           nullptr);
913     parallel_move.AddMove(obj_,
914                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
915                           DataType::Type::kReference,
916                           nullptr);
917     if (index.IsValid()) {
918       parallel_move.AddMove(index,
919                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
920                             DataType::Type::kInt32,
921                             nullptr);
922       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
923     } else {
924       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
925       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
926     }
927     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
928                                   instruction_,
929                                   instruction_->GetDexPc(),
930                                   this);
931     CheckEntrypointTypes<
932         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
933     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
934 
935     RestoreLiveRegisters(codegen, locations);
936     __ jmp(GetExitLabel());
937   }
938 
939   const char* GetDescription() const override {
940     return "ReadBarrierForHeapReferenceSlowPathX86_64";
941   }
942 
943  private:
944   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
945     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
946     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
947     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
948       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
949         return static_cast<CpuRegister>(i);
950       }
951     }
952     // We shall never fail to find a free caller-save register, as
953     // there are more than two core caller-save registers on x86-64
954     // (meaning it is possible to find one which is different from
955     // `ref` and `obj`).
956     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
957     LOG(FATAL) << "Could not find a free caller-save register";
958     UNREACHABLE();
959   }
960 
961   const Location out_;
962   const Location ref_;
963   const Location obj_;
964   const uint32_t offset_;
965   // An additional location containing an index to an array.
966   // Only used for HArrayGet and the UnsafeGetObject &
967   // UnsafeGetObjectVolatile intrinsics.
968   const Location index_;
969 
970   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
971 };
972 
973 // Slow path generating a read barrier for a GC root.
974 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
975  public:
976   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
977       : SlowPathCode(instruction), out_(out), root_(root) {
978   }
979 
980   void EmitNativeCode(CodeGenerator* codegen) override {
981     DCHECK(codegen->EmitReadBarrier());
982     LocationSummary* locations = instruction_->GetLocations();
983     DCHECK(locations->CanCall());
984     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
985     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
986         << "Unexpected instruction in read barrier for GC root slow path: "
987         << instruction_->DebugName();
988 
989     __ Bind(GetEntryLabel());
990     SaveLiveRegisters(codegen, locations);
991 
992     InvokeRuntimeCallingConvention calling_convention;
993     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
994     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
995     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
996                                   instruction_,
997                                   instruction_->GetDexPc(),
998                                   this);
999     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1000     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
1001 
1002     RestoreLiveRegisters(codegen, locations);
1003     __ jmp(GetExitLabel());
1004   }
1005 
1006   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
1007 
1008  private:
1009   const Location out_;
1010   const Location root_;
1011 
1012   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
1013 };
1014 
1015 class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
1016  public:
1017   explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
1018       : SlowPathCode(instruction) {}
1019 
1020   void EmitNativeCode(CodeGenerator* codegen) override {
1021     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1022     LocationSummary* locations = instruction_->GetLocations();
1023     QuickEntrypointEnum entry_point =
1024         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
1025     __ Bind(GetEntryLabel());
1026     SaveLiveRegisters(codegen, locations);
1027     if (instruction_->IsMethodExitHook()) {
1028       // Load FrameSize to pass to the exit hook.
1029       __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
1030     }
1031     x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
1032     RestoreLiveRegisters(codegen, locations);
1033     __ jmp(GetExitLabel());
1034   }
1035 
1036   const char* GetDescription() const override {
1037     return "MethodEntryExitHooksSlowPath";
1038   }
1039 
1040  private:
1041   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
1042 };
1043 
1044 class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
1045  public:
1046   CompileOptimizedSlowPathX86_64(HSuspendCheck* suspend_check, uint64_t counter_address)
1047       : SlowPathCode(suspend_check),
1048         counter_address_(counter_address) {}
1049 
1050   void EmitNativeCode(CodeGenerator* codegen) override {
1051     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1052     __ Bind(GetEntryLabel());
1053     __ movq(CpuRegister(TMP), Immediate(counter_address_));
1054     __ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold()));
1055     if (instruction_ != nullptr) {
1056       // Only saves full width XMM for SIMD.
1057       SaveLiveRegisters(codegen, instruction_->GetLocations());
1058     }
1059     x86_64_codegen->GenerateInvokeRuntime(
1060         GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1061     if (instruction_ != nullptr) {
1062       // Only restores full width XMM for SIMD.
1063       RestoreLiveRegisters(codegen, instruction_->GetLocations());
1064     }
1065     __ jmp(GetExitLabel());
1066   }
1067 
1068   const char* GetDescription() const override {
1069     return "CompileOptimizedSlowPath";
1070   }
1071 
1072  private:
1073   uint64_t counter_address_;
1074 
1075   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
1076 };
1077 
1078 #undef __
1079 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1080 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
1081 
1082 inline Condition X86_64IntegerCondition(IfCondition cond) {
1083   switch (cond) {
1084     case kCondEQ: return kEqual;
1085     case kCondNE: return kNotEqual;
1086     case kCondLT: return kLess;
1087     case kCondLE: return kLessEqual;
1088     case kCondGT: return kGreater;
1089     case kCondGE: return kGreaterEqual;
1090     case kCondB:  return kBelow;
1091     case kCondBE: return kBelowEqual;
1092     case kCondA:  return kAbove;
1093     case kCondAE: return kAboveEqual;
1094   }
1095   LOG(FATAL) << "Unreachable";
1096   UNREACHABLE();
1097 }
1098 
1099 // Maps FP condition to x86_64 name.
1100 inline Condition X86_64FPCondition(IfCondition cond) {
1101   switch (cond) {
1102     case kCondEQ: return kEqual;
1103     case kCondNE: return kNotEqual;
1104     case kCondLT: return kBelow;
1105     case kCondLE: return kBelowEqual;
1106     case kCondGT: return kAbove;
1107     case kCondGE: return kAboveEqual;
1108     default:      break;  // should not happen
1109   }
1110   LOG(FATAL) << "Unreachable";
1111   UNREACHABLE();
1112 }
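// Note: the FP mapping above uses the unsigned-style conditions (below/above) because x86-64
// FP compares (ucomiss/ucomisd) report their result through CF/ZF as if the operands were
// unsigned integers, so jb/ja/jbe/jae are the appropriate branches.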
1113 
1114 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
1115   // We have to ensure that the native code we call directly (such as @CriticalNative
1116   // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
1117   // which are non-volatile for ART, but volatile for Native calls.  This will ensure
1118   // that they are saved in the prologue and properly restored.
1119   for (FloatRegister fp_reg : non_volatile_xmm_regs) {
1120     locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
1121   }
1122 }
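// Note: `non_volatile_xmm_regs` is assumed here to match the FP callee saves listed in
// kFpuCalleeSaves above (XMM12-XMM15), i.e. the XMM registers that ART preserves across calls
// but that the native ABI treats as caller-save.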
1123 
1124 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
1125     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
1126     [[maybe_unused]] ArtMethod* method) {
1127   return desired_dispatch_info;
1128 }
1129 
1130 void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
1131   switch (load_kind) {
1132     case MethodLoadKind::kBootImageLinkTimePcRelative:
1133       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1134       __ leal(temp.AsRegister<CpuRegister>(),
1135               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1136       RecordBootImageMethodPatch(invoke);
1137       break;
1138     case MethodLoadKind::kBootImageRelRo: {
1139       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1140       __ movl(temp.AsRegister<CpuRegister>(),
1141               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1142       RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1143       break;
1144     }
1145     case MethodLoadKind::kAppImageRelRo: {
1146       DCHECK(GetCompilerOptions().IsAppImage());
1147       __ movl(temp.AsRegister<CpuRegister>(),
1148               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1149       RecordAppImageMethodPatch(invoke);
1150       break;
1151     }
1152     case MethodLoadKind::kBssEntry: {
1153       __ movq(temp.AsRegister<CpuRegister>(),
1154               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1155       RecordMethodBssEntryPatch(invoke);
1156       // No need for memory fence, thanks to the x86-64 memory model.
1157       break;
1158     }
1159     case MethodLoadKind::kJitDirectAddress: {
1160       Load64BitValue(temp.AsRegister<CpuRegister>(),
1161                      reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
1162       break;
1163     }
1164     case MethodLoadKind::kRuntimeCall: {
1165       // Test situation, don't do anything.
1166       break;
1167     }
1168     default: {
1169       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
1170       UNREACHABLE();
1171     }
1172   }
1173 }
1174 
1175 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
1176     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1177   // All registers are assumed to be correctly set up.
1178 
1179   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
1180   switch (invoke->GetMethodLoadKind()) {
1181     case MethodLoadKind::kStringInit: {
1182       // temp = thread->string_init_entrypoint
1183       uint32_t offset =
1184           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1185       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1186       break;
1187     }
1188     case MethodLoadKind::kRecursive: {
1189       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1190       break;
1191     }
1192     case MethodLoadKind::kRuntimeCall: {
1193       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1194       return;  // No code pointer retrieval; the runtime performs the call directly.
1195     }
1196     case MethodLoadKind::kBootImageLinkTimePcRelative:
1197       // For kCallCriticalNative we skip loading the method and do the call directly.
1198       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
1199         break;
1200       }
1201       FALLTHROUGH_INTENDED;
1202     default: {
1203       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
1204       break;
1205     }
1206   }
1207 
1208   switch (invoke->GetCodePtrLocation()) {
1209     case CodePtrLocation::kCallSelf:
1210       DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
1211       __ call(&frame_entry_label_);
1212       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1213       break;
1214     case CodePtrLocation::kCallCriticalNative: {
1215       size_t out_frame_size =
1216           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1217                                     kNativeStackAlignment,
1218                                     GetCriticalNativeDirectCallFrameSize>(invoke);
1219       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
1220         DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1221         __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1222         RecordBootImageJniEntrypointPatch(invoke);
1223       } else {
1224         // (callee_method + offset_of_jni_entry_point)()
1225         __ call(Address(callee_method.AsRegister<CpuRegister>(),
1226                          ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1227       }
1228       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1229       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
1230       switch (invoke->GetType()) {
1231         case DataType::Type::kBool:
1232           __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1233           break;
1234         case DataType::Type::kInt8:
1235           __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1236           break;
1237         case DataType::Type::kUint16:
1238           __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1239           break;
1240         case DataType::Type::kInt16:
1241           __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1242           break;
1243         case DataType::Type::kInt32:
1244         case DataType::Type::kInt64:
1245         case DataType::Type::kFloat32:
1246         case DataType::Type::kFloat64:
1247         case DataType::Type::kVoid:
1248           break;
1249         default:
1250           DCHECK(false) << invoke->GetType();
1251           break;
1252       }
1253       if (out_frame_size != 0u) {
1254         DecreaseFrame(out_frame_size);
1255       }
1256       break;
1257     }
1258     case CodePtrLocation::kCallArtMethod:
1259       // (callee_method + offset_of_quick_compiled_code)()
1260       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1261                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1262                           kX86_64PointerSize).SizeValue()));
1263       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1264       break;
1265   }
1266 
1267   DCHECK(!IsLeafMethod());
1268 }
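
// A small standalone sketch of the zero-/sign-extension above, assuming the native callee
// leaves unspecified upper bits in RAX for narrow return types; the helper names here are
// illustrative and not part of ART.
#include <cstdint>

namespace narrow_return_sketch {

// Emulates what movzxb/movsxb/movzxw/movsxw do to RAX after a @CriticalNative call:
// only the low bits are meaningful for narrow return types, so the value is
// canonicalized before managed code consumes it.
constexpr uint64_t ZeroExtend8(uint64_t rax) { return static_cast<uint8_t>(rax); }    // kBool
constexpr int64_t SignExtend8(uint64_t rax) { return static_cast<int8_t>(rax); }      // kInt8
constexpr uint64_t ZeroExtend16(uint64_t rax) { return static_cast<uint16_t>(rax); }  // kUint16
constexpr int64_t SignExtend16(uint64_t rax) { return static_cast<int16_t>(rax); }    // kInt16

// A callee returning (int8_t)-1 may leave garbage above bit 7; sign extension recovers
// the canonical -1, while zero extension yields 0xff for an unsigned type.
static_assert(SignExtend8(0x123456789abcdeffull) == -1);
static_assert(ZeroExtend8(0x123456789abcdeffull) == 0xff);

}  // namespace narrow_return_sketch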
1269 
1270 void CodeGeneratorX86_64::GenerateVirtualCall(
1271     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1272   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1273   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1274       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1275 
1276   // Use the calling convention instead of the location of the receiver, as
1277   // intrinsics may have put the receiver in a different register. In the intrinsics
1278   // slow path, the arguments have been moved to the right place, so here we are
1279   // guaranteed that the receiver is the first register of the calling convention.
1280   InvokeDexCallingConvention calling_convention;
1281   Register receiver = calling_convention.GetRegisterAt(0);
1282 
1283   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1284   // /* HeapReference<Class> */ temp = receiver->klass_
1285   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1286   MaybeRecordImplicitNullCheck(invoke);
1287   // Instead of simply (possibly) unpoisoning `temp` here, we should
1288   // emit a read barrier for the previous class reference load.
1289   // However, this is not required in practice, as this is an
1290   // intermediate/temporary reference and because the current
1291   // concurrent copying collector keeps the from-space memory
1292   // intact/accessible until the end of the marking phase (a future
1293   // collector may not preserve this property).
1294   __ MaybeUnpoisonHeapReference(temp);
1295 
1296   MaybeGenerateInlineCacheCheck(invoke, temp);
1297 
1298   // temp = temp->GetMethodAt(method_offset);
1299   __ movq(temp, Address(temp, method_offset));
1300   // call temp->GetEntryPoint();
1301   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1302       kX86_64PointerSize).SizeValue()));
1303   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1304 }
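
// A rough standalone model of the dispatch sequence emitted above (not ART's real object
// layout): load the class from the receiver, index into the vtable embedded in the class,
// then call through the method's entry point.
#include <cstddef>

namespace vtable_dispatch_sketch {

struct Method {
  void (*entry_point)();
};

struct Class {
  // Embedded vtable: an array of method pointers stored directly in the class object.
  Method* vtable[16];
};

struct Object {
  Class* klass;
};

inline void InvokeVirtual(Object* receiver, size_t vtable_index) {
  Class* klass = receiver->klass;                // movl temp, [receiver + class_offset]
  Method* method = klass->vtable[vtable_index];  // movq temp, [temp + method_offset]
  method->entry_point();                         // call [temp + entry_point_offset]
}

}  // namespace vtable_dispatch_sketch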
1305 
1306 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1307   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1308   __ Bind(&boot_image_other_patches_.back().label);
1309 }
1310 
1311 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1312   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1313   __ Bind(&boot_image_other_patches_.back().label);
1314 }
1315 
1316 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1317   boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1318                                           invoke->GetResolvedMethodReference().index);
1319   __ Bind(&boot_image_method_patches_.back().label);
1320 }
1321 
1322 void CodeGeneratorX86_64::RecordAppImageMethodPatch(HInvoke* invoke) {
1323   app_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1324                                          invoke->GetResolvedMethodReference().index);
1325   __ Bind(&app_image_method_patches_.back().label);
1326 }
1327 
1328 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1329   DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1330          GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1331          ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1332                          invoke->GetMethodReference().dex_file));
1333   method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1334                                          invoke->GetMethodReference().index);
1335   __ Bind(&method_bss_entry_patches_.back().label);
1336 }
1337 
1338 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1339                                                    dex::TypeIndex type_index) {
1340   boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1341   __ Bind(&boot_image_type_patches_.back().label);
1342 }
1343 
1344 void CodeGeneratorX86_64::RecordAppImageTypePatch(const DexFile& dex_file,
1345                                                   dex::TypeIndex type_index) {
1346   app_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1347   __ Bind(&app_image_type_patches_.back().label);
1348 }
1349 
1350 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1351   ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1352   switch (load_class->GetLoadKind()) {
1353     case HLoadClass::LoadKind::kBssEntry:
1354       patches = &type_bss_entry_patches_;
1355       break;
1356     case HLoadClass::LoadKind::kBssEntryPublic:
1357       patches = &public_type_bss_entry_patches_;
1358       break;
1359     case HLoadClass::LoadKind::kBssEntryPackage:
1360       patches = &package_type_bss_entry_patches_;
1361       break;
1362     default:
1363       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1364       UNREACHABLE();
1365   }
1366   patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1367   return &patches->back().label;
1368 }
1369 
1370 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1371   boot_image_string_patches_.emplace_back(
1372       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1373   __ Bind(&boot_image_string_patches_.back().label);
1374 }
1375 
1376 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1377   string_bss_entry_patches_.emplace_back(
1378       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1379   return &string_bss_entry_patches_.back().label;
1380 }
1381 
1382 Label* CodeGeneratorX86_64::NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type) {
1383   method_type_bss_entry_patches_.emplace_back(
1384       &load_method_type->GetDexFile(), load_method_type->GetProtoIndex().index_);
1385   return &method_type_bss_entry_patches_.back().label;
1386 }
1387 
1388 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1389   boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1390                                                   invoke->GetResolvedMethodReference().index);
1391   __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1392 }
1393 
1394 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1395   if (GetCompilerOptions().IsBootImage()) {
1396     __ leal(reg,
1397             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1398     RecordBootImageIntrinsicPatch(boot_image_reference);
1399   } else if (GetCompilerOptions().GetCompilePic()) {
1400     __ movl(reg,
1401             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1402     RecordBootImageRelRoPatch(boot_image_reference);
1403   } else {
1404     DCHECK(GetCompilerOptions().IsJitCompiler());
1405     gc::Heap* heap = Runtime::Current()->GetHeap();
1406     DCHECK(!heap->GetBootImageSpaces().empty());
1407     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1408     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1409   }
1410 }
1411 
1412 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1413   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1414   if (GetCompilerOptions().IsBootImage()) {
1415     // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1416     __ leal(reg,
1417             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1418     MethodReference target_method = invoke->GetResolvedMethodReference();
1419     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1420     boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1421     __ Bind(&boot_image_type_patches_.back().label);
1422   } else {
1423     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1424     LoadBootImageAddress(reg, boot_image_offset);
1425   }
1426 }
1427 
1428 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1429   if (GetCompilerOptions().IsBootImage()) {
1430     ScopedObjectAccess soa(Thread::Current());
1431     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1432     boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1433     __ Bind(&boot_image_type_patches_.back().label);
1434   } else {
1435     uint32_t boot_image_offset = GetBootImageOffset(class_root);
1436     LoadBootImageAddress(reg, boot_image_offset);
1437   }
1438 }
1439 
1440 // The label points to the end of the "movl" (or another) instruction, but the literal offset
1441 // for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
1442 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1443 
1444 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1445 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1446     const ArenaDeque<PatchInfo<Label>>& infos,
1447     ArenaVector<linker::LinkerPatch>* linker_patches) {
1448   for (const PatchInfo<Label>& info : infos) {
1449     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1450     linker_patches->push_back(
1451         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1452   }
1453 }
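
// A minimal sketch of the offset arithmetic above: the assembler binds each patch label at
// the end of the patched instruction, while the linker needs the offset of the 32-bit
// immediate itself, which occupies the instruction's last 4 bytes.
#include <cstdint>

namespace patch_offset_sketch {

constexpr uint32_t kImmediateSize = 4u;  // mirrors kLabelPositionToLiteralOffsetAdjustment

constexpr uint32_t LiteralOffset(uint32_t label_position) {
  return label_position - kImmediateSize;
}

// E.g. a pc-relative "movl reg, [rip + imm32]" whose end is bound at code offset 0x40
// has its imm32 at offsets 0x3c..0x3f.
static_assert(LiteralOffset(0x40) == 0x3c);

}  // namespace patch_offset_sketch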
1454 
1455 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1456 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1457                                      const DexFile* target_dex_file,
1458                                      uint32_t pc_insn_offset,
1459                                      uint32_t boot_image_offset) {
1460   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
1461   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1462 }
1463 
1464 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1465   DCHECK(linker_patches->empty());
1466   size_t size =
1467       boot_image_method_patches_.size() +
1468       app_image_method_patches_.size() +
1469       method_bss_entry_patches_.size() +
1470       boot_image_type_patches_.size() +
1471       app_image_type_patches_.size() +
1472       type_bss_entry_patches_.size() +
1473       public_type_bss_entry_patches_.size() +
1474       package_type_bss_entry_patches_.size() +
1475       boot_image_string_patches_.size() +
1476       string_bss_entry_patches_.size() +
1477       method_type_bss_entry_patches_.size() +
1478       boot_image_jni_entrypoint_patches_.size() +
1479       boot_image_other_patches_.size();
1480   linker_patches->reserve(size);
1481   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1482     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1483         boot_image_method_patches_, linker_patches);
1484     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1485         boot_image_type_patches_, linker_patches);
1486     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1487         boot_image_string_patches_, linker_patches);
1488   } else {
1489     DCHECK(boot_image_method_patches_.empty());
1490     DCHECK(boot_image_type_patches_.empty());
1491     DCHECK(boot_image_string_patches_.empty());
1492   }
1493   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
1494   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
1495   if (GetCompilerOptions().IsBootImage()) {
1496     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1497         boot_image_other_patches_, linker_patches);
1498   } else {
1499     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
1500         boot_image_other_patches_, linker_patches);
1501     EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
1502         app_image_method_patches_, linker_patches);
1503     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
1504         app_image_type_patches_, linker_patches);
1505   }
1506   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1507       method_bss_entry_patches_, linker_patches);
1508   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1509       type_bss_entry_patches_, linker_patches);
1510   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1511       public_type_bss_entry_patches_, linker_patches);
1512   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1513       package_type_bss_entry_patches_, linker_patches);
1514   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1515       string_bss_entry_patches_, linker_patches);
1516   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>(
1517       method_type_bss_entry_patches_, linker_patches);
1518   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1519       boot_image_jni_entrypoint_patches_, linker_patches);
1520   DCHECK_EQ(size, linker_patches->size());
1521 }
1522 
1523 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1524   stream << Register(reg);
1525 }
1526 
1527 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1528   stream << FloatRegister(reg);
1529 }
1530 
1531 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1532   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1533 }
1534 
1535 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1536   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1537   return kX86_64WordSize;
1538 }
1539 
1540 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1541   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1542   return kX86_64WordSize;
1543 }
1544 
1545 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1546   if (GetGraph()->HasSIMD()) {
1547     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1548   } else {
1549     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1550   }
1551   return GetSlowPathFPWidth();
1552 }
1553 
1554 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1555   if (GetGraph()->HasSIMD()) {
1556     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1557   } else {
1558     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1559   }
1560   return GetSlowPathFPWidth();
1561 }
1562 
1563 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1564                                         HInstruction* instruction,
1565                                         uint32_t dex_pc,
1566                                         SlowPathCode* slow_path) {
1567   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1568   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1569   if (EntrypointRequiresStackMap(entrypoint)) {
1570     RecordPcInfo(instruction, dex_pc, slow_path);
1571   }
1572 }
1573 
1574 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1575                                                               HInstruction* instruction,
1576                                                               SlowPathCode* slow_path) {
1577   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1578   GenerateInvokeRuntime(entry_point_offset);
1579 }
1580 
1581 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1582   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1583 }
1584 
1585 namespace detail {
1586 
1587 // Mark which intrinsics we don't have handcrafted code for.
1588 template <Intrinsics T>
1589 struct IsUnimplemented {
1590   bool is_unimplemented = false;
1591 };
1592 
1593 #define TRUE_OVERRIDE(Name)                     \
1594   template <>                                   \
1595   struct IsUnimplemented<Intrinsics::k##Name> { \
1596     bool is_unimplemented = true;               \
1597   };
1598 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
1599 #undef TRUE_OVERRIDE
1600 
1601 static constexpr bool kIsIntrinsicUnimplemented[] = {
1602     false,  // kNone
1603 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1604     IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1605     ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1606 #undef IS_UNIMPLEMENTED
1607 };
1608 
1609 }  // namespace detail
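
// A standalone illustration of the pattern used in the `detail` namespace above: a primary
// template defaults to "implemented", an X-macro list specializes the unimplemented entries,
// and a constexpr array is built by expanding another X-macro over all entries. The enum and
// macros here are made up for the example; ART's version stores a non-static member instead
// of a static constant.
namespace unimplemented_table_sketch {

enum class Op { kAdd, kSub, kMul };

template <Op>
struct IsUnimplemented { static constexpr bool value = false; };

#define SKETCH_UNIMPLEMENTED_OPS(V) V(Mul)
#define SKETCH_TRUE_OVERRIDE(Name)               \
  template <>                                    \
  struct IsUnimplemented<Op::k##Name> { static constexpr bool value = true; };
SKETCH_UNIMPLEMENTED_OPS(SKETCH_TRUE_OVERRIDE)
#undef SKETCH_TRUE_OVERRIDE

#define SKETCH_ALL_OPS(V) V(Add) V(Sub) V(Mul)
static constexpr bool kIsOpUnimplemented[] = {
#define SKETCH_IS_UNIMPLEMENTED(Name) IsUnimplemented<Op::k##Name>::value,
    SKETCH_ALL_OPS(SKETCH_IS_UNIMPLEMENTED)
#undef SKETCH_IS_UNIMPLEMENTED
};
#undef SKETCH_ALL_OPS
#undef SKETCH_UNIMPLEMENTED_OPS

static_assert(!kIsOpUnimplemented[0] && !kIsOpUnimplemented[1] && kIsOpUnimplemented[2]);

}  // namespace unimplemented_table_sketch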
1610 
1611 static constexpr int kNumberOfCpuRegisterPairs = 0;
1612 // Use a fake return address register to mimic Quick.
1613 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1614 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1615                                          const CompilerOptions& compiler_options,
1616                                          OptimizingCompilerStats* stats)
1617     : CodeGenerator(graph,
1618                     kNumberOfCpuRegisters,
1619                     kNumberOfFloatRegisters,
1620                     kNumberOfCpuRegisterPairs,
1621                     ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
1622                         | (1 << kFakeReturnRegister),
1623                     ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
1624                     compiler_options,
1625                     stats,
1626                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1627       block_labels_(nullptr),
1628       location_builder_(graph, this),
1629       instruction_visitor_(graph, this),
1630       move_resolver_(graph->GetAllocator(), this),
1631       assembler_(graph->GetAllocator(),
1632                  compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1633       constant_area_start_(0),
1634       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1635       app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1636       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1637       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1638       app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1639       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1640       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1641       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1642       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1643       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1644       method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1645       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1646       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1647       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1648       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1649       jit_method_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1650       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1651   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1652 }
1653 
1654 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1655                                                                CodeGeneratorX86_64* codegen)
1656       : InstructionCodeGenerator(graph, codegen),
1657         assembler_(codegen->GetAssembler()),
1658         codegen_(codegen) {}
1659 
1660 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1661   // Stack register is always reserved.
1662   blocked_core_registers_[RSP] = true;
1663 
1664   // Block the register used as TMP.
1665   blocked_core_registers_[TMP] = true;
1666 }
1667 
1668 static dwarf::Reg DWARFReg(Register reg) {
1669   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1670 }
1671 
1672 static dwarf::Reg DWARFReg(FloatRegister reg) {
1673   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1674 }
1675 
1676 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1677   LocationSummary* locations = new (GetGraph()->GetAllocator())
1678       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1679   // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1680   // two 32-bit values in EAX + EDX even on 64-bit architectures.
1681   locations->AddTemp(Location::RegisterLocation(RAX));
1682   locations->AddTemp(Location::RegisterLocation(RDX));
1683 }
1684 
1685 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1686   SlowPathCode* slow_path =
1687       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1688   LocationSummary* locations = instruction->GetLocations();
1689   codegen_->AddSlowPath(slow_path);
1690 
1691   if (instruction->IsMethodExitHook()) {
1692     // Check if we are required to check if the caller needs a deoptimization. Strictly speaking,
1693     // it would be sufficient to check if the CheckCallerForDeopt bit is set, though it is faster
1694     // to check if the value is simply non-zero. The kCHA bit isn't used in debuggable runtimes as
1695     // CHA optimization is disabled there. The other bit is used when this method itself requires
1696     // a deoptimization due to redefinition. So it is safe to just check for a non-zero value here.
1697     __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
1698             Immediate(0));
1699     __ j(kNotEqual, slow_path->GetEntryLabel());
1700   }
1701 
1702   uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1703   MemberOffset  offset = instruction->IsMethodExitHook() ?
1704       instrumentation::Instrumentation::HaveMethodExitListenersOffset()
1705       : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1706   __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
1707   __ cmpb(Address(CpuRegister(TMP), 0),
1708           Immediate(instrumentation::Instrumentation::kFastTraceListeners));
1709   // Check if there are any method entry / exit listeners. If no, continue with execution.
1710   __ j(kLess, slow_path->GetExitLabel());
1711   // Check if there are any slow method entry / exit listeners. If yes, take the slow path.
1712   __ j(kGreater, slow_path->GetEntryLabel());
1713 
1714   // Check if there is space in the buffer for a new entry; if not, take the slow path.
1715   CpuRegister init_entry = locations->GetTemp(0).AsRegister<CpuRegister>();
1716   // Use a register that is different from RAX and RDX. RDTSC returns its result in RAX and RDX,
1717   // and we use curr_entry to store the result into the buffer.
1718   CpuRegister curr_entry = CpuRegister(TMP);
1719   DCHECK(curr_entry.AsRegister() != RAX);
1720   DCHECK(curr_entry.AsRegister() != RDX);
1721   uint64_t trace_buffer_curr_entry_offset =
1722       Thread::TraceBufferCurrPtrOffset<kX86_64PointerSize>().SizeValue();
1723   __ gs()->movq(CpuRegister(curr_entry),
1724                 Address::Absolute(trace_buffer_curr_entry_offset, /* no_rip= */ true));
1725   __ subq(CpuRegister(curr_entry), Immediate(kNumEntriesForWallClock * sizeof(void*)));
1726   __ gs()->movq(init_entry,
1727                 Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(),
1728                                   /* no_rip= */ true));
1729   __ cmpq(curr_entry, init_entry);
1730   __ j(kLess, slow_path->GetEntryLabel());
1731 
1732   // Update the index in the `Thread`.
1733   __ gs()->movq(Address::Absolute(trace_buffer_curr_entry_offset, /* no_rip= */ true),
1734                 CpuRegister(curr_entry));
1735 
1736   // Record method pointer and action.
1737   CpuRegister method = init_entry;
1738   __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1739   // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
1740   // so there is no need to set the bits since they are 0 already.
1741   if (instruction->IsMethodExitHook()) {
1742     DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1743     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1744     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1745     __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1746   }
1747   __ movq(Address(curr_entry, kMethodOffsetInBytes), CpuRegister(method));
1748   // Get the timestamp. rdtsc returns the timestamp in RAX + RDX even on 64-bit architectures.
1749   __ rdtsc();
1750   __ shlq(CpuRegister(RDX), Immediate(32));
1751   __ orq(CpuRegister(RAX), CpuRegister(RDX));
1752   __ movq(Address(curr_entry, kTimestampOffsetInBytes), CpuRegister(RAX));
1753   __ Bind(slow_path->GetExitLabel());
1754 }
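
// A standalone sketch of two details above, assuming only that ArtMethod objects are aligned
// to at least 4 bytes (so the low two bits of the method pointer are free to carry the trace
// action) and that rdtsc returns the low half of the timestamp in EAX and the high half in EDX.
// The enum below merely mirrors the TraceAction values used by the generated code.
#include <cstdint>

namespace trace_entry_sketch {

enum class TraceAction : uint64_t { kTraceMethodEnter = 0, kTraceMethodExit = 1 };

constexpr uint64_t TagMethodPointer(uint64_t method, TraceAction action) {
  return method | static_cast<uint64_t>(action);  // orq method, action
}

constexpr uint64_t UntagMethodPointer(uint64_t tagged) {
  return tagged & ~uint64_t{3};  // strip the two action bits
}

constexpr uint64_t ComposeTimestamp(uint32_t eax, uint32_t edx) {
  return (static_cast<uint64_t>(edx) << 32) | eax;  // shlq rdx, 32; orq rax, rdx
}

static_assert(TagMethodPointer(0x1000, TraceAction::kTraceMethodExit) == 0x1001);
static_assert(UntagMethodPointer(0x1001) == 0x1000);
static_assert(ComposeTimestamp(0xdeadbeefu, 0x12345678u) == 0x12345678deadbeefull);

}  // namespace trace_entry_sketch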
1755 
1756 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1757   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1758   DCHECK(codegen_->RequiresCurrentMethod());
1759   GenerateMethodEntryExitHook(instruction);
1760 }
1761 
1762 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1763   switch (instr->InputAt(0)->GetType()) {
1764     case DataType::Type::kReference:
1765     case DataType::Type::kBool:
1766     case DataType::Type::kUint8:
1767     case DataType::Type::kInt8:
1768     case DataType::Type::kUint16:
1769     case DataType::Type::kInt16:
1770     case DataType::Type::kInt32:
1771     case DataType::Type::kInt64:
1772       locations->SetInAt(0, Location::RegisterLocation(RAX));
1773       break;
1774 
1775     case DataType::Type::kFloat32:
1776     case DataType::Type::kFloat64:
1777       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1778       break;
1779 
1780     case DataType::Type::kVoid:
1781       locations->SetInAt(0, Location::NoLocation());
1782       break;
1783 
1784     default:
1785       LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1786   }
1787 }
1788 
1789 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1790   LocationSummary* locations = new (GetGraph()->GetAllocator())
1791       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1792   SetInForReturnValue(method_hook, locations);
1793   // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1794   // two 32-bit values in EAX + EDX even on 64-bit architectures.
1795   locations->AddTemp(Location::RegisterLocation(RAX));
1796   locations->AddTemp(Location::RegisterLocation(RDX));
1797 }
1798 
1799 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1800   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1801   DCHECK(codegen_->RequiresCurrentMethod());
1802   GenerateMethodEntryExitHook(instruction);
1803 }
1804 
1805 void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1806   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1807     NearLabel overflow;
1808     Register method = kMethodRegisterArgument;
1809     if (!is_frame_entry) {
1810       CHECK(RequiresCurrentMethod());
1811       method = TMP;
1812       __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1813     }
1814     __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1815             Immediate(interpreter::kNterpHotnessValue));
1816     __ j(kEqual, &overflow);
1817     __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1818             Immediate(-1));
1819     __ Bind(&overflow);
1820   }
1821 
1822   if (GetGraph()->IsCompilingBaseline() &&
1823       GetGraph()->IsUsefulOptimizing() &&
1824       !Runtime::Current()->IsAotCompiler()) {
1825     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1826     DCHECK(info != nullptr);
1827     CHECK(!HasEmptyFrame());
1828     uint64_t address = reinterpret_cast64<uint64_t>(info) +
1829         ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1830     SlowPathCode* slow_path =
1831         new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(suspend_check, address);
1832     AddSlowPath(slow_path);
1833     // Note: if the address were in the 32-bit range, we could use
1834     // Address::Absolute and avoid this movq.
1835     __ movq(CpuRegister(TMP), Immediate(address));
1836     // With multiple threads, this can overflow. This is OK; we will eventually see
1837     // it reach 0. Also, at this point we have no register available to look
1838     // at the counter directly.
1839     __ addw(Address(CpuRegister(TMP), 0), Immediate(-1));
1840     __ j(kEqual, slow_path->GetEntryLabel());
1841     __ Bind(slow_path->GetExitLabel());
1842   }
1843 }
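
// A standalone sketch of the baseline hotness scheme above: a 16-bit counter is decremented
// on method entries and loop back edges, and the compile-optimized slow path is taken when it
// reaches zero. Racy decrements from multiple threads may wrap the counter, which is tolerated
// because it will eventually be observed hitting zero again. The type below is illustrative.
#include <cstdint>

namespace hotness_sketch {

struct BaselineCounter {
  uint16_t value;
};

// Returns true when the method has become hot and should be recompiled.
inline bool DecrementAndCheckHot(BaselineCounter* counter) {
  counter->value = static_cast<uint16_t>(counter->value - 1u);  // addw [counter], -1
  return counter->value == 0;                                   // j(kEqual, slow path entry)
}

}  // namespace hotness_sketch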
1844 
1845 void CodeGeneratorX86_64::GenerateFrameEntry() {
1846   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1847 
1848   // Check if we need to generate the clinit check. We will jump to the
1849   // resolution stub if the class is not initialized and the executing thread is
1850   // not the thread initializing it.
1851   // We do this before constructing the frame to get the correct stack trace if
1852   // an exception is thrown.
1853   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1854     NearLabel resolution;
1855     // Check if we're visibly initialized.
1856 
1857     // We don't emit a read barrier here to save on code size. We rely on the
1858     // resolution trampoline to do a suspend check before re-entering this code.
1859     __ movl(CpuRegister(TMP),
1860             Address(CpuRegister(kMethodRegisterArgument),
1861                     ArtMethod::DeclaringClassOffset().Int32Value()));
1862     __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1863             Immediate(kShiftedVisiblyInitializedValue));
1864     __ j(kAboveEqual, &frame_entry_label_);
1865 
1866     // Check if we're initializing and the thread initializing is the one
1867     // executing the code.
1868     __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1869             Immediate(kShiftedInitializingValue));
1870     __ j(kBelow, &resolution);
1871 
1872     __ movl(CpuRegister(TMP),
1873             Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
1874     __ gs()->cmpl(
1875         CpuRegister(TMP),
1876         Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
1877     __ j(kEqual, &frame_entry_label_);
1878     __ Bind(&resolution);
1879 
1880     // Jump to the resolution stub.
1881     ThreadOffset64 entrypoint_offset =
1882         GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
1883     __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
1884   }
1885 
1886   __ Bind(&frame_entry_label_);
1887   bool skip_overflow_check = IsLeafMethod()
1888       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1889   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1890 
1891 
1892   if (!skip_overflow_check) {
1893     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1894     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1895     RecordPcInfo(nullptr, 0);
1896   }
1897 
1898   if (!HasEmptyFrame()) {
1899     // Make sure the frame size isn't unreasonably large.
1900     DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1901 
1902     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1903       Register reg = kCoreCalleeSaves[i];
1904       if (allocated_registers_.ContainsCoreRegister(reg)) {
1905         __ pushq(CpuRegister(reg));
1906         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1907         __ cfi().RelOffset(DWARFReg(reg), 0);
1908       }
1909     }
1910 
1911     int adjust = GetFrameSize() - GetCoreSpillSize();
1912     IncreaseFrame(adjust);
1913     uint32_t xmm_spill_location = GetFpuSpillStart();
1914     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1915 
1916     for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1917       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1918         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1919         __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1920         __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1921       }
1922     }
1923 
1924     // Save the current method if we need it. Note that we do not
1925     // do this in HCurrentMethod, as the instruction might have been removed
1926     // in the SSA graph.
1927     if (RequiresCurrentMethod()) {
1928       CHECK(!HasEmptyFrame());
1929       __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1930               CpuRegister(kMethodRegisterArgument));
1931     }
1932 
1933     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1934       CHECK(!HasEmptyFrame());
1935       // Initialize should_deoptimize flag to 0.
1936       __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1937     }
1938   }
1939 
1940   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1941 }
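
// A sketch of the implicit stack-overflow check in the prologue above: the probe reads from
// [RSP - reserved_bytes]. If fewer than reserved_bytes of stack remain, that address falls in
// the protected guard region and the fault handler turns the access into a StackOverflowError.
// Using testq (a read) means no stack memory is modified. The numbers below are arbitrary.
#include <cstddef>
#include <cstdint>

namespace stack_probe_sketch {

constexpr uint64_t ProbeAddress(uint64_t rsp, uint64_t reserved_bytes) {
  return rsp - reserved_bytes;
}

// E.g. with RSP = 0x7fff00000000 and an 8 KiB reservation the probe touches
// 0x7ffeffffe000, which must still be mapped for the method to be allowed to run.
static_assert(ProbeAddress(0x7fff00000000ull, 8 * 1024) == 0x7ffeffffe000ull);

}  // namespace stack_probe_sketch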
1942 
1943 void CodeGeneratorX86_64::GenerateFrameExit() {
1944   __ cfi().RememberState();
1945   if (!HasEmptyFrame()) {
1946     uint32_t xmm_spill_location = GetFpuSpillStart();
1947     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1948     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1949       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1950         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1951         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1952         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1953       }
1954     }
1955 
1956     int adjust = GetFrameSize() - GetCoreSpillSize();
1957     DecreaseFrame(adjust);
1958 
1959     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1960       Register reg = kCoreCalleeSaves[i];
1961       if (allocated_registers_.ContainsCoreRegister(reg)) {
1962         __ popq(CpuRegister(reg));
1963         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1964         __ cfi().Restore(DWARFReg(reg));
1965       }
1966     }
1967   }
1968   __ ret();
1969   __ cfi().RestoreState();
1970   __ cfi().DefCFAOffset(GetFrameSize());
1971 }
1972 
1973 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1974   __ Bind(GetLabelOf(block));
1975 }
1976 
1977 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1978   if (source.Equals(destination)) {
1979     return;
1980   }
1981   if (destination.IsRegister()) {
1982     CpuRegister dest = destination.AsRegister<CpuRegister>();
1983     if (source.IsRegister()) {
1984       __ movq(dest, source.AsRegister<CpuRegister>());
1985     } else if (source.IsFpuRegister()) {
1986       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1987     } else if (source.IsStackSlot()) {
1988       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1989     } else if (source.IsConstant()) {
1990       HConstant* constant = source.GetConstant();
1991       if (constant->IsLongConstant()) {
1992         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1993       } else if (constant->IsDoubleConstant()) {
1994         Load64BitValue(dest, GetInt64ValueOf(constant));
1995       } else {
1996         Load32BitValue(dest, GetInt32ValueOf(constant));
1997       }
1998     } else {
1999       DCHECK(source.IsDoubleStackSlot());
2000       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2001     }
2002   } else if (destination.IsFpuRegister()) {
2003     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
2004     if (source.IsRegister()) {
2005       __ movd(dest, source.AsRegister<CpuRegister>());
2006     } else if (source.IsFpuRegister()) {
2007       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
2008     } else if (source.IsConstant()) {
2009       HConstant* constant = source.GetConstant();
2010       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
2011       if (constant->IsFloatConstant()) {
2012         Load32BitValue(dest, static_cast<int32_t>(value));
2013       } else {
2014         Load64BitValue(dest, value);
2015       }
2016     } else if (source.IsStackSlot()) {
2017       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2018     } else {
2019       DCHECK(source.IsDoubleStackSlot());
2020       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2021     }
2022   } else if (destination.IsStackSlot()) {
2023     if (source.IsRegister()) {
2024       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
2025               source.AsRegister<CpuRegister>());
2026     } else if (source.IsFpuRegister()) {
2027       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
2028                source.AsFpuRegister<XmmRegister>());
2029     } else if (source.IsConstant()) {
2030       HConstant* constant = source.GetConstant();
2031       int32_t value = GetInt32ValueOf(constant);
2032       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
2033     } else {
2034       DCHECK(source.IsStackSlot()) << source;
2035       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2036       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2037     }
2038   } else {
2039     DCHECK(destination.IsDoubleStackSlot());
2040     if (source.IsRegister()) {
2041       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
2042               source.AsRegister<CpuRegister>());
2043     } else if (source.IsFpuRegister()) {
2044       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
2045                source.AsFpuRegister<XmmRegister>());
2046     } else if (source.IsConstant()) {
2047       HConstant* constant = source.GetConstant();
2048       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
2049       int64_t value = GetInt64ValueOf(constant);
2050       Store64BitValueToStack(destination, value);
2051     } else {
2052       DCHECK(source.IsDoubleStackSlot());
2053       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2054       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2055     }
2056   }
2057 }
2058 
2059 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
2060                                                     Location dst,
2061                                                     Address src) {
2062   switch (type) {
2063     case DataType::Type::kBool:
2064     case DataType::Type::kUint8:
2065       __ movzxb(dst.AsRegister<CpuRegister>(), src);
2066       break;
2067     case DataType::Type::kInt8:
2068       __ movsxb(dst.AsRegister<CpuRegister>(), src);
2069       break;
2070     case DataType::Type::kUint16:
2071       __ movzxw(dst.AsRegister<CpuRegister>(), src);
2072       break;
2073     case DataType::Type::kInt16:
2074       __ movsxw(dst.AsRegister<CpuRegister>(), src);
2075       break;
2076     case DataType::Type::kInt32:
2077     case DataType::Type::kUint32:
2078       __ movl(dst.AsRegister<CpuRegister>(), src);
2079       break;
2080     case DataType::Type::kInt64:
2081     case DataType::Type::kUint64:
2082       __ movq(dst.AsRegister<CpuRegister>(), src);
2083       break;
2084     case DataType::Type::kFloat32:
2085       __ movss(dst.AsFpuRegister<XmmRegister>(), src);
2086       break;
2087     case DataType::Type::kFloat64:
2088       __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
2089       break;
2090     case DataType::Type::kVoid:
2091     case DataType::Type::kReference:
2092       LOG(FATAL) << "Unreachable type " << type;
2093       UNREACHABLE();
2094   }
2095 }
2096 
2097 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
2098   DCHECK(location.IsRegister());
2099   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
2100 }
2101 
2102 void CodeGeneratorX86_64::MoveLocation(Location dst,
2103                                        Location src,
2104                                        [[maybe_unused]] DataType::Type dst_type) {
2105   Move(dst, src);
2106 }
2107 
2108 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
2109   if (location.IsRegister()) {
2110     locations->AddTemp(location);
2111   } else {
2112     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2113   }
2114 }
2115 
2116 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2117   if (successor->IsExitBlock()) {
2118     DCHECK(got->GetPrevious()->AlwaysThrows());
2119     return;  // no code needed
2120   }
2121 
2122   HBasicBlock* block = got->GetBlock();
2123   HInstruction* previous = got->GetPrevious();
2124 
2125   HLoopInformation* info = block->GetLoopInformation();
2126   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2127     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2128     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2129     return;
2130   }
2131 
2132   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2133     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2134   }
2135   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
2136     __ jmp(codegen_->GetLabelOf(successor));
2137   }
2138 }
2139 
2140 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
2141   got->SetLocations(nullptr);
2142 }
2143 
2144 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
2145   HandleGoto(got, got->GetSuccessor());
2146 }
2147 
2148 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2149   try_boundary->SetLocations(nullptr);
2150 }
2151 
2152 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2153   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2154   if (!successor->IsExitBlock()) {
2155     HandleGoto(try_boundary, successor);
2156   }
2157 }
2158 
2159 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
2160   exit->SetLocations(nullptr);
2161 }
2162 
2163 void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {}
2164 
2165 template<class LabelType>
2166 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
2167                                                      LabelType* true_label,
2168                                                      LabelType* false_label) {
2169   if (cond->IsFPConditionTrueIfNaN()) {
2170     __ j(kUnordered, true_label);
2171   } else if (cond->IsFPConditionFalseIfNaN()) {
2172     __ j(kUnordered, false_label);
2173   }
2174   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
2175 }
2176 
2177 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
2178   LocationSummary* locations = condition->GetLocations();
2179 
2180   Location left = locations->InAt(0);
2181   Location right = locations->InAt(1);
2182   DataType::Type type = condition->InputAt(0)->GetType();
2183   switch (type) {
2184     case DataType::Type::kBool:
2185     case DataType::Type::kUint8:
2186     case DataType::Type::kInt8:
2187     case DataType::Type::kUint16:
2188     case DataType::Type::kInt16:
2189     case DataType::Type::kInt32:
2190     case DataType::Type::kReference: {
2191       codegen_->GenerateIntCompare(left, right);
2192       break;
2193     }
2194     case DataType::Type::kInt64: {
2195       codegen_->GenerateLongCompare(left, right);
2196       break;
2197     }
2198     case DataType::Type::kFloat32: {
2199       if (right.IsFpuRegister()) {
2200         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2201       } else if (right.IsConstant()) {
2202         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2203                    codegen_->LiteralFloatAddress(
2204                        right.GetConstant()->AsFloatConstant()->GetValue()));
2205       } else {
2206         DCHECK(right.IsStackSlot());
2207         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2208                    Address(CpuRegister(RSP), right.GetStackIndex()));
2209       }
2210       break;
2211     }
2212     case DataType::Type::kFloat64: {
2213       if (right.IsFpuRegister()) {
2214         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2215       } else if (right.IsConstant()) {
2216         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2217                    codegen_->LiteralDoubleAddress(
2218                        right.GetConstant()->AsDoubleConstant()->GetValue()));
2219       } else {
2220         DCHECK(right.IsDoubleStackSlot());
2221         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2222                    Address(CpuRegister(RSP), right.GetStackIndex()));
2223       }
2224       break;
2225     }
2226     default:
2227       LOG(FATAL) << "Unexpected condition type " << type;
2228   }
2229 }
2230 
2231 template<class LabelType>
2232 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2233                                                                   LabelType* true_target_in,
2234                                                                   LabelType* false_target_in) {
2235   // Generated branching requires both targets to be explicit. If either of the
2236   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
2237   LabelType fallthrough_target;
2238   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2239   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2240 
2241   // Generate the comparison to set the CC.
2242   GenerateCompareTest(condition);
2243 
2244   // Now generate the correct jump(s).
2245   DataType::Type type = condition->InputAt(0)->GetType();
2246   switch (type) {
2247     case DataType::Type::kInt64: {
2248       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2249       break;
2250     }
2251     case DataType::Type::kFloat32: {
2252       GenerateFPJumps(condition, true_target, false_target);
2253       break;
2254     }
2255     case DataType::Type::kFloat64: {
2256       GenerateFPJumps(condition, true_target, false_target);
2257       break;
2258     }
2259     default:
2260       LOG(FATAL) << "Unexpected condition type " << type;
2261   }
2262 
2263   if (false_target != &fallthrough_target) {
2264     __ jmp(false_target);
2265   }
2266 
2267   if (fallthrough_target.IsLinked()) {
2268     __ Bind(&fallthrough_target);
2269   }
2270 }
2271 
2272 static bool AreEflagsSetFrom(HInstruction* cond,
2273                              HInstruction* branch,
2274                              const CompilerOptions& compiler_options) {
2275   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2276   // can be relied on only if the condition is set immediately before `branch`. We can't
2277   // use the eflags on long conditions if they are materialized due to the complex branching.
2278   return cond->IsCondition() &&
2279          cond->GetNext() == branch &&
2280          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2281          !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2282            compiler_options.ProfileBranches());
2283 }
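// Sketch of what this enables (register names are illustrative): when a materialized
// HCondition is immediately followed by the branch that consumes it, the caller can
// reuse the flags set by the comparison instead of re-testing the materialized boolean:
//
//   cmpl  r1, r2        // the HCondition's comparison
//   jl    true_target   // reuse EFLAGS, no 'testl reg, reg' needed
//
// Anything scheduled in between (even a register move) may clobber EFLAGS, hence the
// `cond->GetNext() == branch` requirement.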
2284 
2285 template<class LabelType>
2286 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2287                                                            size_t condition_input_index,
2288                                                            LabelType* true_target,
2289                                                            LabelType* false_target) {
2290   HInstruction* cond = instruction->InputAt(condition_input_index);
2291 
2292   if (true_target == nullptr && false_target == nullptr) {
2293     // Nothing to do. The code always falls through.
2294     return;
2295   } else if (cond->IsIntConstant()) {
2296     // Constant condition, statically compared against "true" (integer value 1).
2297     if (cond->AsIntConstant()->IsTrue()) {
2298       if (true_target != nullptr) {
2299         __ jmp(true_target);
2300       }
2301     } else {
2302       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2303       if (false_target != nullptr) {
2304         __ jmp(false_target);
2305       }
2306     }
2307     return;
2308   }
2309 
2310   // The following code generates these patterns:
2311   //  (1) true_target == nullptr && false_target != nullptr
2312   //        - opposite condition true => branch to false_target
2313   //  (2) true_target != nullptr && false_target == nullptr
2314   //        - condition true => branch to true_target
2315   //  (3) true_target != nullptr && false_target != nullptr
2316   //        - condition true => branch to true_target
2317   //        - branch to false_target
2318   if (IsBooleanValueOrMaterializedCondition(cond)) {
2319     if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2320       if (true_target == nullptr) {
2321         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2322       } else {
2323         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2324       }
2325     } else {
2326       // Materialized condition, compare against 0.
2327       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2328       if (lhs.IsRegister()) {
2329         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2330       } else {
2331         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2332       }
2333       if (true_target == nullptr) {
2334         __ j(kEqual, false_target);
2335       } else {
2336         __ j(kNotEqual, true_target);
2337       }
2338     }
2339   } else {
2340     // Condition has not been materialized, use its inputs as the
2341     // comparison and its condition as the branch condition.
2342     HCondition* condition = cond->AsCondition();
2343 
2344     // If this is a long or FP comparison that has been folded into
2345     // the HCondition, generate the comparison directly.
2346     DataType::Type type = condition->InputAt(0)->GetType();
2347     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2348       GenerateCompareTestAndBranch(condition, true_target, false_target);
2349       return;
2350     }
2351 
2352     Location lhs = condition->GetLocations()->InAt(0);
2353     Location rhs = condition->GetLocations()->InAt(1);
2354     codegen_->GenerateIntCompare(lhs, rhs);
2355     if (true_target == nullptr) {
2356       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2357     } else {
2358       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2359     }
2360   }
2361 
2362   // If neither branch falls through (case 3), the conditional branch to `true_target`
2363   // was already emitted (case 2) and we need to emit a jump to `false_target`.
2364   if (true_target != nullptr && false_target != nullptr) {
2365     __ jmp(false_target);
2366   }
2367 }
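// Illustrative lowering of the materialized-condition path above (register names and
// stack slots are placeholders): the boolean input is compared against zero and a
// single conditional jump is emitted, e.g.
//
//   testl eax, eax                     // condition lives in a register
//   jnz   true_target
// or
//   cmpl  dword ptr [rsp + slot], 0    // condition lives on the stack
//   jz    false_target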
2368 
2369 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2370   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2371   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2372     if (GetGraph()->IsCompilingBaseline() &&
2373         codegen_->GetCompilerOptions().ProfileBranches() &&
2374         !Runtime::Current()->IsAotCompiler()) {
2375       locations->SetInAt(0, Location::RequiresRegister());
2376       locations->AddTemp(Location::RequiresRegister());
2377     } else {
2378       locations->SetInAt(0, Location::Any());
2379     }
2380   }
2381 }
2382 
2383 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2384   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2385   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2386   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2387       nullptr : codegen_->GetLabelOf(true_successor);
2388   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2389       nullptr : codegen_->GetLabelOf(false_successor);
2390   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2391     if (GetGraph()->IsCompilingBaseline() &&
2392         codegen_->GetCompilerOptions().ProfileBranches() &&
2393         !Runtime::Current()->IsAotCompiler()) {
2394       DCHECK(if_instr->InputAt(0)->IsCondition());
2395       CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
2396       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2397       DCHECK(info != nullptr);
2398       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2399       // Currently, not all If branches are profiled.
2400       if (cache != nullptr) {
2401         uint64_t address =
2402             reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2403         static_assert(
2404             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2405             "Unexpected offsets for BranchCache");
2406         NearLabel done;
2407         Location lhs = if_instr->GetLocations()->InAt(0);
2408         __ movq(CpuRegister(TMP), Immediate(address));
2409         __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0));
2410         __ addw(temp, Immediate(1));
2411         __ j(kZero, &done);
2412         __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp);
2413         __ Bind(&done);
2414       }
2415     }
2416   }
2417   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2418 }
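// How the baseline branch profiling above works (a summary, not normative): BranchCache
// stores two adjacent uint16_t counters, 'false' followed by 'true' (hence the
// static_assert that the offsets differ by 2). The materialized condition value (0 or 1)
// is used as a TIMES_2 scaled index off the 'false' counter address, so the same code
// updates whichever counter matches the taken direction. The counter is saturating: if
// the 16-bit increment wraps to zero, the store is skipped via the j(kZero, &done) above.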
2419 
2420 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2421   LocationSummary* locations = new (GetGraph()->GetAllocator())
2422       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2423   InvokeRuntimeCallingConvention calling_convention;
2424   RegisterSet caller_saves = RegisterSet::Empty();
2425   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2426   locations->SetCustomSlowPathCallerSaves(caller_saves);
2427   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2428     locations->SetInAt(0, Location::Any());
2429   }
2430 }
2431 
2432 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2433   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2434   GenerateTestAndBranch<Label>(deoptimize,
2435                                /* condition_input_index= */ 0,
2436                                slow_path->GetEntryLabel(),
2437                                /* false_target= */ nullptr);
2438 }
2439 
2440 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2441   LocationSummary* locations = new (GetGraph()->GetAllocator())
2442       LocationSummary(flag, LocationSummary::kNoCall);
2443   locations->SetOut(Location::RequiresRegister());
2444 }
2445 
2446 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2447   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2448           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2449 }
2450 
2451 static bool SelectCanUseCMOV(HSelect* select) {
2452   // There are no conditional move instructions for XMMs.
2453   if (DataType::IsFloatingPointType(select->GetType())) {
2454     return false;
2455   }
2456 
2457   // An FP condition doesn't generate the single CC that we need.
2458   HInstruction* condition = select->GetCondition();
2459   if (condition->IsCondition() &&
2460       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2461     return false;
2462   }
2463 
2464   // We can generate a CMOV for this Select.
2465   return true;
2466 }
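// Rationale (informal): cmov consumes a single integer condition code, but an FP compare
// via ucomiss/ucomisd needs extra handling for unordered (NaN) inputs, so such selects
// fall back to the branch-and-move path in VisitSelect below.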
2467 
2468 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2469   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2470   if (DataType::IsFloatingPointType(select->GetType())) {
2471     locations->SetInAt(0, Location::RequiresFpuRegister());
2472     locations->SetInAt(1, Location::Any());
2473   } else {
2474     locations->SetInAt(0, Location::RequiresRegister());
2475     if (SelectCanUseCMOV(select)) {
2476       if (select->InputAt(1)->IsConstant()) {
2477         locations->SetInAt(1, Location::RequiresRegister());
2478       } else {
2479         locations->SetInAt(1, Location::Any());
2480       }
2481     } else {
2482       locations->SetInAt(1, Location::Any());
2483     }
2484   }
2485   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2486     locations->SetInAt(2, Location::RequiresRegister());
2487   }
2488   locations->SetOut(Location::SameAsFirstInput());
2489 }
2490 
2491 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2492   LocationSummary* locations = select->GetLocations();
2493   if (SelectCanUseCMOV(select)) {
2494     // If both the condition and the source types are integer, we can generate
2495     // a CMOV to implement Select.
2496     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2497     Location value_true_loc = locations->InAt(1);
2498     DCHECK(locations->InAt(0).Equals(locations->Out()));
2499 
2500     HInstruction* select_condition = select->GetCondition();
2501     Condition cond = kNotEqual;
2502 
2503     // Figure out how to test the 'condition'.
2504     if (select_condition->IsCondition()) {
2505       HCondition* condition = select_condition->AsCondition();
2506       if (!condition->IsEmittedAtUseSite()) {
2507         // This was a previously materialized condition.
2508         // Can we use the existing condition code?
2509         if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2510           // Materialization was the previous instruction.  Condition codes are right.
2511           cond = X86_64IntegerCondition(condition->GetCondition());
2512         } else {
2513           // No, we have to recreate the condition code.
2514           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2515           __ testl(cond_reg, cond_reg);
2516         }
2517       } else {
2518         GenerateCompareTest(condition);
2519         cond = X86_64IntegerCondition(condition->GetCondition());
2520       }
2521     } else {
2522       // Must be a Boolean condition, which needs to be compared to 0.
2523       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2524       __ testl(cond_reg, cond_reg);
2525     }
2526 
2527     // If the condition is true, overwrite the output, which already contains false.
2528     // Generate the correct sized CMOV.
2529     bool is_64_bit = DataType::Is64BitType(select->GetType());
2530     if (value_true_loc.IsRegister()) {
2531       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2532     } else {
2533       __ cmov(cond,
2534               value_false,
2535               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2536     }
2537   } else {
2538     NearLabel false_target;
2539     GenerateTestAndBranch<NearLabel>(select,
2540                                      /* condition_input_index= */ 2,
2541                                      /* true_target= */ nullptr,
2542                                      &false_target);
2543     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2544     __ Bind(&false_target);
2545   }
2546 }
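// Shape of the CMOV path above, assuming a materialized condition in a register
// (register names are illustrative only):
//
//   testl   ecx, ecx        // condition != 0 ?
//   cmovnel eax, edx        // out (pre-loaded with the 'false' value) := 'true' value
//
// The output is SameAsFirstInput, i.e. it already holds the 'false' input, so only the
// taken case needs a move and no branch is emitted.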
2547 
2548 void LocationsBuilderX86_64::VisitNop(HNop* nop) {
2549   new (GetGraph()->GetAllocator()) LocationSummary(nop);
2550 }
2551 
2552 void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
2553   // The environment recording already happened in CodeGenerator::Compile.
2554 }
2555 
2556 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2557   __ subq(CpuRegister(RSP), Immediate(adjustment));
2558   __ cfi().AdjustCFAOffset(adjustment);
2559 }
2560 
2561 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2562   __ addq(CpuRegister(RSP), Immediate(adjustment));
2563   __ cfi().AdjustCFAOffset(-adjustment);
2564 }
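// IncreaseFrame/DecreaseFrame keep the CFI in sync with RSP: each helper adjusts the
// stack pointer and records the matching AdjustCFAOffset, so unwinding through a
// manually adjusted region stays correct. Callers are presumably expected to pair an
// IncreaseFrame with a DecreaseFrame of the same size around the adjusted code.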
2565 
2566 void CodeGeneratorX86_64::GenerateNop() {
2567   __ nop();
2568 }
2569 
2570 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2571   LocationSummary* locations =
2572       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2573   // Handle the long/FP comparisons made in instruction simplification.
2574   switch (cond->InputAt(0)->GetType()) {
2575     case DataType::Type::kInt64:
2576       locations->SetInAt(0, Location::RequiresRegister());
2577       locations->SetInAt(1, Location::Any());
2578       break;
2579     case DataType::Type::kFloat32:
2580     case DataType::Type::kFloat64:
2581       locations->SetInAt(0, Location::RequiresFpuRegister());
2582       locations->SetInAt(1, Location::Any());
2583       break;
2584     default:
2585       locations->SetInAt(0, Location::RequiresRegister());
2586       locations->SetInAt(1, Location::Any());
2587       break;
2588   }
2589   if (!cond->IsEmittedAtUseSite()) {
2590     locations->SetOut(Location::RequiresRegister());
2591   }
2592 }
2593 
2594 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2595   if (cond->IsEmittedAtUseSite()) {
2596     return;
2597   }
2598 
2599   LocationSummary* locations = cond->GetLocations();
2600   Location lhs = locations->InAt(0);
2601   Location rhs = locations->InAt(1);
2602   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2603   NearLabel true_label, false_label;
2604 
2605   switch (cond->InputAt(0)->GetType()) {
2606     default:
2607       // Integer case.
2608 
2609       // Clear output register: setcc only sets the low byte.
2610       __ xorl(reg, reg);
2611 
2612       codegen_->GenerateIntCompare(lhs, rhs);
2613       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2614       return;
2615     case DataType::Type::kInt64:
2616       // Clear output register: setcc only sets the low byte.
2617       __ xorl(reg, reg);
2618 
2619       codegen_->GenerateLongCompare(lhs, rhs);
2620       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2621       return;
2622     case DataType::Type::kFloat32: {
2623       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2624       if (rhs.IsConstant()) {
2625         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2626         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2627       } else if (rhs.IsStackSlot()) {
2628         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2629       } else {
2630         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2631       }
2632       GenerateFPJumps(cond, &true_label, &false_label);
2633       break;
2634     }
2635     case DataType::Type::kFloat64: {
2636       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2637       if (rhs.IsConstant()) {
2638         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2639         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2640       } else if (rhs.IsDoubleStackSlot()) {
2641         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2642       } else {
2643         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2644       }
2645       GenerateFPJumps(cond, &true_label, &false_label);
2646       break;
2647     }
2648   }
2649 
2650   // Convert the jumps into the result.
2651   NearLabel done_label;
2652 
2653   // False case: result = 0.
2654   __ Bind(&false_label);
2655   __ xorl(reg, reg);
2656   __ jmp(&done_label);
2657 
2658   // True case: result = 1.
2659   __ Bind(&true_label);
2660   __ movl(reg, Immediate(1));
2661   __ Bind(&done_label);
2662 }
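// Summary of the materialization above (informal; exact compare form depends on the
// operand locations): integer conditions become
//
//   xorl   reg, reg       // clear the register first, setcc writes only the low byte
//   cmpl   lhs, rhs       // or cmpq / a compare against an immediate or stack slot
//   setcc  <cc>, reg
//
// while FP conditions reuse GenerateFPJumps and then convert the two jump targets into
// 0/1 through the false_label/true_label blocks at the end.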
2663 
2664 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2665   HandleCondition(comp);
2666 }
2667 
2668 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2669   HandleCondition(comp);
2670 }
2671 
2672 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2673   HandleCondition(comp);
2674 }
2675 
2676 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2677   HandleCondition(comp);
2678 }
2679 
2680 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2681   HandleCondition(comp);
2682 }
2683 
2684 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2685   HandleCondition(comp);
2686 }
2687 
2688 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2689   HandleCondition(comp);
2690 }
2691 
2692 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2693   HandleCondition(comp);
2694 }
2695 
2696 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2697   HandleCondition(comp);
2698 }
2699 
2700 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2701   HandleCondition(comp);
2702 }
2703 
2704 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2705   HandleCondition(comp);
2706 }
2707 
2708 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2709   HandleCondition(comp);
2710 }
2711 
2712 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2713   HandleCondition(comp);
2714 }
2715 
2716 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2717   HandleCondition(comp);
2718 }
2719 
2720 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2721   HandleCondition(comp);
2722 }
2723 
2724 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2725   HandleCondition(comp);
2726 }
2727 
2728 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2729   HandleCondition(comp);
2730 }
2731 
2732 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2733   HandleCondition(comp);
2734 }
2735 
2736 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2737   HandleCondition(comp);
2738 }
2739 
2740 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2741   HandleCondition(comp);
2742 }
2743 
2744 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2745   LocationSummary* locations =
2746       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2747   switch (compare->GetComparisonType()) {
2748     case DataType::Type::kBool:
2749     case DataType::Type::kUint8:
2750     case DataType::Type::kInt8:
2751     case DataType::Type::kUint16:
2752     case DataType::Type::kInt16:
2753     case DataType::Type::kInt32:
2754     case DataType::Type::kUint32:
2755     case DataType::Type::kInt64:
2756     case DataType::Type::kUint64: {
2757       locations->SetInAt(0, Location::RequiresRegister());
2758       locations->SetInAt(1, Location::Any());
2759       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2760       break;
2761     }
2762     case DataType::Type::kFloat32:
2763     case DataType::Type::kFloat64: {
2764       locations->SetInAt(0, Location::RequiresFpuRegister());
2765       locations->SetInAt(1, Location::Any());
2766       locations->SetOut(Location::RequiresRegister());
2767       break;
2768     }
2769     default:
2770       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2771   }
2772 }
2773 
2774 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2775   LocationSummary* locations = compare->GetLocations();
2776   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2777   Location left = locations->InAt(0);
2778   Location right = locations->InAt(1);
2779 
2780   NearLabel less, greater, done;
2781   DataType::Type type = compare->GetComparisonType();
2782   Condition less_cond = kLess;
2783 
2784   switch (type) {
2785     case DataType::Type::kUint32:
2786       less_cond = kBelow;
2787       FALLTHROUGH_INTENDED;
2788     case DataType::Type::kBool:
2789     case DataType::Type::kUint8:
2790     case DataType::Type::kInt8:
2791     case DataType::Type::kUint16:
2792     case DataType::Type::kInt16:
2793     case DataType::Type::kInt32: {
2794       codegen_->GenerateIntCompare(left, right);
2795       break;
2796     }
2797     case DataType::Type::kUint64:
2798       less_cond = kBelow;
2799       FALLTHROUGH_INTENDED;
2800     case DataType::Type::kInt64: {
2801       codegen_->GenerateLongCompare(left, right);
2802       break;
2803     }
2804     case DataType::Type::kFloat32: {
2805       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2806       if (right.IsConstant()) {
2807         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2808         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2809       } else if (right.IsStackSlot()) {
2810         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2811       } else {
2812         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2813       }
2814       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2815       less_cond = kBelow;  //  ucomis{s,d} sets CF
2816       break;
2817     }
2818     case DataType::Type::kFloat64: {
2819       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2820       if (right.IsConstant()) {
2821         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2822         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2823       } else if (right.IsDoubleStackSlot()) {
2824         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2825       } else {
2826         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2827       }
2828       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2829       less_cond = kBelow;  //  ucomis{s,d} sets CF
2830       break;
2831     }
2832     default:
2833       LOG(FATAL) << "Unexpected compare type " << type;
2834   }
2835 
2836   __ movl(out, Immediate(0));
2837   __ j(kEqual, &done);
2838   __ j(less_cond, &less);
2839 
2840   __ Bind(&greater);
2841   __ movl(out, Immediate(1));
2842   __ jmp(&done);
2843 
2844   __ Bind(&less);
2845   __ movl(out, Immediate(-1));
2846 
2847   __ Bind(&done);
2848 }
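// The three-way compare above leaves -1, 0 or 1 in 'out'. Unsigned and FP comparisons
// use kBelow as the "less" condition because cmp/ucomis report unsigned-below via CF;
// for FP, an unordered result (a NaN operand) jumps straight to 'greater' or 'less'
// according to the compare's gt/lt bias (the dex cmpg-/cmpl- distinction).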
2849 
2850 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2851   LocationSummary* locations =
2852       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2853   locations->SetOut(Location::ConstantLocation(constant));
2854 }
2855 
2856 void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2857   // Will be generated at use site.
2858 }
2859 
2860 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2861   LocationSummary* locations =
2862       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2863   locations->SetOut(Location::ConstantLocation(constant));
2864 }
2865 
2866 void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2867   // Will be generated at use site.
2868 }
2869 
2870 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2871   LocationSummary* locations =
2872       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2873   locations->SetOut(Location::ConstantLocation(constant));
2874 }
2875 
2876 void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2877   // Will be generated at use site.
2878 }
2879 
2880 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2881   LocationSummary* locations =
2882       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2883   locations->SetOut(Location::ConstantLocation(constant));
2884 }
2885 
2886 void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2887   // Will be generated at use site.
2888 }
2889 
2890 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2891   LocationSummary* locations =
2892       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2893   locations->SetOut(Location::ConstantLocation(constant));
2894 }
2895 
2896 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2897     [[maybe_unused]] HDoubleConstant* constant) {
2898   // Will be generated at use site.
2899 }
2900 
2901 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2902   constructor_fence->SetLocations(nullptr);
2903 }
2904 
2905 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2906     [[maybe_unused]] HConstructorFence* constructor_fence) {
2907   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2908 }
2909 
2910 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2911   memory_barrier->SetLocations(nullptr);
2912 }
2913 
2914 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2915   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2916 }
2917 
2918 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2919   ret->SetLocations(nullptr);
2920 }
2921 
2922 void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2923   codegen_->GenerateFrameExit();
2924 }
2925 
2926 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2927   LocationSummary* locations =
2928       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2929   SetInForReturnValue(ret, locations);
2930 }
2931 
2932 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2933   switch (ret->InputAt(0)->GetType()) {
2934     case DataType::Type::kReference:
2935     case DataType::Type::kBool:
2936     case DataType::Type::kUint8:
2937     case DataType::Type::kInt8:
2938     case DataType::Type::kUint16:
2939     case DataType::Type::kInt16:
2940     case DataType::Type::kInt32:
2941     case DataType::Type::kInt64:
2942       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2943       break;
2944 
2945     case DataType::Type::kFloat32: {
2946       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2947                 XMM0);
2948       // To simplify callers of an OSR method, we put the return value in both
2949       // the floating point and the core register.
2950       if (GetGraph()->IsCompilingOsr()) {
2951         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2952       }
2953       break;
2954     }
2955     case DataType::Type::kFloat64: {
2956       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2957                 XMM0);
2958       // To simplify callers of an OSR method, we put the return value in both
2959       // the floating point and the core register.
2960       if (GetGraph()->IsCompilingOsr()) {
2961         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2962       }
2963       break;
2964     }
2965 
2966     default:
2967       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2968   }
2969   codegen_->GenerateFrameExit();
2970 }
2971 
2972 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2973   switch (type) {
2974     case DataType::Type::kReference:
2975     case DataType::Type::kBool:
2976     case DataType::Type::kUint8:
2977     case DataType::Type::kInt8:
2978     case DataType::Type::kUint16:
2979     case DataType::Type::kInt16:
2980     case DataType::Type::kUint32:
2981     case DataType::Type::kInt32:
2982     case DataType::Type::kUint64:
2983     case DataType::Type::kInt64:
2984       return Location::RegisterLocation(RAX);
2985 
2986     case DataType::Type::kVoid:
2987       return Location::NoLocation();
2988 
2989     case DataType::Type::kFloat64:
2990     case DataType::Type::kFloat32:
2991       return Location::FpuRegisterLocation(XMM0);
2992   }
2993 }
2994 
2995 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2996   return Location::RegisterLocation(kMethodRegisterArgument);
2997 }
2998 
2999 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
3000   switch (type) {
3001     case DataType::Type::kReference:
3002     case DataType::Type::kBool:
3003     case DataType::Type::kUint8:
3004     case DataType::Type::kInt8:
3005     case DataType::Type::kUint16:
3006     case DataType::Type::kInt16:
3007     case DataType::Type::kInt32: {
3008       uint32_t index = gp_index_++;
3009       stack_index_++;
3010       if (index < calling_convention.GetNumberOfRegisters()) {
3011         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
3012       } else {
3013         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
3014       }
3015     }
3016 
3017     case DataType::Type::kInt64: {
3018       uint32_t index = gp_index_;
3019       stack_index_ += 2;
3020       if (index < calling_convention.GetNumberOfRegisters()) {
3021         gp_index_ += 1;
3022         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
3023       } else {
3024         gp_index_ += 2;
3025         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3026       }
3027     }
3028 
3029     case DataType::Type::kFloat32: {
3030       uint32_t index = float_index_++;
3031       stack_index_++;
3032       if (index < calling_convention.GetNumberOfFpuRegisters()) {
3033         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3034       } else {
3035         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
3036       }
3037     }
3038 
3039     case DataType::Type::kFloat64: {
3040       uint32_t index = float_index_++;
3041       stack_index_ += 2;
3042       if (index < calling_convention.GetNumberOfFpuRegisters()) {
3043         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3044       } else {
3045         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3046       }
3047     }
3048 
3049     case DataType::Type::kUint32:
3050     case DataType::Type::kUint64:
3051     case DataType::Type::kVoid:
3052       LOG(FATAL) << "Unexpected parameter type " << type;
3053       UNREACHABLE();
3054   }
3055   return Location::NoLocation();
3056 }
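// Worked example of the managed-ABI assignment above (assuming enough registers are
// available; the actual register names come from calling_convention): for a signature
// (int, long, float, double), the two integral arguments take the first two GP argument
// registers (gp_index_ 0 and 1), the two FP arguments take the first two FP argument
// registers (float_index_ 0 and 1), and stack_index_ still advances by 1 or 2 slots per
// argument so that any overflow argument lands at the right stack offset.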
3057 
3058 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
3059   DCHECK_NE(type, DataType::Type::kReference);
3060 
3061   Location location = Location::NoLocation();
3062   if (DataType::IsFloatingPointType(type)) {
3063     if (fpr_index_ < kParameterFloatRegistersLength) {
3064       location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
3065       ++fpr_index_;
3066     }
3067   } else {
3068     // Native ABI uses the same registers as managed, except that the method register RDI
3069     // is a normal argument.
3070     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
3071       location = Location::RegisterLocation(
3072           gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
3073       ++gpr_index_;
3074     }
3075   }
3076   if (location.IsInvalid()) {
3077     if (DataType::Is64BitType(type)) {
3078       location = Location::DoubleStackSlot(stack_offset_);
3079     } else {
3080       location = Location::StackSlot(stack_offset_);
3081     }
3082     stack_offset_ += kFramePointerSize;
3083 
3084     if (for_register_allocation_) {
3085       location = Location::Any();
3086     }
3087   }
3088   return location;
3089 }
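// Note: gpr_index_ == 0 maps to RDI above because @CriticalNative calls follow the
// native ABI, where the register that carries the ArtMethod* in managed code is just the
// first ordinary GP argument. Arguments that do not fit in registers get one
// kFramePointerSize (8-byte) stack slot each; while preparing locations for register
// allocation those stack locations are relaxed to Location::Any().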
3090 
3091 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
3092     const {
3093   // We perform conversion to the managed ABI return register after the call if needed.
3094   InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
3095   return dex_calling_convention.GetReturnLocation(type);
3096 }
3097 
3098 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
3099   // Pass the method in the hidden argument RAX.
3100   return Location::RegisterLocation(RAX);
3101 }
3102 
3103 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3104   // The trampoline uses the same calling convention as the dex calling convention,
3105   // except that instead of loading arg0 with the target Method*, arg0 will contain
3106   // the method_idx.
3107   HandleInvoke(invoke);
3108 }
3109 
3110 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3111   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3112 }
3113 
3114 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3115   // Explicit clinit checks triggered by static invokes must have been pruned by
3116   // art::PrepareForRegisterAllocation.
3117   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3118 
3119   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3120   if (intrinsic.TryDispatch(invoke)) {
3121     return;
3122   }
3123 
3124   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3125     CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
3126         /*for_register_allocation=*/ true);
3127     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3128     CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
3129   } else {
3130     HandleInvoke(invoke);
3131   }
3132 }
3133 
3134 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3135   if (invoke->GetLocations()->Intrinsified()) {
3136     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
3137     intrinsic.Dispatch(invoke);
3138     return true;
3139   }
3140   return false;
3141 }
3142 
3143 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3144   // Explicit clinit checks triggered by static invokes must have been pruned by
3145   // art::PrepareForRegisterAllocation.
3146   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3147 
3148   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3149     return;
3150   }
3151 
3152   LocationSummary* locations = invoke->GetLocations();
3153   codegen_->GenerateStaticOrDirectCall(
3154       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3155 }
3156 
3157 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
3158   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
3159   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3160 }
3161 
3162 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3163   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3164   if (intrinsic.TryDispatch(invoke)) {
3165     return;
3166   }
3167 
3168   HandleInvoke(invoke);
3169 }
3170 
3171 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3172   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3173     return;
3174   }
3175 
3176   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3177   DCHECK(!codegen_->IsLeafMethod());
3178 }
3179 
3180 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3181   HandleInvoke(invoke);
3182   // Add the hidden argument.
3183   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3184     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
3185                                     Location::RegisterLocation(RAX));
3186   }
3187   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
3188 }
3189 
3190 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3191                                                         CpuRegister klass) {
3192   DCHECK_EQ(RDI, klass.AsRegister());
3193   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3194     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3195     DCHECK(info != nullptr);
3196     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3197         info, GetCompilerOptions(), instruction->AsInvoke());
3198     if (cache != nullptr) {
3199       uint64_t address = reinterpret_cast64<uint64_t>(cache);
3200       NearLabel done;
3201       __ movq(CpuRegister(TMP), Immediate(address));
3202       // Fast path for a monomorphic cache.
3203       __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
3204       __ j(kEqual, &done);
3205       GenerateInvokeRuntime(
3206           GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
3207       __ Bind(&done);
3208     } else {
3209       // This is unexpected, but we don't guarantee stable compilation across
3210       // JIT runs, so just warn about it.
3211       ScopedObjectAccess soa(Thread::Current());
3212       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3213     }
3214   }
3215 }
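// Inline-cache fast path emitted above (informal summary): TMP holds the address of the
// InlineCache, the receiver's class is compared against the first cached class, and only
// on a mismatch do we call the kQuickUpdateInlineCache entrypoint to record the new
// receiver type. A cache hit therefore costs one immediate load, one memory compare and
// one taken branch.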
3216 
3217 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3218   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3219   LocationSummary* locations = invoke->GetLocations();
3220   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3221   Location receiver = locations->InAt(0);
3222   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
3223 
3224   if (receiver.IsStackSlot()) {
3225     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
3226     // /* HeapReference<Class> */ temp = temp->klass_
3227     __ movl(temp, Address(temp, class_offset));
3228   } else {
3229     // /* HeapReference<Class> */ temp = receiver->klass_
3230     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
3231   }
3232   codegen_->MaybeRecordImplicitNullCheck(invoke);
3233   // Instead of simply (possibly) unpoisoning `temp` here, we should
3234   // emit a read barrier for the previous class reference load.
3235   // However, this is not required in practice, as this is an
3236   // intermediate/temporary reference and because the current
3237   // concurrent copying collector keeps the from-space memory
3238   // intact/accessible until the end of the marking phase (a
3239   // future collector may not).
3240   __ MaybeUnpoisonHeapReference(temp);
3241 
3242   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3243 
3244   if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
3245       invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
3246     Location hidden_reg = locations->GetTemp(1);
3247     // Set the hidden argument. It is safe to do this here, as RAX
3248     // won't be modified thereafter, before the `call` instruction.
3249     // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
3250     DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
3251     codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3252   }
3253 
3254   // temp = temp->GetAddressOfIMT()
3255   __ movq(temp,
3256       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3257   // Compute the offset of the IMT entry for this interface method.
3258   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3259       invoke->GetImtIndex(), kX86_64PointerSize));
3260   // temp = temp->GetImtEntryAt(method_offset);
3261   __ movq(temp, Address(temp, method_offset));
3262   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3263     // We pass the method from the IMT in case of a conflict. This will ensure
3264     // we go into the runtime to resolve the actual method.
3265     Location hidden_reg = locations->GetTemp(1);
3266     __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
3267   }
3268   // call temp->GetEntryPoint();
3269   __ call(Address(
3270       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
3271 
3272   DCHECK(!codegen_->IsLeafMethod());
3273   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3274 }
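// Overall shape of the interface dispatch above (approximate, addressing elided):
//
//   movl  temp, [receiver + klass_offset]         // load the receiver's class
//   ...inline cache update (JIT baseline only)...
//   movq  temp, [temp + ImtPtrOffset]             // class -> IMT
//   movq  temp, [temp + OffsetOfElement(imt_index)]
//   call  [temp + entry_point_offset]
//
// RAX carries the hidden argument: either the interface method itself or, for
// kRuntimeCall, the IMT entry, so that a conflict stub can resolve the actual target at
// runtime.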
3275 
3276 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3277   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3278   if (intrinsic.TryDispatch(invoke)) {
3279     return;
3280   }
3281   HandleInvoke(invoke);
3282 }
3283 
3284 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3285   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3286     return;
3287   }
3288   codegen_->GenerateInvokePolymorphicCall(invoke);
3289 }
3290 
3291 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3292   HandleInvoke(invoke);
3293 }
3294 
3295 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3296   codegen_->GenerateInvokeCustomCall(invoke);
3297 }
3298 
3299 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3300   LocationSummary* locations =
3301       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3302   switch (neg->GetResultType()) {
3303     case DataType::Type::kInt32:
3304     case DataType::Type::kInt64:
3305       locations->SetInAt(0, Location::RequiresRegister());
3306       locations->SetOut(Location::SameAsFirstInput());
3307       break;
3308 
3309     case DataType::Type::kFloat32:
3310     case DataType::Type::kFloat64:
3311       locations->SetInAt(0, Location::RequiresFpuRegister());
3312       locations->SetOut(Location::SameAsFirstInput());
3313       locations->AddTemp(Location::RequiresFpuRegister());
3314       break;
3315 
3316     default:
3317       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3318   }
3319 }
3320 
3321 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3322   LocationSummary* locations = neg->GetLocations();
3323   Location out = locations->Out();
3324   Location in = locations->InAt(0);
3325   switch (neg->GetResultType()) {
3326     case DataType::Type::kInt32:
3327       DCHECK(in.IsRegister());
3328       DCHECK(in.Equals(out));
3329       __ negl(out.AsRegister<CpuRegister>());
3330       break;
3331 
3332     case DataType::Type::kInt64:
3333       DCHECK(in.IsRegister());
3334       DCHECK(in.Equals(out));
3335       __ negq(out.AsRegister<CpuRegister>());
3336       break;
3337 
3338     case DataType::Type::kFloat32: {
3339       DCHECK(in.Equals(out));
3340       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3341       // Implement float negation with an exclusive or with value
3342       // 0x80000000 (mask for bit 31, representing the sign of a
3343       // single-precision floating-point number).
3344       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3345       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3346       break;
3347     }
3348 
3349     case DataType::Type::kFloat64: {
3350       DCHECK(in.Equals(out));
3351       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3352       // Implement double negation with an exclusive or with value
3353       // 0x8000000000000000 (mask for bit 63, representing the sign of
3354       // a double-precision floating-point number).
3355       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3356       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3357       break;
3358     }
3359 
3360     default:
3361       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3362   }
3363 }
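// FP negation above is a sign-bit flip rather than a subtraction: XORing with
// 0x80000000 (float) or 0x8000000000000000 (double) toggles only the sign bit, so for
// example 1.0f becomes -1.0f and -0.0 becomes +0.0, while NaN payloads are left
// untouched. This matches Java's unary minus, which must preserve NaNs and distinguish
// +0.0 from -0.0.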
3364 
3365 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3366   LocationSummary* locations =
3367       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3368   DataType::Type result_type = conversion->GetResultType();
3369   DataType::Type input_type = conversion->GetInputType();
3370   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3371       << input_type << " -> " << result_type;
3372 
3373   switch (result_type) {
3374     case DataType::Type::kUint8:
3375     case DataType::Type::kInt8:
3376     case DataType::Type::kUint16:
3377     case DataType::Type::kInt16:
3378       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3379       locations->SetInAt(0, Location::Any());
3380       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3381       break;
3382 
3383     case DataType::Type::kInt32:
3384       switch (input_type) {
3385         case DataType::Type::kInt64:
3386           locations->SetInAt(0, Location::Any());
3387           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3388           break;
3389 
3390         case DataType::Type::kFloat32:
3391           locations->SetInAt(0, Location::RequiresFpuRegister());
3392           locations->SetOut(Location::RequiresRegister());
3393           break;
3394 
3395         case DataType::Type::kFloat64:
3396           locations->SetInAt(0, Location::RequiresFpuRegister());
3397           locations->SetOut(Location::RequiresRegister());
3398           break;
3399 
3400         default:
3401           LOG(FATAL) << "Unexpected type conversion from " << input_type
3402                      << " to " << result_type;
3403       }
3404       break;
3405 
3406     case DataType::Type::kInt64:
3407       switch (input_type) {
3408         case DataType::Type::kBool:
3409         case DataType::Type::kUint8:
3410         case DataType::Type::kInt8:
3411         case DataType::Type::kUint16:
3412         case DataType::Type::kInt16:
3413         case DataType::Type::kInt32:
3414           // TODO: We would benefit from a (to-be-implemented)
3415           // Location::RegisterOrStackSlot requirement for this input.
3416           locations->SetInAt(0, Location::RequiresRegister());
3417           locations->SetOut(Location::RequiresRegister());
3418           break;
3419 
3420         case DataType::Type::kFloat32:
3421           locations->SetInAt(0, Location::RequiresFpuRegister());
3422           locations->SetOut(Location::RequiresRegister());
3423           break;
3424 
3425         case DataType::Type::kFloat64:
3426           locations->SetInAt(0, Location::RequiresFpuRegister());
3427           locations->SetOut(Location::RequiresRegister());
3428           break;
3429 
3430         default:
3431           LOG(FATAL) << "Unexpected type conversion from " << input_type
3432                      << " to " << result_type;
3433       }
3434       break;
3435 
3436     case DataType::Type::kFloat32:
3437       switch (input_type) {
3438         case DataType::Type::kBool:
3439         case DataType::Type::kUint8:
3440         case DataType::Type::kInt8:
3441         case DataType::Type::kUint16:
3442         case DataType::Type::kInt16:
3443         case DataType::Type::kInt32:
3444           locations->SetInAt(0, Location::Any());
3445           locations->SetOut(Location::RequiresFpuRegister());
3446           break;
3447 
3448         case DataType::Type::kInt64:
3449           locations->SetInAt(0, Location::Any());
3450           locations->SetOut(Location::RequiresFpuRegister());
3451           break;
3452 
3453         case DataType::Type::kFloat64:
3454           locations->SetInAt(0, Location::Any());
3455           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3456           break;
3457 
3458         default:
3459           LOG(FATAL) << "Unexpected type conversion from " << input_type
3460                      << " to " << result_type;
3461       }
3462       break;
3463 
3464     case DataType::Type::kFloat64:
3465       switch (input_type) {
3466         case DataType::Type::kBool:
3467         case DataType::Type::kUint8:
3468         case DataType::Type::kInt8:
3469         case DataType::Type::kUint16:
3470         case DataType::Type::kInt16:
3471         case DataType::Type::kInt32:
3472           locations->SetInAt(0, Location::Any());
3473           locations->SetOut(Location::RequiresFpuRegister());
3474           break;
3475 
3476         case DataType::Type::kInt64:
3477           locations->SetInAt(0, Location::Any());
3478           locations->SetOut(Location::RequiresFpuRegister());
3479           break;
3480 
3481         case DataType::Type::kFloat32:
3482           locations->SetInAt(0, Location::Any());
3483           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3484           break;
3485 
3486         default:
3487           LOG(FATAL) << "Unexpected type conversion from " << input_type
3488                      << " to " << result_type;
3489       }
3490       break;
3491 
3492     default:
3493       LOG(FATAL) << "Unexpected type conversion from " << input_type
3494                  << " to " << result_type;
3495   }
3496 }
3497 
3498 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3499   LocationSummary* locations = conversion->GetLocations();
3500   Location out = locations->Out();
3501   Location in = locations->InAt(0);
3502   DataType::Type result_type = conversion->GetResultType();
3503   DataType::Type input_type = conversion->GetInputType();
3504   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3505       << input_type << " -> " << result_type;
3506   switch (result_type) {
3507     case DataType::Type::kUint8:
3508       switch (input_type) {
3509         case DataType::Type::kInt8:
3510         case DataType::Type::kUint16:
3511         case DataType::Type::kInt16:
3512         case DataType::Type::kInt32:
3513         case DataType::Type::kInt64:
3514           if (in.IsRegister()) {
3515             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3516           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3517             __ movzxb(out.AsRegister<CpuRegister>(),
3518                       Address(CpuRegister(RSP), in.GetStackIndex()));
3519           } else {
3520             __ movl(out.AsRegister<CpuRegister>(),
3521                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3522           }
3523           break;
3524 
3525         default:
3526           LOG(FATAL) << "Unexpected type conversion from " << input_type
3527                      << " to " << result_type;
3528       }
3529       break;
3530 
3531     case DataType::Type::kInt8:
3532       switch (input_type) {
3533         case DataType::Type::kUint8:
3534         case DataType::Type::kUint16:
3535         case DataType::Type::kInt16:
3536         case DataType::Type::kInt32:
3537         case DataType::Type::kInt64:
3538           if (in.IsRegister()) {
3539             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3540           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3541             __ movsxb(out.AsRegister<CpuRegister>(),
3542                       Address(CpuRegister(RSP), in.GetStackIndex()));
3543           } else {
3544             __ movl(out.AsRegister<CpuRegister>(),
3545                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3546           }
3547           break;
3548 
3549         default:
3550           LOG(FATAL) << "Unexpected type conversion from " << input_type
3551                      << " to " << result_type;
3552       }
3553       break;
3554 
3555     case DataType::Type::kUint16:
3556       switch (input_type) {
3557         case DataType::Type::kInt8:
3558         case DataType::Type::kInt16:
3559         case DataType::Type::kInt32:
3560         case DataType::Type::kInt64:
3561           if (in.IsRegister()) {
3562             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3563           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3564             __ movzxw(out.AsRegister<CpuRegister>(),
3565                       Address(CpuRegister(RSP), in.GetStackIndex()));
3566           } else {
3567             __ movl(out.AsRegister<CpuRegister>(),
3568                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3569           }
3570           break;
3571 
3572         default:
3573           LOG(FATAL) << "Unexpected type conversion from " << input_type
3574                      << " to " << result_type;
3575       }
3576       break;
3577 
3578     case DataType::Type::kInt16:
3579       switch (input_type) {
3580         case DataType::Type::kUint16:
3581         case DataType::Type::kInt32:
3582         case DataType::Type::kInt64:
3583           if (in.IsRegister()) {
3584             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3585           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3586             __ movsxw(out.AsRegister<CpuRegister>(),
3587                       Address(CpuRegister(RSP), in.GetStackIndex()));
3588           } else {
3589             __ movl(out.AsRegister<CpuRegister>(),
3590                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3591           }
3592           break;
3593 
3594         default:
3595           LOG(FATAL) << "Unexpected type conversion from " << input_type
3596                      << " to " << result_type;
3597       }
3598       break;
3599 
3600     case DataType::Type::kInt32:
3601       switch (input_type) {
3602         case DataType::Type::kInt64:
3603           if (in.IsRegister()) {
3604             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3605           } else if (in.IsDoubleStackSlot()) {
3606             __ movl(out.AsRegister<CpuRegister>(),
3607                     Address(CpuRegister(RSP), in.GetStackIndex()));
3608           } else {
3609             DCHECK(in.IsConstant());
3610             DCHECK(in.GetConstant()->IsLongConstant());
3611             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3612             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3613           }
3614           break;
3615 
3616         case DataType::Type::kFloat32: {
3617           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3618           CpuRegister output = out.AsRegister<CpuRegister>();
3619           NearLabel done, nan;
3620 
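               // Java's f2i narrowing requires NaN -> 0 and saturation to Int32 min/max for
               // out-of-range inputs, while cvttss2si alone returns the "integer indefinite"
               // value 0x80000000 for NaN and overflow alike. Negative overflow therefore
               // already yields the correct saturated value 0x80000000 (Int32 min), so only
               // the INT_MAX bound and the NaN case need the explicit handling below.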
3621           __ movl(output, Immediate(kPrimIntMax));
3622           // if input >= (float)INT_MAX goto done
3623           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3624           __ j(kAboveEqual, &done);
3625           // if input == NaN goto nan
3626           __ j(kUnordered, &nan);
3627           // output = float-to-int-truncate(input)
3628           __ cvttss2si(output, input, false);
3629           __ jmp(&done);
3630           __ Bind(&nan);
3631           //  output = 0
3632           __ xorl(output, output);
3633           __ Bind(&done);
3634           break;
3635         }
3636 
3637         case DataType::Type::kFloat64: {
3638           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3639           CpuRegister output = out.AsRegister<CpuRegister>();
3640           NearLabel done, nan;
3641 
3642           __ movl(output, Immediate(kPrimIntMax));
3643           // if input >= (double)INT_MAX goto done
3644           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3645           __ j(kAboveEqual, &done);
3646           // if input == NaN goto nan
3647           __ j(kUnordered, &nan);
3648           // output = double-to-int-truncate(input)
3649           __ cvttsd2si(output, input);
3650           __ jmp(&done);
3651           __ Bind(&nan);
3652           //  output = 0
3653           __ xorl(output, output);
3654           __ Bind(&done);
3655           break;
3656         }
3657 
3658         default:
3659           LOG(FATAL) << "Unexpected type conversion from " << input_type
3660                      << " to " << result_type;
3661       }
3662       break;
3663 
3664     case DataType::Type::kInt64:
3665       DCHECK(out.IsRegister());
3666       switch (input_type) {
3667         case DataType::Type::kBool:
3668         case DataType::Type::kUint8:
3669         case DataType::Type::kInt8:
3670         case DataType::Type::kUint16:
3671         case DataType::Type::kInt16:
3672         case DataType::Type::kInt32:
3673           DCHECK(in.IsRegister());
3674           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3675           break;
3676 
3677         case DataType::Type::kFloat32: {
3678           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3679           CpuRegister output = out.AsRegister<CpuRegister>();
3680           NearLabel done, nan;
3681 
3682           codegen_->Load64BitValue(output, kPrimLongMax);
3683           // if input >= (float)LONG_MAX goto done
3684           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3685           __ j(kAboveEqual, &done);
3686           // if input == NaN goto nan
3687           __ j(kUnordered, &nan);
3688           // output = float-to-long-truncate(input)
3689           __ cvttss2si(output, input, true);
3690           __ jmp(&done);
3691           __ Bind(&nan);
3692           //  output = 0
3693           __ xorl(output, output);
3694           __ Bind(&done);
3695           break;
3696         }
3697 
3698         case DataType::Type::kFloat64: {
3699           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3700           CpuRegister output = out.AsRegister<CpuRegister>();
3701           NearLabel done, nan;
3702 
3703           codegen_->Load64BitValue(output, kPrimLongMax);
3704           // if input >= (double)LONG_MAX goto done
3705           __ comisd(input, codegen_->LiteralDoubleAddress(
3706                 static_cast<double>(kPrimLongMax)));
3707           __ j(kAboveEqual, &done);
3708           // if input == NaN goto nan
3709           __ j(kUnordered, &nan);
3710           // output = double-to-long-truncate(input)
3711           __ cvttsd2si(output, input, true);
3712           __ jmp(&done);
3713           __ Bind(&nan);
3714           //  output = 0
3715           __ xorl(output, output);
3716           __ Bind(&done);
3717           break;
3718         }
3719 
3720         default:
3721           LOG(FATAL) << "Unexpected type conversion from " << input_type
3722                      << " to " << result_type;
3723       }
3724       break;
3725 
3726     case DataType::Type::kFloat32:
3727       switch (input_type) {
3728         case DataType::Type::kBool:
3729         case DataType::Type::kUint8:
3730         case DataType::Type::kInt8:
3731         case DataType::Type::kUint16:
3732         case DataType::Type::kInt16:
3733         case DataType::Type::kInt32:
3734           if (in.IsRegister()) {
3735             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3736           } else if (in.IsConstant()) {
3737             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3738             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3739             codegen_->Load32BitValue(dest, static_cast<float>(v));
3740           } else {
3741             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3742                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3743           }
3744           break;
3745 
3746         case DataType::Type::kInt64:
3747           if (in.IsRegister()) {
3748             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3749           } else if (in.IsConstant()) {
3750             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3751             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3752             codegen_->Load32BitValue(dest, static_cast<float>(v));
3753           } else {
3754             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3755                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3756           }
3757           break;
3758 
3759         case DataType::Type::kFloat64:
3760           if (in.IsFpuRegister()) {
3761             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3762           } else if (in.IsConstant()) {
3763             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3764             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3765             codegen_->Load32BitValue(dest, static_cast<float>(v));
3766           } else {
3767             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3768                         Address(CpuRegister(RSP), in.GetStackIndex()));
3769           }
3770           break;
3771 
3772         default:
3773           LOG(FATAL) << "Unexpected type conversion from " << input_type
3774                      << " to " << result_type;
3775       }
3776       break;
3777 
3778     case DataType::Type::kFloat64:
3779       switch (input_type) {
3780         case DataType::Type::kBool:
3781         case DataType::Type::kUint8:
3782         case DataType::Type::kInt8:
3783         case DataType::Type::kUint16:
3784         case DataType::Type::kInt16:
3785         case DataType::Type::kInt32:
3786           if (in.IsRegister()) {
3787             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3788           } else if (in.IsConstant()) {
3789             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3790             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3791             codegen_->Load64BitValue(dest, static_cast<double>(v));
3792           } else {
3793             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3794                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3795           }
3796           break;
3797 
3798         case DataType::Type::kInt64:
3799           if (in.IsRegister()) {
3800             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3801           } else if (in.IsConstant()) {
3802             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3803             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3804             codegen_->Load64BitValue(dest, static_cast<double>(v));
3805           } else {
3806             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3807                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3808           }
3809           break;
3810 
3811         case DataType::Type::kFloat32:
3812           if (in.IsFpuRegister()) {
3813             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3814           } else if (in.IsConstant()) {
3815             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3816             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3817             codegen_->Load64BitValue(dest, static_cast<double>(v));
3818           } else {
3819             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3820                         Address(CpuRegister(RSP), in.GetStackIndex()));
3821           }
3822           break;
3823 
3824         default:
3825           LOG(FATAL) << "Unexpected type conversion from " << input_type
3826                      << " to " << result_type;
3827       }
3828       break;
3829 
3830     default:
3831       LOG(FATAL) << "Unexpected type conversion from " << input_type
3832                  << " to " << result_type;
3833   }
3834 }
3835 
3836 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3837   LocationSummary* locations =
3838       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3839   switch (add->GetResultType()) {
3840     case DataType::Type::kInt32: {
3841       locations->SetInAt(0, Location::RequiresRegister());
3842       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3843       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3844       break;
3845     }
3846 
3847     case DataType::Type::kInt64: {
3848       locations->SetInAt(0, Location::RequiresRegister());
3849       // We can use a leaq or addq if the constant can fit in an immediate.
3850       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3851       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3852       break;
3853     }
3854 
3855     case DataType::Type::kFloat64:
3856     case DataType::Type::kFloat32: {
3857       locations->SetInAt(0, Location::RequiresFpuRegister());
3858       locations->SetInAt(1, Location::Any());
3859       locations->SetOut(Location::SameAsFirstInput());
3860       break;
3861     }
3862 
3863     default:
3864       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3865   }
3866 }
3867 
3868 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3869   LocationSummary* locations = add->GetLocations();
3870   Location first = locations->InAt(0);
3871   Location second = locations->InAt(1);
3872   Location out = locations->Out();
3873 
3874   switch (add->GetResultType()) {
3875     case DataType::Type::kInt32: {
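           // When the output register differs from both inputs, the leal below acts as a
           // non-destructive three-operand add: it computes first + second (or first + imm)
           // without modifying either source register and without touching the flags.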
3876       if (second.IsRegister()) {
3877         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3878           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3879         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3880           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3881         } else {
3882           __ leal(out.AsRegister<CpuRegister>(), Address(
3883               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3884         }
3885       } else if (second.IsConstant()) {
3886         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3887           __ addl(out.AsRegister<CpuRegister>(),
3888                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3889         } else {
3890           __ leal(out.AsRegister<CpuRegister>(), Address(
3891               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3892         }
3893       } else {
3894         DCHECK(first.Equals(locations->Out()));
3895         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3896       }
3897       break;
3898     }
3899 
3900     case DataType::Type::kInt64: {
3901       if (second.IsRegister()) {
3902         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3903           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3904         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3905           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3906         } else {
3907           __ leaq(out.AsRegister<CpuRegister>(), Address(
3908               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3909         }
3910       } else {
3911         DCHECK(second.IsConstant());
3912         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3913         int32_t int32_value = Low32Bits(value);
3914         DCHECK_EQ(int32_value, value);
3915         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3916           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3917         } else {
3918           __ leaq(out.AsRegister<CpuRegister>(), Address(
3919               first.AsRegister<CpuRegister>(), int32_value));
3920         }
3921       }
3922       break;
3923     }
3924 
3925     case DataType::Type::kFloat32: {
3926       if (second.IsFpuRegister()) {
3927         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3928       } else if (second.IsConstant()) {
3929         __ addss(first.AsFpuRegister<XmmRegister>(),
3930                  codegen_->LiteralFloatAddress(
3931                      second.GetConstant()->AsFloatConstant()->GetValue()));
3932       } else {
3933         DCHECK(second.IsStackSlot());
3934         __ addss(first.AsFpuRegister<XmmRegister>(),
3935                  Address(CpuRegister(RSP), second.GetStackIndex()));
3936       }
3937       break;
3938     }
3939 
3940     case DataType::Type::kFloat64: {
3941       if (second.IsFpuRegister()) {
3942         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3943       } else if (second.IsConstant()) {
3944         __ addsd(first.AsFpuRegister<XmmRegister>(),
3945                  codegen_->LiteralDoubleAddress(
3946                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3947       } else {
3948         DCHECK(second.IsDoubleStackSlot());
3949         __ addsd(first.AsFpuRegister<XmmRegister>(),
3950                  Address(CpuRegister(RSP), second.GetStackIndex()));
3951       }
3952       break;
3953     }
3954 
3955     default:
3956       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3957   }
3958 }
3959 
3960 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3961   LocationSummary* locations =
3962       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3963   switch (sub->GetResultType()) {
3964     case DataType::Type::kInt32: {
3965       locations->SetInAt(0, Location::RequiresRegister());
3966       locations->SetInAt(1, Location::Any());
3967       locations->SetOut(Location::SameAsFirstInput());
3968       break;
3969     }
3970     case DataType::Type::kInt64: {
3971       locations->SetInAt(0, Location::RequiresRegister());
3972       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3973       locations->SetOut(Location::SameAsFirstInput());
3974       break;
3975     }
3976     case DataType::Type::kFloat32:
3977     case DataType::Type::kFloat64: {
3978       locations->SetInAt(0, Location::RequiresFpuRegister());
3979       locations->SetInAt(1, Location::Any());
3980       locations->SetOut(Location::SameAsFirstInput());
3981       break;
3982     }
3983     default:
3984       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3985   }
3986 }
3987 
3988 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3989   LocationSummary* locations = sub->GetLocations();
3990   Location first = locations->InAt(0);
3991   Location second = locations->InAt(1);
3992   DCHECK(first.Equals(locations->Out()));
3993   switch (sub->GetResultType()) {
3994     case DataType::Type::kInt32: {
3995       if (second.IsRegister()) {
3996         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3997       } else if (second.IsConstant()) {
3998         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3999         __ subl(first.AsRegister<CpuRegister>(), imm);
4000       } else {
4001         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
4002       }
4003       break;
4004     }
4005     case DataType::Type::kInt64: {
4006       if (second.IsConstant()) {
4007         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
4008         DCHECK(IsInt<32>(value));
4009         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
4010       } else {
4011         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4012       }
4013       break;
4014     }
4015 
4016     case DataType::Type::kFloat32: {
4017       if (second.IsFpuRegister()) {
4018         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4019       } else if (second.IsConstant()) {
4020         __ subss(first.AsFpuRegister<XmmRegister>(),
4021                  codegen_->LiteralFloatAddress(
4022                      second.GetConstant()->AsFloatConstant()->GetValue()));
4023       } else {
4024         DCHECK(second.IsStackSlot());
4025         __ subss(first.AsFpuRegister<XmmRegister>(),
4026                  Address(CpuRegister(RSP), second.GetStackIndex()));
4027       }
4028       break;
4029     }
4030 
4031     case DataType::Type::kFloat64: {
4032       if (second.IsFpuRegister()) {
4033         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4034       } else if (second.IsConstant()) {
4035         __ subsd(first.AsFpuRegister<XmmRegister>(),
4036                  codegen_->LiteralDoubleAddress(
4037                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4038       } else {
4039         DCHECK(second.IsDoubleStackSlot());
4040         __ subsd(first.AsFpuRegister<XmmRegister>(),
4041                  Address(CpuRegister(RSP), second.GetStackIndex()));
4042       }
4043       break;
4044     }
4045 
4046     default:
4047       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4048   }
4049 }
4050 
4051 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
4052   LocationSummary* locations =
4053       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4054   switch (mul->GetResultType()) {
4055     case DataType::Type::kInt32: {
4056       locations->SetInAt(0, Location::RequiresRegister());
4057       locations->SetInAt(1, Location::Any());
4058       if (mul->InputAt(1)->IsIntConstant()) {
4059         // Can use 3 operand multiply.
4060         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4061       } else {
4062         locations->SetOut(Location::SameAsFirstInput());
4063       }
4064       break;
4065     }
4066     case DataType::Type::kInt64: {
4067       locations->SetInAt(0, Location::RequiresRegister());
4068       locations->SetInAt(1, Location::Any());
4069       if (mul->InputAt(1)->IsLongConstant() &&
4070           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
4071         // Can use 3 operand multiply.
4072         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4073       } else {
4074         locations->SetOut(Location::SameAsFirstInput());
4075       }
4076       break;
4077     }
4078     case DataType::Type::kFloat32:
4079     case DataType::Type::kFloat64: {
4080       locations->SetInAt(0, Location::RequiresFpuRegister());
4081       locations->SetInAt(1, Location::Any());
4082       locations->SetOut(Location::SameAsFirstInput());
4083       break;
4084     }
4085 
4086     default:
4087       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4088   }
4089 }
4090 
4091 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
4092   LocationSummary* locations = mul->GetLocations();
4093   Location first = locations->InAt(0);
4094   Location second = locations->InAt(1);
4095   Location out = locations->Out();
4096   switch (mul->GetResultType()) {
4097     case DataType::Type::kInt32:
4098       // The constant may have ended up in a register, so test explicitly to avoid
4099       // problems where the output may not be the same as the first operand.
4100       if (mul->InputAt(1)->IsIntConstant()) {
4101         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
4102         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
4103       } else if (second.IsRegister()) {
4104         DCHECK(first.Equals(out));
4105         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4106       } else {
4107         DCHECK(first.Equals(out));
4108         DCHECK(second.IsStackSlot());
4109         __ imull(first.AsRegister<CpuRegister>(),
4110                  Address(CpuRegister(RSP), second.GetStackIndex()));
4111       }
4112       break;
4113     case DataType::Type::kInt64: {
4114       // The constant may have ended up in a register, so test explicitly to avoid
4115       // problems where the output may not be the same as the first operand.
4116       if (mul->InputAt(1)->IsLongConstant()) {
4117         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
4118         if (IsInt<32>(value)) {
4119           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
4120                    Immediate(static_cast<int32_t>(value)));
4121         } else {
4122           // Have to use the constant area.
4123           DCHECK(first.Equals(out));
4124           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
4125         }
4126       } else if (second.IsRegister()) {
4127         DCHECK(first.Equals(out));
4128         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4129       } else {
4130         DCHECK(second.IsDoubleStackSlot());
4131         DCHECK(first.Equals(out));
4132         __ imulq(first.AsRegister<CpuRegister>(),
4133                  Address(CpuRegister(RSP), second.GetStackIndex()));
4134       }
4135       break;
4136     }
4137 
4138     case DataType::Type::kFloat32: {
4139       DCHECK(first.Equals(out));
4140       if (second.IsFpuRegister()) {
4141         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4142       } else if (second.IsConstant()) {
4143         __ mulss(first.AsFpuRegister<XmmRegister>(),
4144                  codegen_->LiteralFloatAddress(
4145                      second.GetConstant()->AsFloatConstant()->GetValue()));
4146       } else {
4147         DCHECK(second.IsStackSlot());
4148         __ mulss(first.AsFpuRegister<XmmRegister>(),
4149                  Address(CpuRegister(RSP), second.GetStackIndex()));
4150       }
4151       break;
4152     }
4153 
4154     case DataType::Type::kFloat64: {
4155       DCHECK(first.Equals(out));
4156       if (second.IsFpuRegister()) {
4157         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4158       } else if (second.IsConstant()) {
4159         __ mulsd(first.AsFpuRegister<XmmRegister>(),
4160                  codegen_->LiteralDoubleAddress(
4161                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4162       } else {
4163         DCHECK(second.IsDoubleStackSlot());
4164         __ mulsd(first.AsFpuRegister<XmmRegister>(),
4165                  Address(CpuRegister(RSP), second.GetStackIndex()));
4166       }
4167       break;
4168     }
4169 
4170     default:
4171       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4172   }
4173 }
4174 
4175 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
4176                                                      uint32_t stack_adjustment, bool is_float) {
4177   if (source.IsStackSlot()) {
4178     DCHECK(is_float);
4179     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4180   } else if (source.IsDoubleStackSlot()) {
4181     DCHECK(!is_float);
4182     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4183   } else {
4184     // Write the value to the temporary location on the stack and load to FP stack.
4185     if (is_float) {
4186       Location stack_temp = Location::StackSlot(temp_offset);
4187       codegen_->Move(stack_temp, source);
4188       __ flds(Address(CpuRegister(RSP), temp_offset));
4189     } else {
4190       Location stack_temp = Location::DoubleStackSlot(temp_offset);
4191       codegen_->Move(stack_temp, source);
4192       __ fldl(Address(CpuRegister(RSP), temp_offset));
4193     }
4194   }
4195 }
4196 
4197 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
4198   DataType::Type type = rem->GetResultType();
4199   bool is_float = type == DataType::Type::kFloat32;
4200   size_t elem_size = DataType::Size(type);
4201   LocationSummary* locations = rem->GetLocations();
4202   Location first = locations->InAt(0);
4203   Location second = locations->InAt(1);
4204   Location out = locations->Out();
4205 
4206   // Create stack space for 2 elements.
4207   // TODO: enhance register allocator to ask for stack temporaries.
4208   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
4209 
4210   // Load the values to the FP stack in reverse order, using temporaries if needed.
4211   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
4212   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
4213 
4214   // Loop doing FPREM until we stabilize.
4215   NearLabel retry;
4216   __ Bind(&retry);
4217   __ fprem();
4218 
4219   // Move FP status to AX.
4220   __ fstsw();
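       // fstsw stores the x87 status word in AX; kC2ConditionMask (0x400) selects the C2
       // bit (bit 10), which fprem leaves set while the partial remainder still needs
       // further reduction passes.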
4221 
4222   // And see if the argument reduction is complete. This is signaled by the
4223   // C2 FPU flag bit set to 0.
4224   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
4225   __ j(kNotEqual, &retry);
4226 
4227   // We have settled on the final value. Retrieve it into an XMM register.
4228   // Store FP top of stack to real stack.
4229   if (is_float) {
4230     __ fsts(Address(CpuRegister(RSP), 0));
4231   } else {
4232     __ fstl(Address(CpuRegister(RSP), 0));
4233   }
4234 
4235   // Pop the 2 items from the FP stack.
4236   __ fucompp();
4237 
4238   // Load the value from the stack into an XMM register.
4239   DCHECK(out.IsFpuRegister()) << out;
4240   if (is_float) {
4241     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4242   } else {
4243     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4244   }
4245 
4246   // And remove the temporary stack space we allocated.
4247   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
4248 }
4249 
4250 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4251   DCHECK(instruction->IsDiv() || instruction->IsRem());
4252 
4253   LocationSummary* locations = instruction->GetLocations();
4254   Location second = locations->InAt(1);
4255   DCHECK(second.IsConstant());
4256 
4257   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4258   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
4259   int64_t imm = Int64FromConstant(second.GetConstant());
4260 
4261   DCHECK(imm == 1 || imm == -1);
4262 
4263   switch (instruction->GetResultType()) {
4264     case DataType::Type::kInt32: {
4265       if (instruction->IsRem()) {
4266         __ xorl(output_register, output_register);
4267       } else {
4268         __ movl(output_register, input_register);
4269         if (imm == -1) {
4270           __ negl(output_register);
4271         }
4272       }
4273       break;
4274     }
4275 
4276     case DataType::Type::kInt64: {
4277       if (instruction->IsRem()) {
4278         __ xorl(output_register, output_register);
4279       } else {
4280         __ movq(output_register, input_register);
4281         if (imm == -1) {
4282           __ negq(output_register);
4283         }
4284       }
4285       break;
4286     }
4287 
4288     default:
4289       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4290   }
4291 }
4292 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4293   LocationSummary* locations = instruction->GetLocations();
4294   Location second = locations->InAt(1);
4295   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4296   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4297   int64_t imm = Int64FromConstant(second.GetConstant());
4298   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4299   uint64_t abs_imm = AbsOrMin(imm);
4300   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
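       // n % 2^k is computed as n & (2^k - 1), then biased by -2^k when n is negative and
       // the masked bits are non-zero, so the result keeps the sign of the dividend as Java
       // requires. E.g. -7 % 4: (-7 & 3) == 1, and since -7 < 0 the result is 1 - 4 == -3.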
4301   if (instruction->GetResultType() == DataType::Type::kInt32) {
4302     NearLabel done;
4303     __ movl(out, numerator);
4304     __ andl(out, Immediate(abs_imm-1));
4305     __ j(Condition::kZero, &done);
4306     __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4307     __ testl(numerator, numerator);
4308     __ cmov(Condition::kLess, out, tmp, false);
4309     __ Bind(&done);
4310 
4311   } else {
4312     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4313     codegen_->Load64BitValue(tmp, abs_imm - 1);
4314     NearLabel done;
4315 
4316     __ movq(out, numerator);
4317     __ andq(out, tmp);
4318     __ j(Condition::kZero, &done);
4319     __ movq(tmp, numerator);
4320     __ sarq(tmp, Immediate(63));
4321     __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4322     __ orq(out, tmp);
4323     __ Bind(&done);
4324   }
4325 }
4326 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4327   LocationSummary* locations = instruction->GetLocations();
4328   Location second = locations->InAt(1);
4329 
4330   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4331   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4332 
4333   int64_t imm = Int64FromConstant(second.GetConstant());
4334   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4335   uint64_t abs_imm = AbsOrMin(imm);
4336 
4337   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4338 
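       // Signed division by 2^k truncates toward zero, so negative numerators are biased by
       // adding (2^k - 1) before the arithmetic shift. E.g. -7 / 4: (-7 + 3) >> 2 == -1,
       // whereas a plain arithmetic shift of -7 would give the floor result -2.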
4339   if (instruction->GetResultType() == DataType::Type::kInt32) {
4340     // When the denominator is equal to 2, we can add the sign bit to the numerator in tmp.
4341     // Below we use an addl instruction instead of cmov, which gives us a 1 cycle benefit.
4342     if (abs_imm == 2) {
4343       __ leal(tmp, Address(numerator, 0));
4344       __ shrl(tmp, Immediate(31));
4345       __ addl(tmp, numerator);
4346     } else {
4347       __ leal(tmp, Address(numerator, abs_imm - 1));
4348       __ testl(numerator, numerator);
4349       __ cmov(kGreaterEqual, tmp, numerator);
4350     }
4351     int shift = CTZ(imm);
4352     __ sarl(tmp, Immediate(shift));
4353 
4354     if (imm < 0) {
4355       __ negl(tmp);
4356     }
4357 
4358     __ movl(output_register, tmp);
4359   } else {
4360     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4361     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4362     if (abs_imm == 2) {
4363       __ movq(rdx, numerator);
4364       __ shrq(rdx, Immediate(63));
4365       __ addq(rdx, numerator);
4366     } else {
4367       codegen_->Load64BitValue(rdx, abs_imm - 1);
4368       __ addq(rdx, numerator);
4369       __ testq(numerator, numerator);
4370       __ cmov(kGreaterEqual, rdx, numerator);
4371     }
4372     int shift = CTZ(imm);
4373     __ sarq(rdx, Immediate(shift));
4374 
4375     if (imm < 0) {
4376       __ negq(rdx);
4377     }
4378 
4379     __ movq(output_register, rdx);
4380   }
4381 }
4382 
4383 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4384   DCHECK(instruction->IsDiv() || instruction->IsRem());
4385 
4386   LocationSummary* locations = instruction->GetLocations();
4387   Location second = locations->InAt(1);
4388 
4389   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4390       : locations->GetTemp(0).AsRegister<CpuRegister>();
4391   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4392   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4393       : locations->Out().AsRegister<CpuRegister>();
4394   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4395 
4396   DCHECK_EQ(RAX, eax.AsRegister());
4397   DCHECK_EQ(RDX, edx.AsRegister());
4398   if (instruction->IsDiv()) {
4399     DCHECK_EQ(RAX, out.AsRegister());
4400   } else {
4401     DCHECK_EQ(RDX, out.AsRegister());
4402   }
4403 
4404   int64_t magic;
4405   int shift;
4406 
4407   // TODO: can these branches be written as one?
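       // Both branches implement the usual "magic number" technique (cf. Hacker's Delight,
       // chapter 10): the quotient is the high half of magic * numerator, corrected by
       // +/- numerator when the signs of the divisor and magic differ, then arithmetically
       // shifted and adjusted by the sign bit so that it rounds toward zero.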
4408   if (instruction->GetResultType() == DataType::Type::kInt32) {
4409     int imm = second.GetConstant()->AsIntConstant()->GetValue();
4410 
4411     CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4412 
4413     __ movl(numerator, eax);
4414 
4415     __ movl(eax, Immediate(magic));
4416     __ imull(numerator);
4417 
4418     if (imm > 0 && magic < 0) {
4419       __ addl(edx, numerator);
4420     } else if (imm < 0 && magic > 0) {
4421       __ subl(edx, numerator);
4422     }
4423 
4424     if (shift != 0) {
4425       __ sarl(edx, Immediate(shift));
4426     }
4427 
4428     __ movl(eax, edx);
4429     __ shrl(edx, Immediate(31));
4430     __ addl(edx, eax);
4431 
4432     if (instruction->IsRem()) {
4433       __ movl(eax, numerator);
4434       __ imull(edx, Immediate(imm));
4435       __ subl(eax, edx);
4436       __ movl(edx, eax);
4437     } else {
4438       __ movl(eax, edx);
4439     }
4440   } else {
4441     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4442 
4443     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4444 
4445     CpuRegister rax = eax;
4446     CpuRegister rdx = edx;
4447 
4448     CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4449 
4450     // Save the numerator.
4451     __ movq(numerator, rax);
4452 
4453     // RAX = magic
4454     codegen_->Load64BitValue(rax, magic);
4455 
4456     // RDX:RAX = magic * numerator
4457     __ imulq(numerator);
4458 
4459     if (imm > 0 && magic < 0) {
4460       // RDX += numerator
4461       __ addq(rdx, numerator);
4462     } else if (imm < 0 && magic > 0) {
4463       // RDX -= numerator
4464       __ subq(rdx, numerator);
4465     }
4466 
4467     // Shift if needed.
4468     if (shift != 0) {
4469       __ sarq(rdx, Immediate(shift));
4470     }
4471 
4472     // RDX += 1 if RDX < 0
4473     __ movq(rax, rdx);
4474     __ shrq(rdx, Immediate(63));
4475     __ addq(rdx, rax);
4476 
4477     if (instruction->IsRem()) {
4478       __ movq(rax, numerator);
4479 
4480       if (IsInt<32>(imm)) {
4481         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4482       } else {
4483         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4484       }
4485 
4486       __ subq(rax, rdx);
4487       __ movq(rdx, rax);
4488     } else {
4489       __ movq(rax, rdx);
4490     }
4491   }
4492 }
4493 
4494 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4495   DCHECK(instruction->IsDiv() || instruction->IsRem());
4496   DataType::Type type = instruction->GetResultType();
4497   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4498 
4499   bool is_div = instruction->IsDiv();
4500   LocationSummary* locations = instruction->GetLocations();
4501 
4502   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4503   Location second = locations->InAt(1);
4504 
4505   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4506   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4507 
4508   if (second.IsConstant()) {
4509     int64_t imm = Int64FromConstant(second.GetConstant());
4510 
4511     if (imm == 0) {
4512       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4513     } else if (imm == 1 || imm == -1) {
4514       DivRemOneOrMinusOne(instruction);
4515     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4516       if (is_div) {
4517         DivByPowerOfTwo(instruction->AsDiv());
4518       } else {
4519         RemByPowerOfTwo(instruction->AsRem());
4520       }
4521     } else {
4522       DCHECK(imm <= -2 || imm >= 2);
4523       GenerateDivRemWithAnyConstant(instruction);
4524     }
4525   } else {
4526     SlowPathCode* slow_path =
4527         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4528             instruction, out.AsRegister(), type, is_div);
4529     codegen_->AddSlowPath(slow_path);
4530 
4531     CpuRegister second_reg = second.AsRegister<CpuRegister>();
4532     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4533     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
4534     // so it's safe to just use negl instead of more complex comparisons.
4535     if (type == DataType::Type::kInt32) {
4536       __ cmpl(second_reg, Immediate(-1));
4537       __ j(kEqual, slow_path->GetEntryLabel());
4538       // edx:eax <- sign-extended of eax
4539       __ cdq();
4540       // eax = quotient, edx = remainder
4541       __ idivl(second_reg);
4542     } else {
4543       __ cmpq(second_reg, Immediate(-1));
4544       __ j(kEqual, slow_path->GetEntryLabel());
4545       // rdx:rax <- sign-extended of rax
4546       __ cqo();
4547       // rax = quotient, rdx = remainder
4548       __ idivq(second_reg);
4549     }
4550     __ Bind(slow_path->GetExitLabel());
4551   }
4552 }
4553 
4554 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4555   LocationSummary* locations =
4556       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4557   switch (div->GetResultType()) {
4558     case DataType::Type::kInt32:
4559     case DataType::Type::kInt64: {
4560       locations->SetInAt(0, Location::RegisterLocation(RAX));
4561       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4562       locations->SetOut(Location::SameAsFirstInput());
4563       // Intel uses edx:eax as the dividend.
4564       locations->AddTemp(Location::RegisterLocation(RDX));
4565       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4566       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4567       // output and request another temp.
4568       if (div->InputAt(1)->IsConstant()) {
4569         locations->AddTemp(Location::RequiresRegister());
4570       }
4571       break;
4572     }
4573 
4574     case DataType::Type::kFloat32:
4575     case DataType::Type::kFloat64: {
4576       locations->SetInAt(0, Location::RequiresFpuRegister());
4577       locations->SetInAt(1, Location::Any());
4578       locations->SetOut(Location::SameAsFirstInput());
4579       break;
4580     }
4581 
4582     default:
4583       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4584   }
4585 }
4586 
4587 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4588   LocationSummary* locations = div->GetLocations();
4589   Location first = locations->InAt(0);
4590   Location second = locations->InAt(1);
4591   DCHECK(first.Equals(locations->Out()));
4592 
4593   DataType::Type type = div->GetResultType();
4594   switch (type) {
4595     case DataType::Type::kInt32:
4596     case DataType::Type::kInt64: {
4597       GenerateDivRemIntegral(div);
4598       break;
4599     }
4600 
4601     case DataType::Type::kFloat32: {
4602       if (second.IsFpuRegister()) {
4603         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4604       } else if (second.IsConstant()) {
4605         __ divss(first.AsFpuRegister<XmmRegister>(),
4606                  codegen_->LiteralFloatAddress(
4607                      second.GetConstant()->AsFloatConstant()->GetValue()));
4608       } else {
4609         DCHECK(second.IsStackSlot());
4610         __ divss(first.AsFpuRegister<XmmRegister>(),
4611                  Address(CpuRegister(RSP), second.GetStackIndex()));
4612       }
4613       break;
4614     }
4615 
4616     case DataType::Type::kFloat64: {
4617       if (second.IsFpuRegister()) {
4618         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4619       } else if (second.IsConstant()) {
4620         __ divsd(first.AsFpuRegister<XmmRegister>(),
4621                  codegen_->LiteralDoubleAddress(
4622                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4623       } else {
4624         DCHECK(second.IsDoubleStackSlot());
4625         __ divsd(first.AsFpuRegister<XmmRegister>(),
4626                  Address(CpuRegister(RSP), second.GetStackIndex()));
4627       }
4628       break;
4629     }
4630 
4631     default:
4632       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4633   }
4634 }
4635 
4636 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4637   DataType::Type type = rem->GetResultType();
4638   LocationSummary* locations =
4639     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4640 
4641   switch (type) {
4642     case DataType::Type::kInt32:
4643     case DataType::Type::kInt64: {
4644       locations->SetInAt(0, Location::RegisterLocation(RAX));
4645       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4646       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4647       locations->SetOut(Location::RegisterLocation(RDX));
4648       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4649       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4650       // output and request another temp.
4651       if (rem->InputAt(1)->IsConstant()) {
4652         locations->AddTemp(Location::RequiresRegister());
4653       }
4654       break;
4655     }
4656 
4657     case DataType::Type::kFloat32:
4658     case DataType::Type::kFloat64: {
4659       locations->SetInAt(0, Location::Any());
4660       locations->SetInAt(1, Location::Any());
4661       locations->SetOut(Location::RequiresFpuRegister());
4662       locations->AddTemp(Location::RegisterLocation(RAX));
4663       break;
4664     }
4665 
4666     default:
4667       LOG(FATAL) << "Unexpected rem type " << type;
4668   }
4669 }
4670 
4671 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4672   DataType::Type type = rem->GetResultType();
4673   switch (type) {
4674     case DataType::Type::kInt32:
4675     case DataType::Type::kInt64: {
4676       GenerateDivRemIntegral(rem);
4677       break;
4678     }
4679     case DataType::Type::kFloat32:
4680     case DataType::Type::kFloat64: {
4681       GenerateRemFP(rem);
4682       break;
4683     }
4684     default:
4685       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4686   }
4687 }
4688 
4689 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4690   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4691   switch (minmax->GetResultType()) {
4692     case DataType::Type::kInt32:
4693     case DataType::Type::kInt64:
4694       locations->SetInAt(0, Location::RequiresRegister());
4695       locations->SetInAt(1, Location::RequiresRegister());
4696       locations->SetOut(Location::SameAsFirstInput());
4697       break;
4698     case DataType::Type::kFloat32:
4699     case DataType::Type::kFloat64:
4700       locations->SetInAt(0, Location::RequiresFpuRegister());
4701       locations->SetInAt(1, Location::RequiresFpuRegister());
4702       // The following is sub-optimal, but all we can do for now. It would be fine to also accept
4703       // the second input to be the output (we can simply swap inputs).
4704       locations->SetOut(Location::SameAsFirstInput());
4705       break;
4706     default:
4707       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4708   }
4709 }
4710 
4711 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4712                                                        bool is_min,
4713                                                        DataType::Type type) {
4714   Location op1_loc = locations->InAt(0);
4715   Location op2_loc = locations->InAt(1);
4716 
4717   // Shortcut for same input locations.
4718   if (op1_loc.Equals(op2_loc)) {
4719     // Can return immediately, as op1_loc == out_loc.
4720     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4721     //       a copy here.
4722     DCHECK(locations->Out().Equals(op1_loc));
4723     return;
4724   }
4725 
4726   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4727   CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4728 
4729   //  (out := op1)
4730   //  out <=? op2
4731   //  if out is min jmp done
4732   //  out := op2
4733   // done:
4734 
4735   if (type == DataType::Type::kInt64) {
4736     __ cmpq(out, op2);
4737     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4738   } else {
4739     DCHECK_EQ(type, DataType::Type::kInt32);
4740     __ cmpl(out, op2);
4741     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4742   }
4743 }
4744 
4745 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4746                                                       bool is_min,
4747                                                       DataType::Type type) {
4748   Location op1_loc = locations->InAt(0);
4749   Location op2_loc = locations->InAt(1);
4750   Location out_loc = locations->Out();
4751   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4752 
4753   // Shortcut for same input locations.
4754   if (op1_loc.Equals(op2_loc)) {
4755     DCHECK(out_loc.Equals(op1_loc));
4756     return;
4757   }
4758 
4759   //  (out := op1)
4760   //  out <=? op2
4761   //  if Nan jmp Nan_label
4762   //  if out is min jmp done
4763   //  if op2 is min jmp op2_label
4764   //  handle -0/+0
4765   //  jmp done
4766   // Nan_label:
4767   //  out := NaN
4768   // op2_label:
4769   //  out := op2
4770   // done:
4771   //
4772   // This removes one jmp, but needs to copy one input (op1) to out.
4773   //
4774   // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4775 
4776   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4777 
4778   NearLabel nan, done, op2_label;
4779   if (type == DataType::Type::kFloat64) {
4780     __ ucomisd(out, op2);
4781   } else {
4782     DCHECK_EQ(type, DataType::Type::kFloat32);
4783     __ ucomiss(out, op2);
4784   }
4785 
4786   __ j(Condition::kParityEven, &nan);
4787 
4788   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4789   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4790 
4791   // Handle 0.0/-0.0.
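       // The operands compared equal here. For equal non-zero values the OR/AND of the bit
       // patterns is a no-op; for +0.0 vs -0.0, OR yields -0.0 (the correct min) and AND
       // yields +0.0 (the correct max), matching the Java contract for signed zeros.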
4792   if (is_min) {
4793     if (type == DataType::Type::kFloat64) {
4794       __ orpd(out, op2);
4795     } else {
4796       __ orps(out, op2);
4797     }
4798   } else {
4799     if (type == DataType::Type::kFloat64) {
4800       __ andpd(out, op2);
4801     } else {
4802       __ andps(out, op2);
4803     }
4804   }
4805   __ jmp(&done);
4806 
4807   // NaN handling.
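       // The constants below are the canonical quiet NaN bit patterns, i.e. the values
       // returned by Double.doubleToLongBits(Double.NaN) and Float.floatToIntBits(Float.NaN).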
4808   __ Bind(&nan);
4809   if (type == DataType::Type::kFloat64) {
4810     __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4811   } else {
4812     __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4813   }
4814   __ jmp(&done);
4815 
4816   // out := op2;
4817   __ Bind(&op2_label);
4818   if (type == DataType::Type::kFloat64) {
4819     __ movsd(out, op2);
4820   } else {
4821     __ movss(out, op2);
4822   }
4823 
4824   // Done.
4825   __ Bind(&done);
4826 }
4827 
4828 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4829   DataType::Type type = minmax->GetResultType();
4830   switch (type) {
4831     case DataType::Type::kInt32:
4832     case DataType::Type::kInt64:
4833       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4834       break;
4835     case DataType::Type::kFloat32:
4836     case DataType::Type::kFloat64:
4837       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4838       break;
4839     default:
4840       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4841   }
4842 }
4843 
4844 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4845   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4846 }
4847 
4848 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4849   GenerateMinMax(min, /*is_min*/ true);
4850 }
4851 
4852 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4853   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4854 }
4855 
4856 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4857   GenerateMinMax(max, /*is_min*/ false);
4858 }
4859 
4860 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4861   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4862   switch (abs->GetResultType()) {
4863     case DataType::Type::kInt32:
4864     case DataType::Type::kInt64:
4865       locations->SetInAt(0, Location::RequiresRegister());
4866       locations->SetOut(Location::SameAsFirstInput());
4867       locations->AddTemp(Location::RequiresRegister());
4868       break;
4869     case DataType::Type::kFloat32:
4870     case DataType::Type::kFloat64:
4871       locations->SetInAt(0, Location::RequiresFpuRegister());
4872       locations->SetOut(Location::SameAsFirstInput());
4873       locations->AddTemp(Location::RequiresFpuRegister());
4874       break;
4875     default:
4876       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4877   }
4878 }
4879 
4880 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4881   LocationSummary* locations = abs->GetLocations();
4882   switch (abs->GetResultType()) {
4883     case DataType::Type::kInt32: {
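           // Branch-free abs: mask = out >> 31 is 0 for non-negative values and -1 for
           // negative ones, so (out + mask) ^ mask leaves non-negative values unchanged
           // and negates negative ones. E.g. out = -5: mask = -1, (-5 + -1) ^ -1 == -6 ^ -1 == 5.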
4884       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4885       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4886       // Create mask.
4887       __ movl(mask, out);
4888       __ sarl(mask, Immediate(31));
4889       // Add mask.
4890       __ addl(out, mask);
4891       __ xorl(out, mask);
4892       break;
4893     }
4894     case DataType::Type::kInt64: {
4895       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4896       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4897       // Create mask.
4898       __ movq(mask, out);
4899       __ sarq(mask, Immediate(63));
4900       // Add mask.
4901       __ addq(out, mask);
4902       __ xorq(out, mask);
4903       break;
4904     }
4905     case DataType::Type::kFloat32: {
4906       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4907       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
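           // Float abs just clears the IEEE-754 sign bit: the AND with 0x7FFFFFFF (and with
           // 0x7FFFFFFFFFFFFFFF for doubles below) leaves exponent and mantissa intact.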
4908       __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4909       __ andps(out, mask);
4910       break;
4911     }
4912     case DataType::Type::kFloat64: {
4913       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4914       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4915       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4916       __ andpd(out, mask);
4917       break;
4918     }
4919     default:
4920       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4921   }
4922 }
4923 
4924 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4925   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4926   locations->SetInAt(0, Location::Any());
4927 }
4928 
4929 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4930   SlowPathCode* slow_path =
4931       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4932   codegen_->AddSlowPath(slow_path);
4933 
4934   LocationSummary* locations = instruction->GetLocations();
4935   Location value = locations->InAt(0);
4936 
4937   switch (instruction->GetType()) {
4938     case DataType::Type::kBool:
4939     case DataType::Type::kUint8:
4940     case DataType::Type::kInt8:
4941     case DataType::Type::kUint16:
4942     case DataType::Type::kInt16:
4943     case DataType::Type::kInt32: {
4944       if (value.IsRegister()) {
4945         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4946         __ j(kEqual, slow_path->GetEntryLabel());
4947       } else if (value.IsStackSlot()) {
4948         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4949         __ j(kEqual, slow_path->GetEntryLabel());
4950       } else {
4951         DCHECK(value.IsConstant()) << value;
4952         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4953           __ jmp(slow_path->GetEntryLabel());
4954         }
4955       }
4956       break;
4957     }
4958     case DataType::Type::kInt64: {
4959       if (value.IsRegister()) {
4960         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4961         __ j(kEqual, slow_path->GetEntryLabel());
4962       } else if (value.IsDoubleStackSlot()) {
4963         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4964         __ j(kEqual, slow_path->GetEntryLabel());
4965       } else {
4966         DCHECK(value.IsConstant()) << value;
4967         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4968           __ jmp(slow_path->GetEntryLabel());
4969         }
4970       }
4971       break;
4972     }
4973     default:
4974       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4975   }
4976 }
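
// Sketch of the condition materialized above (hypothetical helper, not part
// of the generator): the slow path, which throws ArithmeticException at
// runtime, is entered exactly when the divisor is zero. A constant divisor
// folds the check away (non-zero) or into an unconditional jump (zero).
[[maybe_unused]] static bool DivisorIsZeroSketch(int64_t divisor) {
  return divisor == 0;  // true => DivZeroCheckSlowPathX86_64 would be taken
}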
4977 
4978 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4979   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4980 
4981   LocationSummary* locations =
4982       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4983 
4984   switch (op->GetResultType()) {
4985     case DataType::Type::kInt32:
4986     case DataType::Type::kInt64: {
4987       locations->SetInAt(0, Location::RequiresRegister());
4988       // The shift count needs to be in CL.
4989       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4990       locations->SetOut(Location::SameAsFirstInput());
4991       break;
4992     }
4993     default:
4994       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4995   }
4996 }
4997 
4998 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4999   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5000 
5001   LocationSummary* locations = op->GetLocations();
5002   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
5003   Location second = locations->InAt(1);
5004 
5005   switch (op->GetResultType()) {
5006     case DataType::Type::kInt32: {
5007       if (second.IsRegister()) {
5008         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5009         if (op->IsShl()) {
5010           __ shll(first_reg, second_reg);
5011         } else if (op->IsShr()) {
5012           __ sarl(first_reg, second_reg);
5013         } else {
5014           __ shrl(first_reg, second_reg);
5015         }
5016       } else {
5017         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5018         if (op->IsShl()) {
5019           __ shll(first_reg, imm);
5020         } else if (op->IsShr()) {
5021           __ sarl(first_reg, imm);
5022         } else {
5023           __ shrl(first_reg, imm);
5024         }
5025       }
5026       break;
5027     }
5028     case DataType::Type::kInt64: {
5029       if (second.IsRegister()) {
5030         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5031         if (op->IsShl()) {
5032           __ shlq(first_reg, second_reg);
5033         } else if (op->IsShr()) {
5034           __ sarq(first_reg, second_reg);
5035         } else {
5036           __ shrq(first_reg, second_reg);
5037         }
5038       } else {
5039         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5040         if (op->IsShl()) {
5041           __ shlq(first_reg, imm);
5042         } else if (op->IsShr()) {
5043           __ sarq(first_reg, imm);
5044         } else {
5045           __ shrq(first_reg, imm);
5046         }
5047       }
5048       break;
5049     }
5050     default:
5051       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5052       UNREACHABLE();
5053   }
5054 }
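
// The immediate paths above pre-mask the shift distance, mirroring both the
// Java semantics and the hardware: x86-64 only honours the low 5 bits of a
// 32-bit shift count and the low 6 bits of a 64-bit one. A sketch of the
// equivalent source-level operation (hypothetical helpers; the masks are
// written out literally rather than via kMaxIntShiftDistance and friends):
[[maybe_unused]] static int32_t ShlInt32Sketch(int32_t value, int32_t distance) {
  // Shift in the unsigned domain to sidestep C++ signed-shift caveats.
  return static_cast<int32_t>(static_cast<uint32_t>(value) << (distance & 31));
}
[[maybe_unused]] static int64_t ShlInt64Sketch(int64_t value, int32_t distance) {
  return static_cast<int64_t>(static_cast<uint64_t>(value) << (distance & 63));
}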
5055 
5056 void LocationsBuilderX86_64::HandleRotate(HBinaryOperation* rotate) {
5057   LocationSummary* locations =
5058       new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5059 
5060   switch (rotate->GetResultType()) {
5061     case DataType::Type::kInt32:
5062     case DataType::Type::kInt64: {
5063       locations->SetInAt(0, Location::RequiresRegister());
5064       // The shift count needs to be in CL (unless it is a constant).
5065       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, rotate->InputAt(1)));
5066       locations->SetOut(Location::SameAsFirstInput());
5067       break;
5068     }
5069     default:
5070       LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5071       UNREACHABLE();
5072   }
5073 }
5074 
5075 void InstructionCodeGeneratorX86_64::HandleRotate(HBinaryOperation* rotate) {
5076   LocationSummary* locations = rotate->GetLocations();
5077   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
5078   Location second = locations->InAt(1);
5079 
5080   switch (rotate->GetResultType()) {
5081     case DataType::Type::kInt32:
5082       if (second.IsRegister()) {
5083         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5084         if (rotate->IsRor()) {
5085           __ rorl(first_reg, second_reg);
5086         } else {
5087           DCHECK(rotate->IsRol());
5088           __ roll(first_reg, second_reg);
5089         }
5090       } else {
5091         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5092         if (rotate->IsRor()) {
5093           __ rorl(first_reg, imm);
5094         } else {
5095           DCHECK(rotate->IsRol());
5096           __ roll(first_reg, imm);
5097         }
5098       }
5099       break;
5100     case DataType::Type::kInt64:
5101       if (second.IsRegister()) {
5102         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5103         if (rotate->IsRor()) {
5104           __ rorq(first_reg, second_reg);
5105         } else {
5106           DCHECK(rotate->IsRol());
5107           __ rolq(first_reg, second_reg);
5108         }
5109       } else {
5110         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5111         if (rotate->IsRor()) {
5112           __ rorq(first_reg, imm);
5113         } else {
5114           DCHECK(rotate->IsRol());
5115           __ rolq(first_reg, imm);
5116         }
5117       }
5118       break;
5119     default:
5120       LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5121       UNREACHABLE();
5122   }
5123 }
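
// rorl/rorq (and roll/rolq) do in one instruction what would otherwise take
// two shifts and an OR. A plain C++ sketch of rotate-right (hypothetical
// helper; the `& 31` mirrors the kMaxIntShiftDistance masking above and also
// keeps the expression well-defined when the distance is zero):
[[maybe_unused]] static uint32_t RorUint32Sketch(uint32_t value, uint32_t distance) {
  distance &= 31u;
  return (value >> distance) | (value << ((32u - distance) & 31u));
}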
5124 
5125 void LocationsBuilderX86_64::VisitRol(HRol* rol) {
5126   HandleRotate(rol);
5127 }
5128 
5129 void InstructionCodeGeneratorX86_64::VisitRol(HRol* rol) {
5130   HandleRotate(rol);
5131 }
5132 
5133 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
5134   HandleRotate(ror);
5135 }
5136 
5137 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
5138   HandleRotate(ror);
5139 }
5140 
5141 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
5142   HandleShift(shl);
5143 }
5144 
5145 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
5146   HandleShift(shl);
5147 }
5148 
5149 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
5150   HandleShift(shr);
5151 }
5152 
5153 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
5154   HandleShift(shr);
5155 }
5156 
5157 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
5158   HandleShift(ushr);
5159 }
5160 
5161 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
5162   HandleShift(ushr);
5163 }
5164 
5165 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
5166   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5167       instruction, LocationSummary::kCallOnMainOnly);
5168   InvokeRuntimeCallingConvention calling_convention;
5169   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5170   locations->SetOut(Location::RegisterLocation(RAX));
5171 }
5172 
5173 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
5174   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5175   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5176   DCHECK(!codegen_->IsLeafMethod());
5177 }
5178 
5179 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
5180   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5181       instruction, LocationSummary::kCallOnMainOnly);
5182   InvokeRuntimeCallingConvention calling_convention;
5183   locations->SetOut(Location::RegisterLocation(RAX));
5184   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5185   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5186 }
5187 
5188 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
5189   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5190   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5191   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5192   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5193   DCHECK(!codegen_->IsLeafMethod());
5194 }
5195 
5196 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
5197   LocationSummary* locations =
5198       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5199   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5200   if (location.IsStackSlot()) {
5201     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5202   } else if (location.IsDoubleStackSlot()) {
5203     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5204   }
5205   locations->SetOut(location);
5206 }
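
// Stack-passed arguments live in the caller's frame, so the slot computed by
// the calling convention (relative to the incoming stack pointer) has to be
// rebased past this method's own frame, which is what the adjustment above
// does. Sketch of the arithmetic (hypothetical helper):
[[maybe_unused]] static int32_t RebaseCallerStackSlotSketch(int32_t caller_relative_offset,
                                                            int32_t frame_size) {
  // After the prologue, [SP + caller_relative_offset + frame_size] addresses
  // the same argument the caller placed at [incoming SP + caller_relative_offset].
  return caller_relative_offset + frame_size;
}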
5207 
5208 void InstructionCodeGeneratorX86_64::VisitParameterValue(
5209     [[maybe_unused]] HParameterValue* instruction) {
5210   // Nothing to do, the parameter is already at its location.
5211 }
5212 
5213 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
5214   LocationSummary* locations =
5215       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5216   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5217 }
5218 
5219 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
5220     [[maybe_unused]] HCurrentMethod* instruction) {
5221   // Nothing to do, the method is already at its location.
5222 }
5223 
5224 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5225   LocationSummary* locations =
5226       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5227   locations->SetInAt(0, Location::RequiresRegister());
5228   locations->SetOut(Location::RequiresRegister());
5229 }
5230 
5231 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5232   LocationSummary* locations = instruction->GetLocations();
5233   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5234     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5235         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
5236     __ movq(locations->Out().AsRegister<CpuRegister>(),
5237             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
5238   } else {
5239     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5240         instruction->GetIndex(), kX86_64PointerSize));
5241     __ movq(locations->Out().AsRegister<CpuRegister>(),
5242             Address(locations->InAt(0).AsRegister<CpuRegister>(),
5243             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
5244     __ movq(locations->Out().AsRegister<CpuRegister>(),
5245             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
5246   }
5247 }
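
// The vtable case above is a single load because the vtable is embedded in
// the Class object, while the IMT case needs two loads: one for the ImTable
// pointer and one for the entry. A pointer-arithmetic sketch (hypothetical
// helper; offsets are passed in instead of being taken from mirror::Class):
[[maybe_unused]] static uintptr_t ClassTableGetSketch(uintptr_t klass,
                                                      size_t vtable_entry_offset,
                                                      size_t imt_ptr_offset,
                                                      size_t imt_entry_offset,
                                                      bool is_vtable) {
  if (is_vtable) {
    // Embedded vtable entry: one load from the Class object itself.
    return *reinterpret_cast<const uintptr_t*>(klass + vtable_entry_offset);
  }
  // IMT entry: load the table pointer, then index into it.
  uintptr_t imt = *reinterpret_cast<const uintptr_t*>(klass + imt_ptr_offset);
  return *reinterpret_cast<const uintptr_t*>(imt + imt_entry_offset);
}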
5248 
5249 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
5250   LocationSummary* locations =
5251       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5252   locations->SetInAt(0, Location::RequiresRegister());
5253   locations->SetOut(Location::SameAsFirstInput());
5254 }
5255 
5256 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
5257   LocationSummary* locations = not_->GetLocations();
5258   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5259             locations->Out().AsRegister<CpuRegister>().AsRegister());
5260   Location out = locations->Out();
5261   switch (not_->GetResultType()) {
5262     case DataType::Type::kInt32:
5263       __ notl(out.AsRegister<CpuRegister>());
5264       break;
5265 
5266     case DataType::Type::kInt64:
5267       __ notq(out.AsRegister<CpuRegister>());
5268       break;
5269 
5270     default:
5271       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5272   }
5273 }
5274 
5275 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5276   LocationSummary* locations =
5277       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5278   locations->SetInAt(0, Location::RequiresRegister());
5279   locations->SetOut(Location::SameAsFirstInput());
5280 }
5281 
5282 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5283   LocationSummary* locations = bool_not->GetLocations();
5284   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5285             locations->Out().AsRegister<CpuRegister>().AsRegister());
5286   Location out = locations->Out();
5287   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
5288 }
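
// Booleans are materialized as 0 or 1, so logical negation is the single
// xor-with-1 emitted above. Sketch (hypothetical helper):
[[maybe_unused]] static int32_t BooleanNotSketch(int32_t value_zero_or_one) {
  return value_zero_or_one ^ 1;  // 0 -> 1, 1 -> 0
}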
5289 
5290 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
5291   LocationSummary* locations =
5292       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5293   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5294     locations->SetInAt(i, Location::Any());
5295   }
5296   locations->SetOut(Location::Any());
5297 }
5298 
5299 void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) {
5300   LOG(FATAL) << "Unimplemented";
5301 }
5302 
5303 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
5304   /*
5305    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence.
5306    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
5307    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5308    */
5309   switch (kind) {
5310     case MemBarrierKind::kAnyAny: {
5311       MemoryFence();
5312       break;
5313     }
5314     case MemBarrierKind::kAnyStore:
5315     case MemBarrierKind::kLoadAny:
5316     case MemBarrierKind::kStoreStore: {
5317       // nop
5318       break;
5319     }
5320     case MemBarrierKind::kNTStoreStore:
5321       // Non-Temporal Store/Store needs an explicit fence.
5322       MemoryFence(/* non-temporal= */ true);
5323       break;
5324   }
5325 }
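
// A rough std::atomic rendering of the mapping above (hypothetical helper,
// assuming <atomic> is transitively available here): only the AnyAny/StoreLoad
// case needs a hardware fence on x86-64; non-temporal stores are weakly
// ordered and also need one (the generated code uses the cheaper sfence);
// everything else only needs the compiler to preserve ordering.
[[maybe_unused]] static void MemoryBarrierSketch(bool is_any_any, bool is_nt_store_store) {
  if (is_any_any || is_nt_store_store) {
    std::atomic_thread_fence(std::memory_order_seq_cst);  // full hardware fence
  } else {
    std::atomic_signal_fence(std::memory_order_seq_cst);  // compiler-only barrier
  }
}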
5326 
5327 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5328   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5329 
5330   bool object_field_get_with_read_barrier =
5331       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5332   LocationSummary* locations =
5333       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5334                                                        object_field_get_with_read_barrier
5335                                                            ? LocationSummary::kCallOnSlowPath
5336                                                            : LocationSummary::kNoCall);
5337   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5338     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5339   }
5340   // receiver_input
5341   locations->SetInAt(0, Location::RequiresRegister());
5342   if (DataType::IsFloatingPointType(instruction->GetType())) {
5343     locations->SetOut(Location::RequiresFpuRegister());
5344   } else {
5345     // The output overlaps for an object field get when read barriers are
5346     // enabled: we do not want the move to overwrite the object's location, as
5347     // we need it to emit the read barrier. For predicated instructions the output can
5348     // always overlap, since it is SameAsFirstInput and already holds the default value.
5349     locations->SetOut(
5350         Location::RequiresRegister(),
5351         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5352   }
5353 }
5354 
5355 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5356                                                     const FieldInfo& field_info) {
5357   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5358 
5359   LocationSummary* locations = instruction->GetLocations();
5360   Location base_loc = locations->InAt(0);
5361   CpuRegister base = base_loc.AsRegister<CpuRegister>();
5362   Location out = locations->Out();
5363   bool is_volatile = field_info.IsVolatile();
5364   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5365   DataType::Type load_type = instruction->GetType();
5366   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5367 
5368   if (load_type == DataType::Type::kReference) {
5369     // /* HeapReference<Object> */ out = *(base + offset)
5370     if (codegen_->EmitBakerReadBarrier()) {
5371       // Note that a potential implicit null check is handled in this
5372       // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5373       codegen_->GenerateFieldLoadWithBakerReadBarrier(
5374           instruction, out, base, offset, /* needs_null_check= */ true);
5375       if (is_volatile) {
5376         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5377       }
5378     } else {
5379       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5380       codegen_->MaybeRecordImplicitNullCheck(instruction);
5381       if (is_volatile) {
5382         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5383       }
5384       // If read barriers are enabled, emit read barriers other than
5385       // Baker's using a slow path (and also unpoison the loaded
5386       // reference, if heap poisoning is enabled).
5387       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5388     }
5389   } else {
5390     codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5391     codegen_->MaybeRecordImplicitNullCheck(instruction);
5392     if (is_volatile) {
5393       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5394     }
5395   }
5396 }
5397 
5398 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5399                                             const FieldInfo& field_info,
5400                                             WriteBarrierKind write_barrier_kind) {
5401   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5402 
5403   LocationSummary* locations =
5404       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5405   DataType::Type field_type = field_info.GetFieldType();
5406   bool is_volatile = field_info.IsVolatile();
5407   bool needs_write_barrier =
5408       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5409   bool check_gc_card =
5410       codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5411 
5412   locations->SetInAt(0, Location::RequiresRegister());
5413   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5414     if (is_volatile) {
5415       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5416       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5417     } else {
5418       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5419     }
5420   } else {
5421     if (is_volatile) {
5422       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5423       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5424     } else {
5425       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5426     }
5427   }
5428 
5429   // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
5430   // InstructionCodeGeneratorX86_64::HandleFieldSet, GenerateVarHandleSet due to `extra_temp_index`.
5431   if (needs_write_barrier ||
5432       check_gc_card ||
5433       (kPoisonHeapReferences && field_type == DataType::Type::kReference)) {
5434     // Temporary registers for the write barrier / reference poisoning.
5435     locations->AddRegisterTemps(2);
5436   }
5437 }
5438 
5439 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5440                                            DataType::Type type,
5441                                            CpuRegister* temp) {
5442   switch (type) {
5443     case DataType::Type::kInt16:
5444       // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
5445       __ bswapl(value.AsRegister<CpuRegister>());
5446       __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
5447       break;
5448     case DataType::Type::kUint16:
5449       // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5450       __ bswapl(value.AsRegister<CpuRegister>());
5451       __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5452       break;
5453     case DataType::Type::kInt32:
5454     case DataType::Type::kUint32:
5455       __ bswapl(value.AsRegister<CpuRegister>());
5456       break;
5457     case DataType::Type::kInt64:
5458     case DataType::Type::kUint64:
5459       __ bswapq(value.AsRegister<CpuRegister>());
5460       break;
5461     case DataType::Type::kFloat32: {
5462       DCHECK_NE(temp, nullptr);
5463       __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
5464       __ bswapl(*temp);
5465       __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
5466       break;
5467     }
5468     case DataType::Type::kFloat64: {
5469       DCHECK_NE(temp, nullptr);
5470       __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
5471       __ bswapq(*temp);
5472       __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
5473       break;
5474     }
5475     default:
5476       LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5477       UNREACHABLE();
5478   }
5479 }
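
// The 16-bit cases above reuse the 32-bit bswap: after swapping all four
// bytes, the interesting half-word sits in the top 16 bits, so a 16-bit shift
// brings it back down, arithmetic (sarl) to sign-extend for kInt16 and
// logical (shrl) to zero-extend for kUint16. Sketch using a compiler builtin
// (hypothetical helpers; __builtin_bswap32 assumed available, as on clang/gcc):
[[maybe_unused]] static int32_t BswapInt16Sketch(int32_t value) {
  uint32_t swapped = __builtin_bswap32(static_cast<uint32_t>(value));
  return static_cast<int32_t>(swapped) >> 16;  // arithmetic shift: sign-extends
}
[[maybe_unused]] static uint32_t BswapUint16Sketch(uint32_t value) {
  return __builtin_bswap32(value) >> 16;  // logical shift: zero-extends
}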
5480 
5481 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5482                                                     uint32_t value_index,
5483                                                     uint32_t extra_temp_index,
5484                                                     DataType::Type field_type,
5485                                                     Address field_addr,
5486                                                     CpuRegister base,
5487                                                     bool is_volatile,
5488                                                     bool is_atomic,
5489                                                     bool value_can_be_null,
5490                                                     bool byte_swap,
5491                                                     WriteBarrierKind write_barrier_kind) {
5492   LocationSummary* locations = instruction->GetLocations();
5493   Location value = locations->InAt(value_index);
5494 
5495   if (is_volatile) {
5496     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5497   }
5498 
5499   bool maybe_record_implicit_null_check_done = false;
5500 
5501   if (value.IsConstant()) {
5502     switch (field_type) {
5503       case DataType::Type::kBool:
5504       case DataType::Type::kUint8:
5505       case DataType::Type::kInt8:
5506         __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5507         break;
5508       case DataType::Type::kUint16:
5509       case DataType::Type::kInt16: {
5510         int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5511         if (byte_swap) {
5512           v = BSWAP(v);
5513         }
5514         __ movw(field_addr, Immediate(v));
5515         break;
5516       }
5517       case DataType::Type::kUint32:
5518       case DataType::Type::kInt32:
5519       case DataType::Type::kFloat32:
5520       case DataType::Type::kReference: {
5521         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5522         if (byte_swap) {
5523           v = BSWAP(v);
5524         }
5525         DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5526         // Note: if heap poisoning is enabled, no need to poison
5527         // (negate) `v` if it is a reference, as it would be null.
5528         __ movl(field_addr, Immediate(v));
5529         break;
5530       }
5531       case DataType::Type::kUint64:
5532       case DataType::Type::kInt64:
5533       case DataType::Type::kFloat64: {
5534         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5535         if (byte_swap) {
5536           v = BSWAP(v);
5537         }
5538         if (is_atomic) {
5539           // Move constant into a register, then atomically store the register to memory.
5540           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5541           __ movq(temp, Immediate(v));
5542           __ movq(field_addr, temp);
5543         } else {
5544           Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5545           codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5546         }
5547         maybe_record_implicit_null_check_done = true;
5548         break;
5549       }
5550       case DataType::Type::kVoid:
5551         LOG(FATAL) << "Unreachable type " << field_type;
5552         UNREACHABLE();
5553     }
5554   } else {
5555     if (byte_swap) {
5556       // Swap byte order in-place in the input register (we will restore it later).
5557       CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5558       Bswap(value, field_type, &temp);
5559     }
5560 
5561     switch (field_type) {
5562       case DataType::Type::kBool:
5563       case DataType::Type::kUint8:
5564       case DataType::Type::kInt8:
5565         __ movb(field_addr, value.AsRegister<CpuRegister>());
5566         break;
5567       case DataType::Type::kUint16:
5568       case DataType::Type::kInt16:
5569         __ movw(field_addr, value.AsRegister<CpuRegister>());
5570         break;
5571       case DataType::Type::kUint32:
5572       case DataType::Type::kInt32:
5573       case DataType::Type::kReference:
5574         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5575           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5576           __ movl(temp, value.AsRegister<CpuRegister>());
5577           __ PoisonHeapReference(temp);
5578           __ movl(field_addr, temp);
5579         } else {
5580           __ movl(field_addr, value.AsRegister<CpuRegister>());
5581         }
5582         break;
5583       case DataType::Type::kUint64:
5584       case DataType::Type::kInt64:
5585         __ movq(field_addr, value.AsRegister<CpuRegister>());
5586         break;
5587       case DataType::Type::kFloat32:
5588         __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5589         break;
5590       case DataType::Type::kFloat64:
5591         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5592         break;
5593       case DataType::Type::kVoid:
5594         LOG(FATAL) << "Unreachable type " << field_type;
5595         UNREACHABLE();
5596     }
5597 
5598     if (byte_swap) {
5599       // Restore byte order.
5600       CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5601       Bswap(value, field_type, &temp);
5602     }
5603   }
5604 
5605   if (!maybe_record_implicit_null_check_done) {
5606     codegen_->MaybeRecordImplicitNullCheck(instruction);
5607   }
5608 
5609   bool needs_write_barrier =
5610       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5611   if (needs_write_barrier) {
5612     if (value.IsConstant()) {
5613       DCHECK(value.GetConstant()->IsNullConstant());
5614       if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5615         DCHECK_NE(extra_temp_index, 0u);
5616         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5617         CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5618         codegen_->MarkGCCard(temp, card, base);
5619       }
5620     } else {
5621       DCHECK_NE(extra_temp_index, 0u);
5622       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5623       CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5624       codegen_->MaybeMarkGCCard(
5625           temp,
5626           card,
5627           base,
5628           value.AsRegister<CpuRegister>(),
5629           value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
5630     }
5631   } else if (codegen_->ShouldCheckGCCard(
5632                  field_type, instruction->InputAt(value_index), write_barrier_kind)) {
5633     DCHECK_NE(extra_temp_index, 0u);
5634     DCHECK(value.IsRegister());
5635     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5636     CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5637     codegen_->CheckGCCardIsValid(temp, card, base);
5638   }
5639 
5640   if (is_volatile) {
5641     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5642   }
5643 }
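
// The write-barrier block above boils down to dirtying (or, on the check-only
// path, verifying) the card that covers the holder object. A pointer-arithmetic
// sketch of roughly what MarkGCCard emits (hypothetical helper; the real card
// base, shift and dirty value come from gc::accounting::CardTable):
[[maybe_unused]] static void MarkGCCardSketch(uint8_t* card_table_base,
                                              uintptr_t holder_address,
                                              size_t card_shift,
                                              uint8_t dirty_value) {
  // One card byte per 2^card_shift bytes of heap; storing the dirty value
  // tells the concurrent GC to re-scan references inside the holder object.
  card_table_base[holder_address >> card_shift] = dirty_value;
}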
5644 
5645 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5646                                                     const FieldInfo& field_info,
5647                                                     bool value_can_be_null,
5648                                                     WriteBarrierKind write_barrier_kind) {
5649   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5650 
5651   LocationSummary* locations = instruction->GetLocations();
5652   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5653   bool is_volatile = field_info.IsVolatile();
5654   DataType::Type field_type = field_info.GetFieldType();
5655   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5656 
5657   HandleFieldSet(instruction,
5658                  /*value_index=*/ 1,
5659                  /*extra_temp_index=*/ 1,
5660                  field_type,
5661                  Address(base, offset),
5662                  base,
5663                  is_volatile,
5664                  /*is_atomic=*/ false,
5665                  value_can_be_null,
5666                  /*byte_swap=*/ false,
5667                  write_barrier_kind);
5668 }
5669 
5670 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5671   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5672 }
5673 
5674 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5675   HandleFieldSet(instruction,
5676                  instruction->GetFieldInfo(),
5677                  instruction->GetValueCanBeNull(),
5678                  instruction->GetWriteBarrierKind());
5679 }
5680 
5681 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5682   HandleFieldGet(instruction);
5683 }
5684 
5685 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5686   HandleFieldGet(instruction, instruction->GetFieldInfo());
5687 }
5688 
5689 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5690   HandleFieldGet(instruction);
5691 }
5692 
5693 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5694   HandleFieldGet(instruction, instruction->GetFieldInfo());
5695 }
5696 
5697 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5698   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5699 }
5700 
5701 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5702   HandleFieldSet(instruction,
5703                  instruction->GetFieldInfo(),
5704                  instruction->GetValueCanBeNull(),
5705                  instruction->GetWriteBarrierKind());
5706 }
5707 
5708 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5709   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5710 }
5711 
5712 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5713   __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5714   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5715 }
5716 
5717 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5718     HUnresolvedInstanceFieldGet* instruction) {
5719   FieldAccessCallingConventionX86_64 calling_convention;
5720   codegen_->CreateUnresolvedFieldLocationSummary(
5721       instruction, instruction->GetFieldType(), calling_convention);
5722 }
5723 
5724 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5725     HUnresolvedInstanceFieldGet* instruction) {
5726   FieldAccessCallingConventionX86_64 calling_convention;
5727   codegen_->GenerateUnresolvedFieldAccess(instruction,
5728                                           instruction->GetFieldType(),
5729                                           instruction->GetFieldIndex(),
5730                                           instruction->GetDexPc(),
5731                                           calling_convention);
5732 }
5733 
5734 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5735     HUnresolvedInstanceFieldSet* instruction) {
5736   FieldAccessCallingConventionX86_64 calling_convention;
5737   codegen_->CreateUnresolvedFieldLocationSummary(
5738       instruction, instruction->GetFieldType(), calling_convention);
5739 }
5740 
5741 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5742     HUnresolvedInstanceFieldSet* instruction) {
5743   FieldAccessCallingConventionX86_64 calling_convention;
5744   codegen_->GenerateUnresolvedFieldAccess(instruction,
5745                                           instruction->GetFieldType(),
5746                                           instruction->GetFieldIndex(),
5747                                           instruction->GetDexPc(),
5748                                           calling_convention);
5749 }
5750 
5751 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5752     HUnresolvedStaticFieldGet* instruction) {
5753   FieldAccessCallingConventionX86_64 calling_convention;
5754   codegen_->CreateUnresolvedFieldLocationSummary(
5755       instruction, instruction->GetFieldType(), calling_convention);
5756 }
5757 
5758 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5759     HUnresolvedStaticFieldGet* instruction) {
5760   FieldAccessCallingConventionX86_64 calling_convention;
5761   codegen_->GenerateUnresolvedFieldAccess(instruction,
5762                                           instruction->GetFieldType(),
5763                                           instruction->GetFieldIndex(),
5764                                           instruction->GetDexPc(),
5765                                           calling_convention);
5766 }
5767 
5768 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5769     HUnresolvedStaticFieldSet* instruction) {
5770   FieldAccessCallingConventionX86_64 calling_convention;
5771   codegen_->CreateUnresolvedFieldLocationSummary(
5772       instruction, instruction->GetFieldType(), calling_convention);
5773 }
5774 
5775 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5776     HUnresolvedStaticFieldSet* instruction) {
5777   FieldAccessCallingConventionX86_64 calling_convention;
5778   codegen_->GenerateUnresolvedFieldAccess(instruction,
5779                                           instruction->GetFieldType(),
5780                                           instruction->GetFieldIndex(),
5781                                           instruction->GetDexPc(),
5782                                           calling_convention);
5783 }
5784 
5785 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5786   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5787   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5788       ? Location::RequiresRegister()
5789       : Location::Any();
5790   locations->SetInAt(0, loc);
5791 }
5792 
5793 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5794   if (CanMoveNullCheckToUser(instruction)) {
5795     return;
5796   }
5797   LocationSummary* locations = instruction->GetLocations();
5798   Location obj = locations->InAt(0);
5799 
5800   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5801   RecordPcInfo(instruction, instruction->GetDexPc());
5802 }
5803 
5804 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5805   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5806   AddSlowPath(slow_path);
5807 
5808   LocationSummary* locations = instruction->GetLocations();
5809   Location obj = locations->InAt(0);
5810 
5811   if (obj.IsRegister()) {
5812     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5813   } else if (obj.IsStackSlot()) {
5814     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5815   } else {
5816     DCHECK(obj.IsConstant()) << obj;
5817     DCHECK(obj.GetConstant()->IsNullConstant());
5818     __ jmp(slow_path->GetEntryLabel());
5819     return;
5820   }
5821   __ j(kEqual, slow_path->GetEntryLabel());
5822 }
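
// Both flavours enforce the same condition. The implicit form above relies on
// the fault handler: a testl through a null reference raises SIGSEGV, which
// the runtime turns into a NullPointerException at the recorded dex PC. The
// explicit form compares against null and branches to a throwing slow path.
// Sketch of that condition (hypothetical helper):
[[maybe_unused]] static bool NullCheckFailsSketch(const void* reference) {
  return reference == nullptr;  // true => NullCheckSlowPathX86_64 is taken
}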
5823 
5824 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5825   codegen_->GenerateNullCheck(instruction);
5826 }
5827 
5828 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5829   bool object_array_get_with_read_barrier =
5830       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5831   LocationSummary* locations =
5832       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5833                                                        object_array_get_with_read_barrier
5834                                                            ? LocationSummary::kCallOnSlowPath
5835                                                            : LocationSummary::kNoCall);
5836   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5837     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5838   }
5839   locations->SetInAt(0, Location::RequiresRegister());
5840   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5841   if (DataType::IsFloatingPointType(instruction->GetType())) {
5842     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5843   } else {
5844     // The output overlaps for an object array get when read barriers
5845     // are enabled: we do not want the move to overwrite the array's
5846     // location, as we need it to emit the read barrier.
5847     locations->SetOut(
5848         Location::RequiresRegister(),
5849         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5850   }
5851 }
5852 
5853 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5854   LocationSummary* locations = instruction->GetLocations();
5855   Location obj_loc = locations->InAt(0);
5856   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5857   Location index = locations->InAt(1);
5858   Location out_loc = locations->Out();
5859   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5860 
5861   DataType::Type type = instruction->GetType();
5862   if (type == DataType::Type::kReference) {
5863     static_assert(
5864         sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5865         "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5866     // /* HeapReference<Object> */ out =
5867     //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5868     if (codegen_->EmitBakerReadBarrier()) {
5869       // Note that a potential implicit null check is handled in this
5870       // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5871       codegen_->GenerateArrayLoadWithBakerReadBarrier(
5872           instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5873     } else {
5874       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5875       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5876       codegen_->MaybeRecordImplicitNullCheck(instruction);
5877       // If read barriers are enabled, emit read barriers other than
5878       // Baker's using a slow path (and also unpoison the loaded
5879       // reference, if heap poisoning is enabled).
5880       if (index.IsConstant()) {
5881         uint32_t offset =
5882             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5883         codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5884       } else {
5885         codegen_->MaybeGenerateReadBarrierSlow(
5886             instruction, out_loc, out_loc, obj_loc, data_offset, index);
5887       }
5888     }
5889   } else {
5890     if (type == DataType::Type::kUint16
5891         && mirror::kUseStringCompression
5892         && instruction->IsStringCharAt()) {
5893       // Branch into the compressed and uncompressed cases depending on the compression flag.
5894       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5895       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5896       NearLabel done, not_compressed;
5897       __ testb(Address(obj, count_offset), Immediate(1));
5898       codegen_->MaybeRecordImplicitNullCheck(instruction);
5899       static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5900                     "Expecting 0=compressed, 1=uncompressed");
5901       __ j(kNotZero, &not_compressed);
5902       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5903       __ jmp(&done);
5904       __ Bind(&not_compressed);
5905       __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5906       __ Bind(&done);
5907     } else {
5908       ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5909       Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5910       codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5911     }
5912     codegen_->MaybeRecordImplicitNullCheck(instruction);
5913   }
5914 }
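
// Sketch of the compressed-string branch above (hypothetical helper): the low
// bit of String::count_ selects the element width, 0 meaning compressed 8-bit
// storage and 1 meaning uncompressed 16-bit storage, matching the
// static_assert on StringCompressionFlag.
[[maybe_unused]] static uint16_t StringCharAtSketch(const uint8_t* compressed_data,
                                                    const uint16_t* uncompressed_data,
                                                    uint32_t count_field,
                                                    int32_t index) {
  return ((count_field & 1u) == 0u)
             ? static_cast<uint16_t>(compressed_data[index])  // movzxb path
             : uncompressed_data[index];                      // movzxw path
}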
5915 
5916 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5917   DataType::Type value_type = instruction->GetComponentType();
5918 
5919   WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5920   bool needs_write_barrier =
5921       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5922   bool check_gc_card =
5923       codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
5924   bool needs_type_check = instruction->NeedsTypeCheck();
5925 
5926   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5927       instruction,
5928       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5929 
5930   locations->SetInAt(0, Location::RequiresRegister());
5931   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5932   if (DataType::IsFloatingPointType(value_type)) {
5933     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5934   } else {
5935     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5936   }
5937 
5938   if (needs_write_barrier || check_gc_card) {
5939     // Used by reference poisoning, type checking, emitting write barrier, or checking write
5940     // barrier.
5941     locations->AddTemp(Location::RequiresRegister());
5942     // Only used when emitting a write barrier, or when checking for the card table.
5943     locations->AddTemp(Location::RequiresRegister());
5944   } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
5945              instruction->NeedsTypeCheck()) {
5946     // Used for poisoning or type checking.
5947     locations->AddTemp(Location::RequiresRegister());
5948   }
5949 }
5950 
5951 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5952   LocationSummary* locations = instruction->GetLocations();
5953   Location array_loc = locations->InAt(0);
5954   CpuRegister array = array_loc.AsRegister<CpuRegister>();
5955   Location index = locations->InAt(1);
5956   Location value = locations->InAt(2);
5957   DataType::Type value_type = instruction->GetComponentType();
5958   bool needs_type_check = instruction->NeedsTypeCheck();
5959   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5960   bool needs_write_barrier =
5961       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5962 
5963   switch (value_type) {
5964     case DataType::Type::kBool:
5965     case DataType::Type::kUint8:
5966     case DataType::Type::kInt8: {
5967       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5968       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5969       if (value.IsRegister()) {
5970         __ movb(address, value.AsRegister<CpuRegister>());
5971       } else {
5972         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5973       }
5974       codegen_->MaybeRecordImplicitNullCheck(instruction);
5975       break;
5976     }
5977 
5978     case DataType::Type::kUint16:
5979     case DataType::Type::kInt16: {
5980       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5981       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5982       if (value.IsRegister()) {
5983         __ movw(address, value.AsRegister<CpuRegister>());
5984       } else {
5985         DCHECK(value.IsConstant()) << value;
5986         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5987       }
5988       codegen_->MaybeRecordImplicitNullCheck(instruction);
5989       break;
5990     }
5991 
5992     case DataType::Type::kReference: {
5993       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5994       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5995 
5996       if (!value.IsRegister()) {
5997         // Just setting null.
5998         DCHECK(instruction->InputAt(2)->IsNullConstant());
5999         DCHECK(value.IsConstant()) << value;
6000         __ movl(address, Immediate(0));
6001         codegen_->MaybeRecordImplicitNullCheck(instruction);
6002         if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6003           // We need to set a write barrier here even though we are writing null, since this write
6004           // barrier is being relied on.
6005           DCHECK(needs_write_barrier);
6006           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6007           CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6008           codegen_->MarkGCCard(temp, card, array);
6009         }
6010         DCHECK(!needs_type_check);
6011         break;
6012       }
6013 
6014       CpuRegister register_value = value.AsRegister<CpuRegister>();
6015       const bool can_value_be_null = instruction->GetValueCanBeNull();
6016       // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
6017       // value is null (without an extra CompareAndBranchIfZero since we already checked if the
6018       // value is null for the type check).
6019       const bool skip_marking_gc_card =
6020           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
6021       NearLabel do_store;
6022       NearLabel skip_writing_card;
6023       if (can_value_be_null) {
6024         __ testl(register_value, register_value);
6025         if (skip_marking_gc_card) {
6026           __ j(kEqual, &skip_writing_card);
6027         } else {
6028           __ j(kEqual, &do_store);
6029         }
6030       }
6031 
6032       SlowPathCode* slow_path = nullptr;
6033       if (needs_type_check) {
6034         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
6035         codegen_->AddSlowPath(slow_path);
6036 
6037         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6038         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6039         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6040 
6041         // Note that when Baker read barriers are enabled, the type
6042         // checks are performed without read barriers.  This is fine,
6043         // even in the case where a class object is in the from-space
6044         // after the flip, as a comparison involving such a type would
6045         // not produce a false positive; it may of course produce a
6046         // false negative, in which case we would take the ArraySet
6047         // slow path.
6048 
6049         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6050         // /* HeapReference<Class> */ temp = array->klass_
6051         __ movl(temp, Address(array, class_offset));
6052         codegen_->MaybeRecordImplicitNullCheck(instruction);
6053         __ MaybeUnpoisonHeapReference(temp);
6054 
6055         // /* HeapReference<Class> */ temp = temp->component_type_
6056         __ movl(temp, Address(temp, component_offset));
6057         // If heap poisoning is enabled, no need to unpoison `temp`
6058         // nor the object reference in `register_value->klass`, as
6059         // we are comparing two poisoned references.
6060         __ cmpl(temp, Address(register_value, class_offset));
6061 
6062         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6063           NearLabel do_put;
6064           __ j(kEqual, &do_put);
6065           // If heap poisoning is enabled, the `temp` reference has
6066           // not been unpoisoned yet; unpoison it now.
6067           __ MaybeUnpoisonHeapReference(temp);
6068 
6069           // If heap poisoning is enabled, no need to unpoison the
6070           // heap reference loaded below, as it is only used for a
6071           // comparison with null.
6072           __ cmpl(Address(temp, super_offset), Immediate(0));
6073           __ j(kNotEqual, slow_path->GetEntryLabel());
6074           __ Bind(&do_put);
6075         } else {
6076           __ j(kNotEqual, slow_path->GetEntryLabel());
6077         }
6078       }
6079 
6080       if (can_value_be_null && !skip_marking_gc_card) {
6081         DCHECK(do_store.IsLinked());
6082         __ Bind(&do_store);
6083       }
6084 
6085       if (needs_write_barrier) {
6086         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6087         CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6088         codegen_->MarkGCCard(temp, card, array);
6089       } else if (codegen_->ShouldCheckGCCard(
6090                      value_type, instruction->GetValue(), write_barrier_kind)) {
6091         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6092         CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6093         codegen_->CheckGCCardIsValid(temp, card, array);
6094       }
6095 
6096       if (skip_marking_gc_card) {
6097         // Note that we don't check that the GC card is valid, as it can legitimately be clean.
6098         DCHECK(skip_writing_card.IsLinked());
6099         __ Bind(&skip_writing_card);
6100       }
6101 
6102       Location source = value;
6103       if (kPoisonHeapReferences) {
6104         Location temp_loc = locations->GetTemp(0);
6105         CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6106         __ movl(temp, register_value);
6107         __ PoisonHeapReference(temp);
6108         source = temp_loc;
6109       }
6110 
6111       __ movl(address, source.AsRegister<CpuRegister>());
6112 
6113       if (can_value_be_null || !needs_type_check) {
6114         codegen_->MaybeRecordImplicitNullCheck(instruction);
6115       }
6116 
6117       if (slow_path != nullptr) {
6118         __ Bind(slow_path->GetExitLabel());
6119       }
6120 
6121       break;
6122     }
6123 
6124     case DataType::Type::kInt32: {
6125       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6126       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6127       if (value.IsRegister()) {
6128         __ movl(address, value.AsRegister<CpuRegister>());
6129       } else {
6130         DCHECK(value.IsConstant()) << value;
6131         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6132         __ movl(address, Immediate(v));
6133       }
6134       codegen_->MaybeRecordImplicitNullCheck(instruction);
6135       break;
6136     }
6137 
6138     case DataType::Type::kInt64: {
6139       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6140       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6141       if (value.IsRegister()) {
6142         __ movq(address, value.AsRegister<CpuRegister>());
6143         codegen_->MaybeRecordImplicitNullCheck(instruction);
6144       } else {
6145         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
6146         Address address_high =
6147             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6148         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6149       }
6150       break;
6151     }
6152 
6153     case DataType::Type::kFloat32: {
6154       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6155       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6156       if (value.IsFpuRegister()) {
6157         __ movss(address, value.AsFpuRegister<XmmRegister>());
6158       } else {
6159         DCHECK(value.IsConstant());
6160         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6161         __ movl(address, Immediate(v));
6162       }
6163       codegen_->MaybeRecordImplicitNullCheck(instruction);
6164       break;
6165     }
6166 
6167     case DataType::Type::kFloat64: {
6168       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6169       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6170       if (value.IsFpuRegister()) {
6171         __ movsd(address, value.AsFpuRegister<XmmRegister>());
6172         codegen_->MaybeRecordImplicitNullCheck(instruction);
6173       } else {
6174         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6175         Address address_high =
6176             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6177         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6178       }
6179       break;
6180     }
6181 
6182     case DataType::Type::kUint32:
6183     case DataType::Type::kUint64:
6184     case DataType::Type::kVoid:
6185       LOG(FATAL) << "Unreachable type " << instruction->GetType();
6186       UNREACHABLE();
6187   }
6188 }
6189 
6190 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
6191   LocationSummary* locations =
6192       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6193   locations->SetInAt(0, Location::RequiresRegister());
6194   if (!instruction->IsEmittedAtUseSite()) {
6195     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6196   }
6197 }
6198 
6199 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
6200   if (instruction->IsEmittedAtUseSite()) {
6201     return;
6202   }
6203 
6204   LocationSummary* locations = instruction->GetLocations();
6205   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6206   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
6207   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
6208   __ movl(out, Address(obj, offset));
6209   codegen_->MaybeRecordImplicitNullCheck(instruction);
6210   // Shift out the low-order compression flag bit in case the array is String's array of char.
6211   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6212     __ shrl(out, Immediate(1));
6213   }
6214 }
6215 
6216 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6217   RegisterSet caller_saves = RegisterSet::Empty();
6218   InvokeRuntimeCallingConvention calling_convention;
6219   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6220   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6221   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6222   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6223   HInstruction* length = instruction->InputAt(1);
6224   if (!length->IsEmittedAtUseSite()) {
6225     locations->SetInAt(1, Location::RegisterOrConstant(length));
6226   }
6227 }
6228 
6229 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6230   LocationSummary* locations = instruction->GetLocations();
6231   Location index_loc = locations->InAt(0);
6232   Location length_loc = locations->InAt(1);
6233   SlowPathCode* slow_path =
6234       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
6235 
6236   if (length_loc.IsConstant()) {
6237     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6238     if (index_loc.IsConstant()) {
6239       // BCE will remove the bounds check if we are guaranteed to pass.
6240       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6241       if (index < 0 || index >= length) {
6242         codegen_->AddSlowPath(slow_path);
6243         __ jmp(slow_path->GetEntryLabel());
6244       } else {
6245         // Some optimization after BCE may have generated this, and we should not
6246         // generate a bounds check if it is a valid range.
6247       }
6248       return;
6249     }
6250 
6251     // We have to reverse the jump condition because the length is the constant.
6252     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
6253     __ cmpl(index_reg, Immediate(length));
6254     codegen_->AddSlowPath(slow_path);
6255     __ j(kAboveEqual, slow_path->GetEntryLabel());
6256   } else {
6257     HInstruction* array_length = instruction->InputAt(1);
6258     if (array_length->IsEmittedAtUseSite()) {
6259       // Address the length field in the array.
6260       DCHECK(array_length->IsArrayLength());
6261       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6262       Location array_loc = array_length->GetLocations()->InAt(0);
6263       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
6264       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6265         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6266         // the string compression flag) with the in-memory length and avoid the temporary.
6267         CpuRegister length_reg = CpuRegister(TMP);
6268         __ movl(length_reg, array_len);
6269         codegen_->MaybeRecordImplicitNullCheck(array_length);
6270         __ shrl(length_reg, Immediate(1));
6271         codegen_->GenerateIntCompare(length_reg, index_loc);
6272       } else {
6273         // Checking the bound for the general case:
6274         // an array of char, or a String's array when the compression feature is off.
6275         if (index_loc.IsConstant()) {
6276           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6277           __ cmpl(array_len, Immediate(value));
6278         } else {
6279           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
6280         }
6281         codegen_->MaybeRecordImplicitNullCheck(array_length);
6282       }
6283     } else {
6284       codegen_->GenerateIntCompare(length_loc, index_loc);
6285     }
6286     codegen_->AddSlowPath(slow_path);
6287     __ j(kBelowEqual, slow_path->GetEntryLabel());
6288   }
6289 }
6290 
6291 void CodeGeneratorX86_64::MaybeMarkGCCard(CpuRegister temp,
6292                                           CpuRegister card,
6293                                           CpuRegister object,
6294                                           CpuRegister value,
6295                                           bool emit_null_check) {
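  // Storing a null reference cannot create a pointer the GC has to track through the card
  // table, so the card marking below can be skipped entirely when the value is null.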
6296   NearLabel is_null;
6297   if (emit_null_check) {
6298     __ testl(value, value);
6299     __ j(kEqual, &is_null);
6300   }
6301   MarkGCCard(temp, card, object);
6302   if (emit_null_check) {
6303     __ Bind(&is_null);
6304   }
6305 }
6306 
6307 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object) {
6308   // Load the address of the card table into `card`.
6309   __ gs()->movq(card,
6310                 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6311                                   /* no_rip= */ true));
6312   // Calculate the offset (in the card table) of the card corresponding to `object`.
6313   __ movq(temp, object);
6314   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6315   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
6316   // `object`'s card.
6317   //
6318   // Register `card` contains the address of the card table. Note that the card
6319   // table's base is biased during its creation so that it always starts at an
6320   // address whose least-significant byte is equal to `kCardDirty` (see
6321   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6322   // below writes the `kCardDirty` (byte) value into the `object`'s card
6323   // (located at `card + object >> kCardShift`).
6324   //
6325   // This dual use of the value in register `card` (1. to calculate the location
6326   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6327   // (no need to explicitly load `kCardDirty` as an immediate value).
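  //
  // Illustration: each card byte covers a 2^kCardShift-byte aligned chunk of the heap, so
  // `object >> kCardShift` is the card's index and, added to the biased table base held in
  // `card`, yields the address of the card to dirty.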
6328   __ movb(Address(temp, card, TIMES_1, 0), card);
6329 }
6330 
6331 void CodeGeneratorX86_64::CheckGCCardIsValid(CpuRegister temp,
6332                                              CpuRegister card,
6333                                              CpuRegister object) {
6334   NearLabel done;
6335   // Load the address of the card table into `card`.
6336   __ gs()->movq(card,
6337                 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6338                                   /* no_rip= */ true));
6339   // Calculate the offset (in the card table) of the card corresponding to `object`.
6340   __ movq(temp, object);
6341   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6342   // assert (!clean || !self->is_gc_marking)
6343   __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6344   __ j(kNotEqual, &done);
6345   __ gs()->cmpl(
6346       Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
6347       Immediate(0));
6348   __ j(kEqual, &done);
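  // Reaching the int3 below means the card is clean while the GC is marking, contradicting the
  // assertion above; trap so the inconsistency is caught immediately.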
6349   __ int3();
6350   __ Bind(&done);
6351 }
6352 
6353 void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6354   LOG(FATAL) << "Unimplemented";
6355 }
6356 
6357 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
6358   if (instruction->GetNext()->IsSuspendCheck() &&
6359       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6360     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6361     // The back edge will generate the suspend check.
6362     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6363   }
6364 
6365   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6366 }
6367 
6368 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6369   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6370       instruction, LocationSummary::kCallOnSlowPath);
6371   // In the suspend check slow path, usually there are no caller-save registers at all.
6372   // If SIMD instructions are present, however, we force spilling all live SIMD
6373   // registers in full width (since the runtime only saves/restores the lower part).
6374   locations->SetCustomSlowPathCallerSaves(
6375       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6376 }
6377 
6378 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6379   HBasicBlock* block = instruction->GetBlock();
6380   if (block->GetLoopInformation() != nullptr) {
6381     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6382     // The back edge will generate the suspend check.
6383     return;
6384   }
6385   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6386     // The goto will generate the suspend check.
6387     return;
6388   }
6389   GenerateSuspendCheck(instruction, nullptr);
6390 }
6391 
6392 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6393                                                           HBasicBlock* successor) {
6394   SuspendCheckSlowPathX86_64* slow_path =
6395       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6396   if (slow_path == nullptr) {
6397     slow_path =
6398         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6399     instruction->SetSlowPath(slow_path);
6400     codegen_->AddSlowPath(slow_path);
6401     if (successor != nullptr) {
6402       DCHECK(successor->IsLoopHeader());
6403     }
6404   } else {
6405     DCHECK_EQ(slow_path->GetSuccessor(), successor);
6406   }
6407 
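  // Test the thread flags, read from the Thread object via the GS segment, for a pending
  // suspend or checkpoint request.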
6408   __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6409                                    /* no_rip= */ true),
6410                  Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6411   if (successor == nullptr) {
6412     __ j(kNotZero, slow_path->GetEntryLabel());
6413     __ Bind(slow_path->GetReturnLabel());
6414   } else {
6415     __ j(kZero, codegen_->GetLabelOf(successor));
6416     __ jmp(slow_path->GetEntryLabel());
6417   }
6418 }
6419 
6420 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6421   return codegen_->GetAssembler();
6422 }
6423 
6424 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6425   MoveOperands* move = moves_[index];
6426   Location source = move->GetSource();
6427   Location destination = move->GetDestination();
6428 
6429   if (source.IsRegister()) {
6430     if (destination.IsRegister()) {
6431       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6432     } else if (destination.IsStackSlot()) {
6433       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6434               source.AsRegister<CpuRegister>());
6435     } else {
6436       DCHECK(destination.IsDoubleStackSlot());
6437       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6438               source.AsRegister<CpuRegister>());
6439     }
6440   } else if (source.IsStackSlot()) {
6441     if (destination.IsRegister()) {
6442       __ movl(destination.AsRegister<CpuRegister>(),
6443               Address(CpuRegister(RSP), source.GetStackIndex()));
6444     } else if (destination.IsFpuRegister()) {
6445       __ movss(destination.AsFpuRegister<XmmRegister>(),
6446               Address(CpuRegister(RSP), source.GetStackIndex()));
6447     } else {
6448       DCHECK(destination.IsStackSlot());
6449       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6450       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6451     }
6452   } else if (source.IsDoubleStackSlot()) {
6453     if (destination.IsRegister()) {
6454       __ movq(destination.AsRegister<CpuRegister>(),
6455               Address(CpuRegister(RSP), source.GetStackIndex()));
6456     } else if (destination.IsFpuRegister()) {
6457       __ movsd(destination.AsFpuRegister<XmmRegister>(),
6458                Address(CpuRegister(RSP), source.GetStackIndex()));
6459     } else {
6460       DCHECK(destination.IsDoubleStackSlot()) << destination;
6461       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6462       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6463     }
6464   } else if (source.IsSIMDStackSlot()) {
6465     if (destination.IsFpuRegister()) {
6466       __ movups(destination.AsFpuRegister<XmmRegister>(),
6467                 Address(CpuRegister(RSP), source.GetStackIndex()));
6468     } else {
6469       DCHECK(destination.IsSIMDStackSlot());
6470       size_t high = kX86_64WordSize;
6471       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6472       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6473       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6474       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6475     }
6476   } else if (source.IsConstant()) {
6477     HConstant* constant = source.GetConstant();
6478     if (constant->IsIntConstant() || constant->IsNullConstant()) {
6479       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6480       if (destination.IsRegister()) {
6481         if (value == 0) {
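          // Prefer xor for zeroing: it has a shorter encoding than mov with an immediate and
          // breaks the dependency on the register's previous value.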
6482           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6483         } else {
6484           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6485         }
6486       } else {
6487         DCHECK(destination.IsStackSlot()) << destination;
6488         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6489       }
6490     } else if (constant->IsLongConstant()) {
6491       int64_t value = constant->AsLongConstant()->GetValue();
6492       if (destination.IsRegister()) {
6493         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6494       } else {
6495         DCHECK(destination.IsDoubleStackSlot()) << destination;
6496         codegen_->Store64BitValueToStack(destination, value);
6497       }
6498     } else if (constant->IsFloatConstant()) {
6499       float fp_value = constant->AsFloatConstant()->GetValue();
6500       if (destination.IsFpuRegister()) {
6501         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6502         codegen_->Load32BitValue(dest, fp_value);
6503       } else {
6504         DCHECK(destination.IsStackSlot()) << destination;
6505         Immediate imm(bit_cast<int32_t, float>(fp_value));
6506         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6507       }
6508     } else {
6509       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6510       double fp_value =  constant->AsDoubleConstant()->GetValue();
6511       int64_t value = bit_cast<int64_t, double>(fp_value);
6512       if (destination.IsFpuRegister()) {
6513         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6514         codegen_->Load64BitValue(dest, fp_value);
6515       } else {
6516         DCHECK(destination.IsDoubleStackSlot()) << destination;
6517         codegen_->Store64BitValueToStack(destination, value);
6518       }
6519     }
6520   } else if (source.IsFpuRegister()) {
6521     if (destination.IsFpuRegister()) {
6522       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6523     } else if (destination.IsStackSlot()) {
6524       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6525                source.AsFpuRegister<XmmRegister>());
6526     } else if (destination.IsDoubleStackSlot()) {
6527       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6528                source.AsFpuRegister<XmmRegister>());
6529     } else {
6530       DCHECK(destination.IsSIMDStackSlot());
6531       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6532                 source.AsFpuRegister<XmmRegister>());
6533     }
6534   }
6535 }
6536 
6537 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6538   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6539   __ movl(Address(CpuRegister(RSP), mem), reg);
6540   __ movl(reg, CpuRegister(TMP));
6541 }
6542 
6543 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6544   __ movq(CpuRegister(TMP), reg1);
6545   __ movq(reg1, reg2);
6546   __ movq(reg2, CpuRegister(TMP));
6547 }
6548 
6549 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6550   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6551   __ movq(Address(CpuRegister(RSP), mem), reg);
6552   __ movq(reg, CpuRegister(TMP));
6553 }
6554 
6555 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6556   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6557   __ movss(Address(CpuRegister(RSP), mem), reg);
6558   __ movd(reg, CpuRegister(TMP));
6559 }
6560 
6561 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6562   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6563   __ movsd(Address(CpuRegister(RSP), mem), reg);
6564   __ movd(reg, CpuRegister(TMP));
6565 }
6566 
6567 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
6568   size_t extra_slot = 2 * kX86_64WordSize;
6569   __ subq(CpuRegister(RSP), Immediate(extra_slot));
6570   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6571   ExchangeMemory64(0, mem + extra_slot, 2);
6572   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6573   __ addq(CpuRegister(RSP), Immediate(extra_slot));
6574 }
6575 
6576 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6577   ScratchRegisterScope ensure_scratch(
6578       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6579 
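  // If the scratch register had to be spilled, it was pushed onto the stack, so the incoming
  // RSP-relative offsets must be adjusted by one word.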
6580   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6581   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6582   __ movl(CpuRegister(ensure_scratch.GetRegister()),
6583           Address(CpuRegister(RSP), mem2 + stack_offset));
6584   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6585   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6586           CpuRegister(ensure_scratch.GetRegister()));
6587 }
6588 
6589 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6590   ScratchRegisterScope ensure_scratch(
6591       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6592 
6593   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6594 
6595   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6596   for (int i = 0; i < num_of_qwords; i++) {
6597     __ movq(CpuRegister(TMP),
6598             Address(CpuRegister(RSP), mem1 + stack_offset));
6599     __ movq(CpuRegister(ensure_scratch.GetRegister()),
6600             Address(CpuRegister(RSP), mem2 + stack_offset));
6601     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6602             CpuRegister(TMP));
6603     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6604             CpuRegister(ensure_scratch.GetRegister()));
6605     stack_offset += kX86_64WordSize;
6606   }
6607 }
6608 
6609 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6610   MoveOperands* move = moves_[index];
6611   Location source = move->GetSource();
6612   Location destination = move->GetDestination();
6613 
6614   if (source.IsRegister() && destination.IsRegister()) {
6615     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6616   } else if (source.IsRegister() && destination.IsStackSlot()) {
6617     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6618   } else if (source.IsStackSlot() && destination.IsRegister()) {
6619     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6620   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6621     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6622   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6623     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6624   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6625     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6626   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6627     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6628   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
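    // Swap the two FP registers through the integer TMP register; a scalar float or double
    // fits in TMP, so no second XMM temporary is needed.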
6629     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6630     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6631     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6632   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6633     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6634   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6635     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6636   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6637     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6638   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6639     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6640   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6641     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6642   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6643     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6644   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6645     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6646   } else {
6647     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6648   }
6649 }
6650 
6651 
6652 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6653   __ pushq(CpuRegister(reg));
6654 }
6655 
6656 
6657 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6658   __ popq(CpuRegister(reg));
6659 }
6660 
6661 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6662     SlowPathCode* slow_path, CpuRegister class_reg) {
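  // Compare the class status byte against the shifted "visibly initialized" value; any status
  // below it means the class may still need initialization, so take the slow path.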
6663   __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
6664   __ j(kBelow, slow_path->GetEntryLabel());
6665   __ Bind(slow_path->GetExitLabel());
6666 }
6667 
6668 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6669                                                                        CpuRegister temp) {
6670   uint32_t path_to_root = check->GetBitstringPathToRoot();
6671   uint32_t mask = check->GetBitstringMask();
6672   DCHECK(IsPowerOfTwo(mask + 1));
6673   size_t mask_bits = WhichPowerOf2(mask + 1);
6674 
6675   if (mask_bits == 16u) {
6676     // Compare the bitstring in memory.
6677     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6678   } else {
6679     // /* uint32_t */ temp = temp->status_
6680     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6681     // Compare the bitstring bits using SUB.
6682     __ subl(temp, Immediate(path_to_root));
6683     // Shift out bits that do not contribute to the comparison.
6684     __ shll(temp, Immediate(32u - mask_bits));
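    // ZF is now set iff the masked bits of the status word equal `path_to_root`; the caller
    // branches on the flags.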
6685   }
6686 }
6687 
6688 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6689     HLoadClass::LoadKind desired_class_load_kind) {
6690   switch (desired_class_load_kind) {
6691     case HLoadClass::LoadKind::kInvalid:
6692       LOG(FATAL) << "UNREACHABLE";
6693       UNREACHABLE();
6694     case HLoadClass::LoadKind::kReferrersClass:
6695       break;
6696     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6697     case HLoadClass::LoadKind::kBootImageRelRo:
6698     case HLoadClass::LoadKind::kAppImageRelRo:
6699     case HLoadClass::LoadKind::kBssEntry:
6700     case HLoadClass::LoadKind::kBssEntryPublic:
6701     case HLoadClass::LoadKind::kBssEntryPackage:
6702       DCHECK(!GetCompilerOptions().IsJitCompiler());
6703       break;
6704     case HLoadClass::LoadKind::kJitBootImageAddress:
6705     case HLoadClass::LoadKind::kJitTableAddress:
6706       DCHECK(GetCompilerOptions().IsJitCompiler());
6707       break;
6708     case HLoadClass::LoadKind::kRuntimeCall:
6709       break;
6710   }
6711   return desired_class_load_kind;
6712 }
6713 
6714 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6715   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6716   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6717     // Custom calling convention: RAX serves as both input and output.
6718     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6719         cls,
6720         Location::RegisterLocation(RAX),
6721         Location::RegisterLocation(RAX));
6722     return;
6723   }
6724   DCHECK_EQ(cls->NeedsAccessCheck(),
6725             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6726                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6727 
6728   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
6729   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6730       ? LocationSummary::kCallOnSlowPath
6731       : LocationSummary::kNoCall;
6732   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6733   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6734     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6735   }
6736 
6737   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6738     locations->SetInAt(0, Location::RequiresRegister());
6739   }
6740   locations->SetOut(Location::RequiresRegister());
6741   if (load_kind == HLoadClass::LoadKind::kBssEntry ||
6742       load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6743       load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
6744     if (codegen_->EmitNonBakerReadBarrier()) {
6745       // For non-Baker read barrier we have a temp-clobbering call.
6746     } else {
6747       // Rely on the type resolution and/or initialization to save everything.
6748       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6749     }
6750   }
6751 }
6752 
6753 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6754                                                  dex::TypeIndex type_index,
6755                                                  Handle<mirror::Class> handle) {
6756   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6757   // Add a patch entry and return the label.
6758   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6759   PatchInfo<Label>* info = &jit_class_patches_.back();
6760   return &info->label;
6761 }
6762 
6763 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6764 // move.
6765 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6766   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6767   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6768     codegen_->GenerateLoadClassRuntimeCall(cls);
6769     return;
6770   }
6771   DCHECK_EQ(cls->NeedsAccessCheck(),
6772             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6773                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6774 
6775   LocationSummary* locations = cls->GetLocations();
6776   Location out_loc = locations->Out();
6777   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6778 
6779   const ReadBarrierOption read_barrier_option =
6780       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
6781   bool generate_null_check = false;
6782   switch (load_kind) {
6783     case HLoadClass::LoadKind::kReferrersClass: {
6784       DCHECK(!cls->CanCallRuntime());
6785       DCHECK(!cls->MustGenerateClinitCheck());
6786       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6787       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6788       GenerateGcRootFieldLoad(
6789           cls,
6790           out_loc,
6791           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6792           /* fixup_label= */ nullptr,
6793           read_barrier_option);
6794       break;
6795     }
6796     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6797       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6798              codegen_->GetCompilerOptions().IsBootImageExtension());
6799       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6800       __ leal(out,
6801               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6802       codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6803       break;
6804     case HLoadClass::LoadKind::kBootImageRelRo: {
6805       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6806       __ movl(out,
6807               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6808       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6809       break;
6810     }
6811     case HLoadClass::LoadKind::kAppImageRelRo: {
6812       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
6813       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6814       __ movl(out,
6815               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6816       codegen_->RecordAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6817       break;
6818     }
6819     case HLoadClass::LoadKind::kBssEntry:
6820     case HLoadClass::LoadKind::kBssEntryPublic:
6821     case HLoadClass::LoadKind::kBssEntryPackage: {
6822       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6823                                           /* no_rip= */ false);
6824       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6825       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6826       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6827       // No need for memory fence, thanks to the x86-64 memory model.
6828       generate_null_check = true;
6829       break;
6830     }
6831     case HLoadClass::LoadKind::kJitBootImageAddress: {
6832       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6833       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6834       DCHECK_NE(address, 0u);
6835       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6836       break;
6837     }
6838     case HLoadClass::LoadKind::kJitTableAddress: {
6839       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6840                                           /* no_rip= */ true);
6841       Label* fixup_label =
6842           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6843       // /* GcRoot<mirror::Class> */ out = *address
6844       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6845       break;
6846     }
6847     default:
6848       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6849       UNREACHABLE();
6850   }
6851 
6852   if (generate_null_check || cls->MustGenerateClinitCheck()) {
6853     DCHECK(cls->CanCallRuntime());
6854     SlowPathCode* slow_path =
6855         new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6856     codegen_->AddSlowPath(slow_path);
6857     if (generate_null_check) {
6858       __ testl(out, out);
6859       __ j(kEqual, slow_path->GetEntryLabel());
6860     }
6861     if (cls->MustGenerateClinitCheck()) {
6862       GenerateClassInitializationCheck(slow_path, out);
6863     } else {
6864       __ Bind(slow_path->GetExitLabel());
6865     }
6866   }
6867 }
6868 
6869 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6870   LocationSummary* locations =
6871       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6872   locations->SetInAt(0, Location::RequiresRegister());
6873   if (check->HasUses()) {
6874     locations->SetOut(Location::SameAsFirstInput());
6875   }
6876   // Rely on the type initialization to save everything we need.
6877   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6878 }
6879 
6880 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6881   // Custom calling convention: RAX serves as both input and output.
6882   Location location = Location::RegisterLocation(RAX);
6883   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6884 }
6885 
6886 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6887   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6888 }
6889 
6890 Label* CodeGeneratorX86_64::NewJitRootMethodTypePatch(const DexFile& dex_file,
6891                                                       dex::ProtoIndex proto_index,
6892                                                       Handle<mirror::MethodType> handle) {
6893   ReserveJitMethodTypeRoot(ProtoReference(&dex_file, proto_index), handle);
6894   // Add a patch entry and return the label.
6895   jit_method_type_patches_.emplace_back(&dex_file, proto_index.index_);
6896   PatchInfo<Label>* info = &jit_method_type_patches_.back();
6897   return &info->label;
6898 }
6899 
6900 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6901   LocationSummary* locations =
6902       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
6903   if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) {
6904     Location location = Location::RegisterLocation(RAX);
6905     CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6906   } else {
6907     locations->SetOut(Location::RequiresRegister());
6908     if (load->GetLoadKind() == HLoadMethodType::LoadKind::kBssEntry) {
6909       if (codegen_->EmitNonBakerReadBarrier()) {
6910         // For non-Baker read barrier we have a temp-clobbering call.
6911       } else {
6912         // Rely on the pResolveMethodType to save everything.
6913         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6914       }
6915     }
6916   }
6917 }
6918 
6919 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6920   LocationSummary* locations = load->GetLocations();
6921   Location out_loc = locations->Out();
6922   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6923 
6924   switch (load->GetLoadKind()) {
6925     case HLoadMethodType::LoadKind::kBssEntry: {
6926       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6927                                           /* no_rip= */ false);
6928       Label* fixup_label = codegen_->NewMethodTypeBssEntryPatch(load);
6929       // /* GcRoot<mirror::MethodType> */ out = *address  /* PC-relative */
6930       GenerateGcRootFieldLoad(
6931           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6932       // No need for memory fence, thanks to the x86-64 memory model.
6933       SlowPathCode* slow_path =
6934           new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathX86_64(load);
6935       codegen_->AddSlowPath(slow_path);
6936       __ testl(out, out);
6937       __ j(kEqual, slow_path->GetEntryLabel());
6938       __ Bind(slow_path->GetExitLabel());
6939       return;
6940     }
6941     case HLoadMethodType::LoadKind::kJitTableAddress: {
6942       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6943                                           /* no_rip= */ true);
6944       Handle<mirror::MethodType> method_type = load->GetMethodType();
6945       DCHECK(method_type != nullptr);
6946       Label* fixup_label = codegen_->NewJitRootMethodTypePatch(
6947           load->GetDexFile(), load->GetProtoIndex(), method_type);
6948       GenerateGcRootFieldLoad(
6949           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6950       return;
6951     }
6952     default:
6953       DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall);
6954       codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6955       break;
6956   }
6957 }
6958 
6959 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6960   // We assume the class is not null.
6961   SlowPathCode* slow_path =
6962       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6963   codegen_->AddSlowPath(slow_path);
6964   GenerateClassInitializationCheck(slow_path,
6965                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6966 }
6967 
6968 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6969     HLoadString::LoadKind desired_string_load_kind) {
6970   switch (desired_string_load_kind) {
6971     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6972     case HLoadString::LoadKind::kBootImageRelRo:
6973     case HLoadString::LoadKind::kBssEntry:
6974       DCHECK(!GetCompilerOptions().IsJitCompiler());
6975       break;
6976     case HLoadString::LoadKind::kJitBootImageAddress:
6977     case HLoadString::LoadKind::kJitTableAddress:
6978       DCHECK(GetCompilerOptions().IsJitCompiler());
6979       break;
6980     case HLoadString::LoadKind::kRuntimeCall:
6981       break;
6982   }
6983   return desired_string_load_kind;
6984 }
6985 
6986 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6987   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
6988   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6989   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6990     locations->SetOut(Location::RegisterLocation(RAX));
6991   } else {
6992     locations->SetOut(Location::RequiresRegister());
6993     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6994       if (codegen_->EmitNonBakerReadBarrier()) {
6995         // For non-Baker read barrier we have a temp-clobbering call.
6996       } else {
6997         // Rely on the pResolveString to save everything.
6998         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6999       }
7000     }
7001   }
7002 }
7003 
7004 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
7005                                                   dex::StringIndex string_index,
7006                                                   Handle<mirror::String> handle) {
7007   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7008   // Add a patch entry and return the label.
7009   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7010   PatchInfo<Label>* info = &jit_string_patches_.back();
7011   return &info->label;
7012 }
7013 
7014 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7015 // move.
7016 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7017   LocationSummary* locations = load->GetLocations();
7018   Location out_loc = locations->Out();
7019   CpuRegister out = out_loc.AsRegister<CpuRegister>();
7020 
7021   switch (load->GetLoadKind()) {
7022     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7023       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7024              codegen_->GetCompilerOptions().IsBootImageExtension());
7025       __ leal(out,
7026               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
7027       codegen_->RecordBootImageStringPatch(load);
7028       return;
7029     }
7030     case HLoadString::LoadKind::kBootImageRelRo: {
7031       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7032       __ movl(out,
7033               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
7034       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
7035       return;
7036     }
7037     case HLoadString::LoadKind::kBssEntry: {
7038       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
7039                                           /* no_rip= */ false);
7040       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7041       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
7042       GenerateGcRootFieldLoad(
7043           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7044       // No need for memory fence, thanks to the x86-64 memory model.
7045       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
7046       codegen_->AddSlowPath(slow_path);
7047       __ testl(out, out);
7048       __ j(kEqual, slow_path->GetEntryLabel());
7049       __ Bind(slow_path->GetExitLabel());
7050       return;
7051     }
7052     case HLoadString::LoadKind::kJitBootImageAddress: {
7053       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7054       DCHECK_NE(address, 0u);
7055       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
7056       return;
7057     }
7058     case HLoadString::LoadKind::kJitTableAddress: {
7059       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
7060                                           /* no_rip= */ true);
7061       Label* fixup_label = codegen_->NewJitRootStringPatch(
7062           load->GetDexFile(), load->GetStringIndex(), load->GetString());
7063       // /* GcRoot<mirror::String> */ out = *address
7064       GenerateGcRootFieldLoad(
7065           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7066       return;
7067     }
7068     default:
7069       break;
7070   }
7071 
7072   // Custom calling convention: RAX serves as both input and output.
7073   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
7074   codegen_->InvokeRuntime(kQuickResolveString,
7075                           load,
7076                           load->GetDexPc());
7077   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7078 }
7079 
7080 static Address GetExceptionTlsAddress() {
7081   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
7082                            /* no_rip= */ true);
7083 }
7084 
7085 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
7086   LocationSummary* locations =
7087       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7088   locations->SetOut(Location::RequiresRegister());
7089 }
7090 
7091 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
7092   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
7093 }
7094 
7095 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
7096   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7097 }
7098 
7099 void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) {
7100   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
7101 }
7102 
7103 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
7104   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7105       instruction, LocationSummary::kCallOnMainOnly);
7106   InvokeRuntimeCallingConvention calling_convention;
7107   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7108 }
7109 
7110 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
7111   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7112   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7113 }
7114 
7115 // Temp is used for read barrier.
7116 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7117   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7118     return 1;
7119   }
7120   if (emit_read_barrier &&
7121       !kUseBakerReadBarrier &&
7122       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7123        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7124        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7125     return 1;
7126   }
7127   return 0;
7128 }
7129 
7130 // The interface case has 2 temps: one to hold the number of interfaces and one for the current
7131 // interface pointer; the current interface is compared in memory.
7132 // The other checks have one temp for loading the object's class.
7133 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7134   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7135 }
7136 
7137 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7138   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7139   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7140   bool baker_read_barrier_slow_path = false;
7141   switch (type_check_kind) {
7142     case TypeCheckKind::kExactCheck:
7143     case TypeCheckKind::kAbstractClassCheck:
7144     case TypeCheckKind::kClassHierarchyCheck:
7145     case TypeCheckKind::kArrayObjectCheck:
7146     case TypeCheckKind::kInterfaceCheck: {
7147       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7148       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7149       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7150                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
7151       break;
7152     }
7153     case TypeCheckKind::kArrayCheck:
7154     case TypeCheckKind::kUnresolvedCheck:
7155       call_kind = LocationSummary::kCallOnSlowPath;
7156       break;
7157     case TypeCheckKind::kBitstringCheck:
7158       break;
7159   }
7160 
7161   LocationSummary* locations =
7162       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7163   if (baker_read_barrier_slow_path) {
7164     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7165   }
7166   locations->SetInAt(0, Location::RequiresRegister());
7167   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7168     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7169     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7170     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7171   } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7172     locations->SetInAt(1, Location::RequiresRegister());
7173   } else {
7174     locations->SetInAt(1, Location::Any());
7175   }
7176   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
7177   locations->SetOut(Location::RequiresRegister());
7178   locations->AddRegisterTemps(
7179       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7180 }
7181 
7182 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7183   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7184   LocationSummary* locations = instruction->GetLocations();
7185   Location obj_loc = locations->InAt(0);
7186   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7187   Location cls = locations->InAt(1);
7188   Location out_loc =  locations->Out();
7189   CpuRegister out = out_loc.AsRegister<CpuRegister>();
7190   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7191   DCHECK_LE(num_temps, 1u);
7192   Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
7193   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7194   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7195   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7196   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7197   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7198   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7199   const uint32_t object_array_data_offset =
7200       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7201   SlowPathCode* slow_path = nullptr;
7202   NearLabel done, zero;
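  // On a successful check the code below sets `out` to 1 (directly or via setcc) and joins
  // at `done`; failure paths jump to `zero`, which clears `out`. Each label is bound only
  // if some path actually linked it.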
7203 
7204   // Return 0 if `obj` is null.
7205   // Avoid null check if we know obj is not null.
7206   if (instruction->MustDoNullCheck()) {
7207     __ testl(obj, obj);
7208     __ j(kEqual, &zero);
7209   }
7210 
7211   switch (type_check_kind) {
7212     case TypeCheckKind::kExactCheck: {
7213       ReadBarrierOption read_barrier_option =
7214           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7215       // /* HeapReference<Class> */ out = obj->klass_
7216       GenerateReferenceLoadTwoRegisters(instruction,
7217                                         out_loc,
7218                                         obj_loc,
7219                                         class_offset,
7220                                         read_barrier_option);
7221       if (cls.IsRegister()) {
7222         __ cmpl(out, cls.AsRegister<CpuRegister>());
7223       } else {
7224         DCHECK(cls.IsStackSlot()) << cls;
7225         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7226       }
7227       if (zero.IsLinked()) {
7228         // Classes must be equal for the instanceof to succeed.
7229         __ j(kNotEqual, &zero);
7230         __ movl(out, Immediate(1));
7231         __ jmp(&done);
7232       } else {
7233         __ setcc(kEqual, out);
7234         // setcc only sets the low byte.
7235         __ andl(out, Immediate(1));
7236       }
7237       break;
7238     }
7239 
7240     case TypeCheckKind::kAbstractClassCheck: {
7241       ReadBarrierOption read_barrier_option =
7242           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7243       // /* HeapReference<Class> */ out = obj->klass_
7244       GenerateReferenceLoadTwoRegisters(instruction,
7245                                         out_loc,
7246                                         obj_loc,
7247                                         class_offset,
7248                                         read_barrier_option);
7249       // If the class is abstract, we eagerly fetch the super class of the
7250       // object to avoid doing a comparison we know will fail.
7251       NearLabel loop, success;
7252       __ Bind(&loop);
7253       // /* HeapReference<Class> */ out = out->super_class_
7254       GenerateReferenceLoadOneRegister(instruction,
7255                                        out_loc,
7256                                        super_offset,
7257                                        maybe_temp_loc,
7258                                        read_barrier_option);
7259       __ testl(out, out);
7260       // If `out` is null, we use it for the result, and jump to `done`.
7261       __ j(kEqual, &done);
7262       if (cls.IsRegister()) {
7263         __ cmpl(out, cls.AsRegister<CpuRegister>());
7264       } else {
7265         DCHECK(cls.IsStackSlot()) << cls;
7266         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7267       }
7268       __ j(kNotEqual, &loop);
7269       __ movl(out, Immediate(1));
7270       if (zero.IsLinked()) {
7271         __ jmp(&done);
7272       }
7273       break;
7274     }
7275 
7276     case TypeCheckKind::kClassHierarchyCheck: {
7277       ReadBarrierOption read_barrier_option =
7278           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7279       // /* HeapReference<Class> */ out = obj->klass_
7280       GenerateReferenceLoadTwoRegisters(instruction,
7281                                         out_loc,
7282                                         obj_loc,
7283                                         class_offset,
7284                                         read_barrier_option);
7285       // Walk over the class hierarchy to find a match.
7286       NearLabel loop, success;
7287       __ Bind(&loop);
7288       if (cls.IsRegister()) {
7289         __ cmpl(out, cls.AsRegister<CpuRegister>());
7290       } else {
7291         DCHECK(cls.IsStackSlot()) << cls;
7292         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7293       }
7294       __ j(kEqual, &success);
7295       // /* HeapReference<Class> */ out = out->super_class_
7296       GenerateReferenceLoadOneRegister(instruction,
7297                                        out_loc,
7298                                        super_offset,
7299                                        maybe_temp_loc,
7300                                        read_barrier_option);
7301       __ testl(out, out);
7302       __ j(kNotEqual, &loop);
7303       // If `out` is null, we use it for the result, and jump to `done`.
7304       __ jmp(&done);
7305       __ Bind(&success);
7306       __ movl(out, Immediate(1));
7307       if (zero.IsLinked()) {
7308         __ jmp(&done);
7309       }
7310       break;
7311     }
7312 
7313     case TypeCheckKind::kArrayObjectCheck: {
7314       ReadBarrierOption read_barrier_option =
7315           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7316       // /* HeapReference<Class> */ out = obj->klass_
7317       GenerateReferenceLoadTwoRegisters(instruction,
7318                                         out_loc,
7319                                         obj_loc,
7320                                         class_offset,
7321                                         read_barrier_option);
7322       // Do an exact check.
7323       NearLabel exact_check;
7324       if (cls.IsRegister()) {
7325         __ cmpl(out, cls.AsRegister<CpuRegister>());
7326       } else {
7327         DCHECK(cls.IsStackSlot()) << cls;
7328         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7329       }
7330       __ j(kEqual, &exact_check);
7331       // Otherwise, we need to check that the object's class is a non-primitive array.
7332       // /* HeapReference<Class> */ out = out->component_type_
7333       GenerateReferenceLoadOneRegister(instruction,
7334                                        out_loc,
7335                                        component_offset,
7336                                        maybe_temp_loc,
7337                                        read_barrier_option);
7338       __ testl(out, out);
7339       // If `out` is null, we use it for the result, and jump to `done`.
7340       __ j(kEqual, &done);
7341       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7342       __ j(kNotEqual, &zero);
7343       __ Bind(&exact_check);
7344       __ movl(out, Immediate(1));
7345       __ jmp(&done);
7346       break;
7347     }
7348 
7349     case TypeCheckKind::kArrayCheck: {
7350       // No read barrier since the slow path will retry upon failure.
7351       // /* HeapReference<Class> */ out = obj->klass_
7352       GenerateReferenceLoadTwoRegisters(instruction,
7353                                         out_loc,
7354                                         obj_loc,
7355                                         class_offset,
7356                                         kWithoutReadBarrier);
7357       if (cls.IsRegister()) {
7358         __ cmpl(out, cls.AsRegister<CpuRegister>());
7359       } else {
7360         DCHECK(cls.IsStackSlot()) << cls;
7361         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7362       }
7363       DCHECK(locations->OnlyCallsOnSlowPath());
7364       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7365           instruction, /* is_fatal= */ false);
7366       codegen_->AddSlowPath(slow_path);
7367       __ j(kNotEqual, slow_path->GetEntryLabel());
7368       __ movl(out, Immediate(1));
7369       if (zero.IsLinked()) {
7370         __ jmp(&done);
7371       }
7372       break;
7373     }
7374 
7375     case TypeCheckKind::kInterfaceCheck: {
7376       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
7377         DCHECK(locations->OnlyCallsOnSlowPath());
7378         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7379             instruction, /* is_fatal= */ false);
7380         codegen_->AddSlowPath(slow_path);
7381         if (codegen_->EmitNonBakerReadBarrier()) {
7382           __ jmp(slow_path->GetEntryLabel());
7383           break;
7384         }
7385         // For Baker read barrier, take the slow path while marking.
7386         __ gs()->cmpl(
7387             Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
7388             Immediate(0));
7389         __ j(kNotEqual, slow_path->GetEntryLabel());
7390       }
7391 
7392       // Fast-path without read barriers.
7393       CpuRegister temp = maybe_temp_loc.AsRegister<CpuRegister>();
7394       // /* HeapReference<Class> */ temp = obj->klass_
7395       __ movl(temp, Address(obj, class_offset));
7396       __ MaybeUnpoisonHeapReference(temp);
7397       // /* HeapReference<Class> */ temp = temp->iftable_
7398       __ movl(temp, Address(temp, iftable_offset));
7399       __ MaybeUnpoisonHeapReference(temp);
7400       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7401       __ movl(out, Address(temp, array_length_offset));
7402       // Maybe poison the `cls` for direct comparison with memory.
7403       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7404       // Loop through the iftable and check if any class matches.
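      // Note: the `IfTable` stores (interface class, method array) pairs, so the entry
      // count in `out` steps by 2 and each slot is a 32-bit compressed reference, which
      // is why the comparison below scales the index by TIMES_4.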
7405       NearLabel loop, end;
7406       __ Bind(&loop);
7407       // Check if we still have an entry to compare.
7408       __ subl(out, Immediate(2));
7409       __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
7410       // Go to next interface if the classes do not match.
7411       __ cmpl(cls.AsRegister<CpuRegister>(),
7412               CodeGeneratorX86_64::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
7413       __ j(kNotEqual, &loop);
7414       if (zero.IsLinked()) {
7415         __ movl(out, Immediate(1));
7416         // If `cls` was poisoned above, unpoison it.
7417         __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7418         __ jmp(&done);
7419         if (kPoisonHeapReferences) {
7420           // The false case needs to unpoison the class before jumping to `zero`.
7421           __ Bind(&end);
7422           __ UnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7423           __ jmp(&zero);
7424         }
7425       } else {
7426         // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
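        // The iftable length is even, so exhausting the table leaves -2 in `out`, while the
        // match path overwrites `out` with -1; adding 2 below therefore yields 0 or 1.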
7427         __ movl(out, Immediate(-1));
7428         __ Bind(&end);
7429         __ addl(out, Immediate(2));
7430         // If `cls` was poisoned above, unpoison it.
7431         __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7432       }
7433       break;
7434     }
7435 
7436     case TypeCheckKind::kUnresolvedCheck: {
7437       // Note that we indeed only call on slow path, but we always go
7438       // into the slow path for the unresolved check case.
7439       //
7440       // We cannot directly call the InstanceofNonTrivial runtime
7441       // entry point without resorting to a type checking slow path
7442       // here (i.e. by calling InvokeRuntime directly), as it would
7443       // require to assign fixed registers for the inputs of this
7444       // require assigning fixed registers for the inputs of this
7445       // convention), which might be cluttered by the potential first
7446       // read barrier emission at the beginning of this method.
7447       //
7448       // TODO: Introduce a new runtime entry point taking the object
7449       // to test (instead of its class) as argument, and let it deal
7450       // with the read barrier issues. This will let us refactor this
7451       // case of the `switch` code as it was previously (with a direct
7452       // call to the runtime not using a type checking slow path).
7453       // This should also be beneficial for the other cases above.
7454       DCHECK(locations->OnlyCallsOnSlowPath());
7455       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7456           instruction, /* is_fatal= */ false);
7457       codegen_->AddSlowPath(slow_path);
7458       __ jmp(slow_path->GetEntryLabel());
7459       break;
7460     }
7461 
7462     case TypeCheckKind::kBitstringCheck: {
7463       // /* HeapReference<Class> */ out = obj->klass_
7464       GenerateReferenceLoadTwoRegisters(instruction,
7465                                         out_loc,
7466                                         obj_loc,
7467                                         class_offset,
7468                                         kWithoutReadBarrier);
7469 
7470       GenerateBitstringTypeCheckCompare(instruction, out);
7471       if (zero.IsLinked()) {
7472         __ j(kNotEqual, &zero);
7473         __ movl(out, Immediate(1));
7474         __ jmp(&done);
7475       } else {
7476         __ setcc(kEqual, out);
7477         // setcc only sets the low byte.
7478         __ andl(out, Immediate(1));
7479       }
7480       break;
7481     }
7482   }
7483 
7484   if (zero.IsLinked()) {
7485     __ Bind(&zero);
7486     __ xorl(out, out);
7487   }
7488 
7489   if (done.IsLinked()) {
7490     __ Bind(&done);
7491   }
7492 
7493   if (slow_path != nullptr) {
7494     __ Bind(slow_path->GetExitLabel());
7495   }
7496 }
7497 
7498 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
7499   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7500   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
7501   LocationSummary* locations =
7502       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7503   locations->SetInAt(0, Location::RequiresRegister());
7504   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7505     // Require a register for the interface check since there is a loop that compares the class to
7506     // a memory address.
7507     locations->SetInAt(1, Location::RequiresRegister());
7508   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7509     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7510     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7511     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7512   } else {
7513     locations->SetInAt(1, Location::Any());
7514   }
7515   locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
7516 }
7517 
7518 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7519   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7520   LocationSummary* locations = instruction->GetLocations();
7521   Location obj_loc = locations->InAt(0);
7522   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7523   Location cls = locations->InAt(1);
7524   Location temp_loc = locations->GetTemp(0);
7525   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7526   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
7527   DCHECK_GE(num_temps, 1u);
7528   DCHECK_LE(num_temps, 2u);
7529   Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7530   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7531   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7532   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7533   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7534   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7535   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7536   const uint32_t object_array_data_offset =
7537       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7538 
7539   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
7540   SlowPathCode* type_check_slow_path =
7541       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7542           instruction, is_type_check_slow_path_fatal);
7543   codegen_->AddSlowPath(type_check_slow_path);
7544 
7545 
7546   NearLabel done;
7547   // Avoid null check if we know obj is not null.
7548   if (instruction->MustDoNullCheck()) {
7549     __ testl(obj, obj);
7550     __ j(kEqual, &done);
7551   }
7552 
7553   switch (type_check_kind) {
7554     case TypeCheckKind::kExactCheck:
7555     case TypeCheckKind::kArrayCheck: {
7556       // /* HeapReference<Class> */ temp = obj->klass_
7557       GenerateReferenceLoadTwoRegisters(instruction,
7558                                         temp_loc,
7559                                         obj_loc,
7560                                         class_offset,
7561                                         kWithoutReadBarrier);
7562       if (cls.IsRegister()) {
7563         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7564       } else {
7565         DCHECK(cls.IsStackSlot()) << cls;
7566         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7567       }
7568       // Jump to slow path for throwing the exception or doing a
7569       // more involved array check.
7570       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7571       break;
7572     }
7573 
7574     case TypeCheckKind::kAbstractClassCheck: {
7575       // /* HeapReference<Class> */ temp = obj->klass_
7576       GenerateReferenceLoadTwoRegisters(instruction,
7577                                         temp_loc,
7578                                         obj_loc,
7579                                         class_offset,
7580                                         kWithoutReadBarrier);
7581       // If the class is abstract, we eagerly fetch the super class of the
7582       // object to avoid doing a comparison we know will fail.
7583       NearLabel loop;
7584       __ Bind(&loop);
7585       // /* HeapReference<Class> */ temp = temp->super_class_
7586       GenerateReferenceLoadOneRegister(instruction,
7587                                        temp_loc,
7588                                        super_offset,
7589                                        maybe_temp2_loc,
7590                                        kWithoutReadBarrier);
7591 
7592       // If the class reference currently in `temp` is null, jump to the slow path to throw the
7593       // exception.
7594       __ testl(temp, temp);
7595       // Otherwise, compare the classes.
7596       __ j(kZero, type_check_slow_path->GetEntryLabel());
7597       if (cls.IsRegister()) {
7598         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7599       } else {
7600         DCHECK(cls.IsStackSlot()) << cls;
7601         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7602       }
7603       __ j(kNotEqual, &loop);
7604       break;
7605     }
7606 
7607     case TypeCheckKind::kClassHierarchyCheck: {
7608       // /* HeapReference<Class> */ temp = obj->klass_
7609       GenerateReferenceLoadTwoRegisters(instruction,
7610                                         temp_loc,
7611                                         obj_loc,
7612                                         class_offset,
7613                                         kWithoutReadBarrier);
7614       // Walk over the class hierarchy to find a match.
7615       NearLabel loop;
7616       __ Bind(&loop);
7617       if (cls.IsRegister()) {
7618         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7619       } else {
7620         DCHECK(cls.IsStackSlot()) << cls;
7621         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7622       }
7623       __ j(kEqual, &done);
7624 
7625       // /* HeapReference<Class> */ temp = temp->super_class_
7626       GenerateReferenceLoadOneRegister(instruction,
7627                                        temp_loc,
7628                                        super_offset,
7629                                        maybe_temp2_loc,
7630                                        kWithoutReadBarrier);
7631 
7632       // If the class reference currently in `temp` is not null, jump
7633       // back at the beginning of the loop.
7634       __ testl(temp, temp);
7635       __ j(kNotZero, &loop);
7636       // Otherwise, jump to the slow path to throw the exception.
7637       __ jmp(type_check_slow_path->GetEntryLabel());
7638       break;
7639     }
7640 
7641     case TypeCheckKind::kArrayObjectCheck: {
7642       // /* HeapReference<Class> */ temp = obj->klass_
7643       GenerateReferenceLoadTwoRegisters(instruction,
7644                                         temp_loc,
7645                                         obj_loc,
7646                                         class_offset,
7647                                         kWithoutReadBarrier);
7648       // Do an exact check.
7649       NearLabel check_non_primitive_component_type;
7650       if (cls.IsRegister()) {
7651         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7652       } else {
7653         DCHECK(cls.IsStackSlot()) << cls;
7654         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7655       }
7656       __ j(kEqual, &done);
7657 
7658       // Otherwise, we need to check that the object's class is a non-primitive array.
7659       // /* HeapReference<Class> */ temp = temp->component_type_
7660       GenerateReferenceLoadOneRegister(instruction,
7661                                        temp_loc,
7662                                        component_offset,
7663                                        maybe_temp2_loc,
7664                                        kWithoutReadBarrier);
7665 
7666       // If the component type is not null (i.e. the object is indeed
7667       // an array), jump to label `check_non_primitive_component_type`
7668       // to further check that this component type is not a primitive
7669       // type.
7670       __ testl(temp, temp);
7671       // Otherwise, jump to the slow path to throw the exception.
7672       __ j(kZero, type_check_slow_path->GetEntryLabel());
7673       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7674       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7675       break;
7676     }
7677 
7678     case TypeCheckKind::kUnresolvedCheck: {
7679       // We always go into the type check slow path for the unresolved case.
7680       //
7681       // We cannot directly call the CheckCast runtime entry point
7682       // without resorting to a type checking slow path here (i.e. by
7683       // calling InvokeRuntime directly), as it would require to
7684       // calling InvokeRuntime directly), as it would require
7685       // assigning fixed registers for the inputs of this HCheckCast
7686       // might be cluttered by the potential first read barrier
7687       // emission at the beginning of this method.
7688       __ jmp(type_check_slow_path->GetEntryLabel());
7689       break;
7690     }
7691 
7692     case TypeCheckKind::kInterfaceCheck: {
7693       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7694       // We cannot get false positives by doing this.
7695       // /* HeapReference<Class> */ temp = obj->klass_
7696       GenerateReferenceLoadTwoRegisters(instruction,
7697                                         temp_loc,
7698                                         obj_loc,
7699                                         class_offset,
7700                                         kWithoutReadBarrier);
7701 
7702       // /* HeapReference<Class> */ temp = temp->iftable_
7703       GenerateReferenceLoadOneRegister(instruction,
7704                                        temp_loc,
7705                                        iftable_offset,
7706                                        maybe_temp2_loc,
7707                                        kWithoutReadBarrier);
7708       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7709       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7710       // Maybe poison the `cls` for direct comparison with memory.
7711       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7712       // Loop through the iftable and check if any class matches.
7713       NearLabel start_loop;
7714       __ Bind(&start_loop);
7715       // Check if we still have an entry to compare.
7716       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7717       __ j(kNegative, type_check_slow_path->GetEntryLabel());
7718       // Go to next interface if the classes do not match.
7719       __ cmpl(cls.AsRegister<CpuRegister>(),
7720               CodeGeneratorX86_64::ArrayAddress(temp,
7721                                                 maybe_temp2_loc,
7722                                                 TIMES_4,
7723                                                 object_array_data_offset));
7724       __ j(kNotEqual, &start_loop);  // Fall through (success) when the classes match.
7725       // If `cls` was poisoned above, unpoison it.
7726       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7727       break;
7728     }
7729 
7730     case TypeCheckKind::kBitstringCheck: {
7731       // /* HeapReference<Class> */ temp = obj->klass_
7732       GenerateReferenceLoadTwoRegisters(instruction,
7733                                         temp_loc,
7734                                         obj_loc,
7735                                         class_offset,
7736                                         kWithoutReadBarrier);
7737 
7738       GenerateBitstringTypeCheckCompare(instruction, temp);
7739       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7740       break;
7741     }
7742   }
7743 
7744   if (done.IsLinked()) {
7745     __ Bind(&done);
7746   }
7747 
7748   __ Bind(type_check_slow_path->GetExitLabel());
7749 }
7750 
7751 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7752   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7753       instruction, LocationSummary::kCallOnMainOnly);
7754   InvokeRuntimeCallingConvention calling_convention;
7755   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7756 }
7757 
7758 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7759   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7760                           instruction,
7761                           instruction->GetDexPc());
7762   if (instruction->IsEnter()) {
7763     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7764   } else {
7765     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7766   }
7767 }
7768 
7769 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7770   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7771   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7772   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7773   locations->SetInAt(0, Location::RequiresRegister());
7774   // There is no immediate variant of negated bitwise AND (ANDN) on x86.
7775   locations->SetInAt(1, Location::RequiresRegister());
7776   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7777 }
7778 
7779 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7780   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7781   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7782   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7783   locations->SetInAt(0, Location::RequiresRegister());
7784   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7785 }
7786 
7787 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7788   LocationSummary* locations = instruction->GetLocations();
7789   Location first = locations->InAt(0);
7790   Location second = locations->InAt(1);
7791   Location dest = locations->Out();
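  // ANDN (BMI1) computes dest = ~src1 & src2, i.e. `~first & second` here.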
7792   __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7793 }
7794 
7795 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7796   LocationSummary* locations = instruction->GetLocations();
7797   Location src = locations->InAt(0);
7798   Location dest = locations->Out();
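  // BMI1 bit-manipulation idioms: BLSR computes dest = src & (src - 1) (clears the lowest
  // set bit); BLSMSK computes dest = src ^ (src - 1) (mask up to the lowest set bit).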
7799   switch (instruction->GetOpKind()) {
7800     case HInstruction::kAnd:
7801       __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7802       break;
7803     case HInstruction::kXor:
7804       __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7805       break;
7806     default:
7807       LOG(FATAL) << "Unreachable";
7808   }
7809 }
7810 
7811 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7812 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7813 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7814 
7815 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7816   LocationSummary* locations =
7817       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7818   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7819          || instruction->GetResultType() == DataType::Type::kInt64);
7820   locations->SetInAt(0, Location::RequiresRegister());
7821   locations->SetInAt(1, Location::Any());
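  // x86-64 ALU instructions are two-operand (dest = dest op src), so the output must be
  // allocated to the same register as the first input.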
7822   locations->SetOut(Location::SameAsFirstInput());
7823 }
7824 
7825 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7826   HandleBitwiseOperation(instruction);
7827 }
7828 
7829 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7830   HandleBitwiseOperation(instruction);
7831 }
7832 
7833 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7834   HandleBitwiseOperation(instruction);
7835 }
7836 
7837 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7838   LocationSummary* locations = instruction->GetLocations();
7839   Location first = locations->InAt(0);
7840   Location second = locations->InAt(1);
7841   DCHECK(first.Equals(locations->Out()));
7842 
7843   if (instruction->GetResultType() == DataType::Type::kInt32) {
7844     if (second.IsRegister()) {
7845       if (instruction->IsAnd()) {
7846         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7847       } else if (instruction->IsOr()) {
7848         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7849       } else {
7850         DCHECK(instruction->IsXor());
7851         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7852       }
7853     } else if (second.IsConstant()) {
7854       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7855       if (instruction->IsAnd()) {
7856         __ andl(first.AsRegister<CpuRegister>(), imm);
7857       } else if (instruction->IsOr()) {
7858         __ orl(first.AsRegister<CpuRegister>(), imm);
7859       } else {
7860         DCHECK(instruction->IsXor());
7861         __ xorl(first.AsRegister<CpuRegister>(), imm);
7862       }
7863     } else {
7864       Address address(CpuRegister(RSP), second.GetStackIndex());
7865       if (instruction->IsAnd()) {
7866         __ andl(first.AsRegister<CpuRegister>(), address);
7867       } else if (instruction->IsOr()) {
7868         __ orl(first.AsRegister<CpuRegister>(), address);
7869       } else {
7870         DCHECK(instruction->IsXor());
7871         __ xorl(first.AsRegister<CpuRegister>(), address);
7872       }
7873     }
7874   } else {
7875     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7876     CpuRegister first_reg = first.AsRegister<CpuRegister>();
7877     bool second_is_constant = false;
7878     int64_t value = 0;
7879     if (second.IsConstant()) {
7880       second_is_constant = true;
7881       value = second.GetConstant()->AsLongConstant()->GetValue();
7882     }
7883     bool is_int32_value = IsInt<32>(value);
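    // 64-bit and/or/xor only accept a sign-extended 32-bit immediate, so wider constants
    // are loaded from the constant area via LiteralInt64Address() below.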
7884 
7885     if (instruction->IsAnd()) {
7886       if (second_is_constant) {
7887         if (is_int32_value) {
7888           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7889         } else {
7890           __ andq(first_reg, codegen_->LiteralInt64Address(value));
7891         }
7892       } else if (second.IsDoubleStackSlot()) {
7893         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7894       } else {
7895         __ andq(first_reg, second.AsRegister<CpuRegister>());
7896       }
7897     } else if (instruction->IsOr()) {
7898       if (second_is_constant) {
7899         if (is_int32_value) {
7900           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7901         } else {
7902           __ orq(first_reg, codegen_->LiteralInt64Address(value));
7903         }
7904       } else if (second.IsDoubleStackSlot()) {
7905         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7906       } else {
7907         __ orq(first_reg, second.AsRegister<CpuRegister>());
7908       }
7909     } else {
7910       DCHECK(instruction->IsXor());
7911       if (second_is_constant) {
7912         if (is_int32_value) {
7913           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7914         } else {
7915           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7916         }
7917       } else if (second.IsDoubleStackSlot()) {
7918         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7919       } else {
7920         __ xorq(first_reg, second.AsRegister<CpuRegister>());
7921       }
7922     }
7923   }
7924 }
7925 
7926 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7927     HInstruction* instruction,
7928     Location out,
7929     uint32_t offset,
7930     Location maybe_temp,
7931     ReadBarrierOption read_barrier_option) {
7932   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7933   if (read_barrier_option == kWithReadBarrier) {
7934     DCHECK(codegen_->EmitReadBarrier());
7935     if (kUseBakerReadBarrier) {
7936       // Load with fast path based Baker's read barrier.
7937       // /* HeapReference<Object> */ out = *(out + offset)
7938       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7939           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7940     } else {
7941       // Load with slow path based read barrier.
7942       // Save the value of `out` into `maybe_temp` before overwriting it
7943       // in the following move operation, as we will need it for the
7944       // read barrier below.
7945       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7946       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7947       // /* HeapReference<Object> */ out = *(out + offset)
7948       __ movl(out_reg, Address(out_reg, offset));
7949       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7950     }
7951   } else {
7952     // Plain load with no read barrier.
7953     // /* HeapReference<Object> */ out = *(out + offset)
7954     __ movl(out_reg, Address(out_reg, offset));
7955     __ MaybeUnpoisonHeapReference(out_reg);
7956   }
7957 }
7958 
7959 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7960     HInstruction* instruction,
7961     Location out,
7962     Location obj,
7963     uint32_t offset,
7964     ReadBarrierOption read_barrier_option) {
7965   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7966   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7967   if (read_barrier_option == kWithReadBarrier) {
7968     DCHECK(codegen_->EmitReadBarrier());
7969     if (kUseBakerReadBarrier) {
7970       // Load with fast path based Baker's read barrier.
7971       // /* HeapReference<Object> */ out = *(obj + offset)
7972       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7973           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7974     } else {
7975       // Load with slow path based read barrier.
7976       // /* HeapReference<Object> */ out = *(obj + offset)
7977       __ movl(out_reg, Address(obj_reg, offset));
7978       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7979     }
7980   } else {
7981     // Plain load with no read barrier.
7982     // /* HeapReference<Object> */ out = *(obj + offset)
7983     __ movl(out_reg, Address(obj_reg, offset));
7984     __ MaybeUnpoisonHeapReference(out_reg);
7985   }
7986 }
7987 
7988 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7989     HInstruction* instruction,
7990     Location root,
7991     const Address& address,
7992     Label* fixup_label,
7993     ReadBarrierOption read_barrier_option) {
7994   CpuRegister root_reg = root.AsRegister<CpuRegister>();
7995   if (read_barrier_option == kWithReadBarrier) {
7996     DCHECK(codegen_->EmitReadBarrier());
7997     if (kUseBakerReadBarrier) {
7998       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7999       // Baker's read barriers are used:
8000       //
8001       //   root = obj.field;
8002       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8003       //   if (temp != null) {
8004       //     root = temp(root)
8005       //   }
8006 
8007       // /* GcRoot<mirror::Object> */ root = *address
8008       __ movl(root_reg, address);
8009       if (fixup_label != nullptr) {
8010         __ Bind(fixup_label);
8011       }
8012       static_assert(
8013           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8014           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8015           "have different sizes.");
8016       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8017                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
8018                     "have different sizes.");
8019 
8020       // Slow path marking the GC root `root`.
8021       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
8022           instruction, root, /* unpoison_ref_before_marking= */ false);
8023       codegen_->AddSlowPath(slow_path);
8024 
8025       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
8026       const int32_t entry_point_offset =
8027           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
8028       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
8029       // The entrypoint is null when the GC is not marking.
8030       __ j(kNotEqual, slow_path->GetEntryLabel());
8031       __ Bind(slow_path->GetExitLabel());
8032     } else {
8033       // GC root loaded through a slow path for read barriers other
8034       // than Baker's.
8035       // /* GcRoot<mirror::Object>* */ root = address
8036       __ leaq(root_reg, address);
8037       if (fixup_label != nullptr) {
8038         __ Bind(fixup_label);
8039       }
8040       // /* mirror::Object* */ root = root->Read()
8041       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8042     }
8043   } else {
8044     // Plain GC root load with no read barrier.
8045     // /* GcRoot<mirror::Object> */ root = *address
8046     __ movl(root_reg, address);
8047     if (fixup_label != nullptr) {
8048       __ Bind(fixup_label);
8049     }
8050     // Note that GC roots are not affected by heap poisoning, thus we
8051     // do not have to unpoison `root_reg` here.
8052   }
8053 }
8054 
8055 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8056                                                                 Location ref,
8057                                                                 CpuRegister obj,
8058                                                                 uint32_t offset,
8059                                                                 bool needs_null_check) {
8060   DCHECK(EmitBakerReadBarrier());
8061 
8062   // /* HeapReference<Object> */ ref = *(obj + offset)
8063   Address src(obj, offset);
8064   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8065 }
8066 
8067 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8068                                                                 Location ref,
8069                                                                 CpuRegister obj,
8070                                                                 uint32_t data_offset,
8071                                                                 Location index,
8072                                                                 bool needs_null_check) {
8073   DCHECK(EmitBakerReadBarrier());
8074 
8075   static_assert(
8076       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8077       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8078   // /* HeapReference<Object> */ ref =
8079   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
8080   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
8081   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8082 }
8083 
8084 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8085                                                                     Location ref,
8086                                                                     CpuRegister obj,
8087                                                                     const Address& src,
8088                                                                     bool needs_null_check,
8089                                                                     bool always_update_field,
8090                                                                     CpuRegister* temp1,
8091                                                                     CpuRegister* temp2) {
8092   DCHECK(EmitBakerReadBarrier());
8093 
8094   // In slow path based read barriers, the read barrier call is
8095   // inserted after the original load. However, in fast path based
8096   // Baker's read barriers, we need to perform the load of
8097   // mirror::Object::monitor_ *before* the original reference load.
8098   // This load-load ordering is required by the read barrier.
8099   // The fast path/slow path (for Baker's algorithm) should look like:
8100   //
8101   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8102   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
8103   //   HeapReference<Object> ref = *src;  // Original reference load.
8104   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
8105   //   if (is_gray) {
8106   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
8107   //   }
8108   //
8109   // Note: the original implementation in ReadBarrier::Barrier is
8110   // slightly more complex as:
8111   // - it implements the load-load fence using a data dependency on
8112   //   the high-bits of rb_state, which are expected to be all zeroes
8113   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
8114   //   here, which is a no-op thanks to the x86-64 memory model);
8115   // - it performs additional checks that we do not do here for
8116   //   performance reasons.
8117 
8118   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
8119   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8120 
8121   // Given the numeric representation, it's enough to check the low bit of the rb_state.
8122   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8123   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8124   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8125   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8126   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
8127 
8128   // if (rb_state == ReadBarrier::GrayState())
8129   //   ref = ReadBarrier::Mark(ref);
8130   // At this point, just do the "if" and make sure that flags are preserved until the branch.
8131   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8132   if (needs_null_check) {
8133     MaybeRecordImplicitNullCheck(instruction);
8134   }
8135 
8136   // Load fence to prevent load-load reordering.
8137   // Note that this is a no-op, thanks to the x86-64 memory model.
8138   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8139 
8140   // The actual reference load.
8141   // /* HeapReference<Object> */ ref = *src
8142   __ movl(ref_reg, src);  // Flags are unaffected.
8143 
8144   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8145   // Slow path marking the object `ref` when it is gray.
8146   SlowPathCode* slow_path;
8147   if (always_update_field) {
8148     DCHECK(temp1 != nullptr);
8149     DCHECK(temp2 != nullptr);
8150     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
8151         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
8152   } else {
8153     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
8154         instruction, ref, /* unpoison_ref_before_marking= */ true);
8155   }
8156   AddSlowPath(slow_path);
8157 
8158   // We have done the "if" of the gray bit check above, now branch based on the flags.
8159   __ j(kNotZero, slow_path->GetEntryLabel());
8160 
8161   // Object* ref = ref_addr->AsMirrorPtr()
8162   __ MaybeUnpoisonHeapReference(ref_reg);
8163 
8164   __ Bind(slow_path->GetExitLabel());
8165 }
8166 
8167 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
8168                                                   Location out,
8169                                                   Location ref,
8170                                                   Location obj,
8171                                                   uint32_t offset,
8172                                                   Location index) {
8173   DCHECK(EmitReadBarrier());
8174 
8175   // Insert a slow path based read barrier *after* the reference load.
8176   //
8177   // If heap poisoning is enabled, the unpoisoning of the loaded
8178   // reference will be carried out by the runtime within the slow
8179   // path.
8180   //
8181   // Note that `ref` currently does not get unpoisoned (when heap
8182   // poisoning is enabled), which is alright as the `ref` argument is
8183   // not used by the artReadBarrierSlow entry point.
8184   //
8185   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8186   SlowPathCode* slow_path = new (GetScopedAllocator())
8187       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
8188   AddSlowPath(slow_path);
8189 
8190   __ jmp(slow_path->GetEntryLabel());
8191   __ Bind(slow_path->GetExitLabel());
8192 }
8193 
8194 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8195                                                        Location out,
8196                                                        Location ref,
8197                                                        Location obj,
8198                                                        uint32_t offset,
8199                                                        Location index) {
8200   if (EmitReadBarrier()) {
8201     // Baker's read barriers shall be handled by the fast path
8202     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
8203     DCHECK(!kUseBakerReadBarrier);
8204     // If heap poisoning is enabled, unpoisoning will be taken care of
8205     // by the runtime within the slow path.
8206     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8207   } else if (kPoisonHeapReferences) {
8208     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
8209   }
8210 }
8211 
8212 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8213                                                          Location out,
8214                                                          Location root) {
8215   DCHECK(EmitReadBarrier());
8216 
8217   // Insert a slow path based read barrier *after* the GC root load.
8218   //
8219   // Note that GC roots are not affected by heap poisoning, so we do
8220   // not need to do anything special for this here.
8221   SlowPathCode* slow_path =
8222       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
8223   AddSlowPath(slow_path);
8224 
8225   __ jmp(slow_path->GetEntryLabel());
8226   __ Bind(slow_path->GetExitLabel());
8227 }
8228 
8229 void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8230   // Nothing to do, this should be removed during prepare for register allocator.
8231   LOG(FATAL) << "Unreachable";
8232 }
8233 
8234 void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8235   // Nothing to do, this should be removed during prepare for register allocator.
8236   LOG(FATAL) << "Unreachable";
8237 }
8238 
8239 // Simple implementation of packed switch - generate cascaded compare/jumps.
8240 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8241   LocationSummary* locations =
8242       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8243   locations->SetInAt(0, Location::RequiresRegister());
8244   locations->AddRegisterTemps(2);
8245 }
8246 
8247 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8248   int32_t lower_bound = switch_instr->GetStartValue();
8249   uint32_t num_entries = switch_instr->GetNumEntries();
8250   LocationSummary* locations = switch_instr->GetLocations();
8251   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
8252   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
8253   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
8254   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8255 
8256   // Should we generate smaller inline compare/jumps?
8257   if (num_entries <= kPackedSwitchJumpTableThreshold) {
8258     // Figure out the correct compare values and jump conditions.
8259     // Handle the first compare/branch as a special case because it might
8260     // jump to the default case.
8261     DCHECK_GT(num_entries, 2u);
8262     Condition first_condition;
8263     uint32_t index;
8264     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
8265     if (lower_bound != 0) {
8266       first_condition = kLess;
8267       __ cmpl(value_reg_in, Immediate(lower_bound));
8268       __ j(first_condition, codegen_->GetLabelOf(default_block));
8269       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8270 
8271       index = 1;
8272     } else {
8273       // Handle all the compare/jumps below.
8274       first_condition = kBelow;
8275       index = 0;
8276     }
8277 
8278     // Handle the rest of the compare/jumps.
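    // For example, with a zero lower bound and four entries this emits roughly:
    //   cmpl $1, value; jb succ[0]; je succ[1]
    //   cmpl $3, value; jb succ[2]; je succ[3]
    // followed by the jump to the default block below.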
8279     for (; index + 1 < num_entries; index += 2) {
8280       int32_t compare_to_value = lower_bound + index + 1;
8281       __ cmpl(value_reg_in, Immediate(compare_to_value));
8282       // Jump to successors[index] if value < case_value[index].
8283       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8284       // Jump to successors[index + 1] if value == case_value[index + 1].
8285       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8286     }
8287 
8288     if (index != num_entries) {
8289       // There are an odd number of entries. Handle the last one.
8290       DCHECK_EQ(index + 1, num_entries);
8291       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
8292       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8293     }
8294 
8295     // And the default for any other value.
8296     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
8297       __ jmp(codegen_->GetLabelOf(default_block));
8298     }
8299     return;
8300   }
8301 
8302   // Remove the bias, if needed.
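  // Jump table case. The emitted sequence is approximately:
  //   cmpl $(num_entries - 1), value; ja default
  //   leaq table(%rip), base
  //   movslq (base, value, 4), temp
  //   addq base, temp
  //   jmp *temp
  // where `table` is emitted in the constant area and holds 32-bit offsets relative to
  // the table base.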
8303   Register value_reg_out = value_reg_in.AsRegister();
8304   if (lower_bound != 0) {
8305     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
8306     value_reg_out = temp_reg.AsRegister();
8307   }
8308   CpuRegister value_reg(value_reg_out);
8309 
8310   // Is the value in range?
8311   __ cmpl(value_reg, Immediate(num_entries - 1));
8312   __ j(kAbove, codegen_->GetLabelOf(default_block));
8313 
8314   // We are in the range of the table.
8315   // Load the address of the jump table in the constant area.
8316   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
8317 
8318   // Load the (signed) offset from the jump table.
8319   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
8320 
8321   // Add the offset to the address of the table base.
8322   __ addq(temp_reg, base_reg);
8323 
8324   // And jump.
8325   __ jmp(temp_reg);
8326 }
8327 
8328 void LocationsBuilderX86_64::VisitIntermediateAddress(
8329     [[maybe_unused]] HIntermediateAddress* instruction) {
8330   LOG(FATAL) << "Unreachable";
8331 }
8332 
8333 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(
8334     [[maybe_unused]] HIntermediateAddress* instruction) {
8335   LOG(FATAL) << "Unreachable";
8336 }
8337 
8338 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
8339   if (value == 0) {
8340     __ xorl(dest, dest);
8341   } else {
8342     __ movl(dest, Immediate(value));
8343   }
8344 }
8345 
8346 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
8347   if (value == 0) {
8348     // Clears upper bits too.
8349     __ xorl(dest, dest);
8350   } else if (IsUint<32>(value)) {
8351     // We can use a 32 bit move, as it will zero-extend and is shorter.
8352     __ movl(dest, Immediate(static_cast<int32_t>(value)));
8353   } else {
8354     __ movq(dest, Immediate(value));
8355   }
8356 }
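// Illustrative examples of the encodings selected above (relying on the x86-64
// rule that a 32-bit register write zero-extends to 64 bits):
//   Load64BitValue(reg, 0)            -> xorl reg, reg
//   Load64BitValue(reg, 0x12345678)   -> movl reg, 0x12345678
//   Load64BitValue(reg, 0x123456789)  -> movq reg, 0x123456789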
8357 
8358 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
8359   if (value == 0) {
8360     __ xorps(dest, dest);
8361   } else {
8362     __ movss(dest, LiteralInt32Address(value));
8363   }
8364 }
8365 
8366 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
8367   if (value == 0) {
8368     __ xorpd(dest, dest);
8369   } else {
8370     __ movsd(dest, LiteralInt64Address(value));
8371   }
8372 }
8373 
8374 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
8375   Load32BitValue(dest, bit_cast<int32_t, float>(value));
8376 }
8377 
8378 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
8379   Load64BitValue(dest, bit_cast<int64_t, double>(value));
8380 }
8381 
8382 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
8383   if (value == 0) {
8384     __ testl(dest, dest);
8385   } else {
8386     __ cmpl(dest, Immediate(value));
8387   }
8388 }
8389 
8390 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
8391   if (IsInt<32>(value)) {
8392     if (value == 0) {
8393       __ testq(dest, dest);
8394     } else {
8395       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
8396     }
8397   } else {
8398     // Value won't fit in a 32-bit immediate.
8399     __ cmpq(dest, LiteralInt64Address(value));
8400   }
8401 }
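// Note: cmpq has no 64-bit immediate form, so values outside the int32 range are
// compared against a RIP-relative literal in the constant area (see
// LiteralInt64Address below). Illustrative:
//   Compare64BitValue(reg, 0)            -> testq reg, reg
//   Compare64BitValue(reg, 42)           -> cmpq reg, 42
//   Compare64BitValue(reg, 0x123456789)  -> cmpq reg, [rip + int64_literal]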
8402 
8403 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
8404   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8405   GenerateIntCompare(lhs_reg, rhs);
8406 }
8407 
8408 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
8409   if (rhs.IsConstant()) {
8410     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8411     Compare32BitValue(lhs, value);
8412   } else if (rhs.IsStackSlot()) {
8413     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8414   } else {
8415     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
8416   }
8417 }
8418 
8419 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
8420   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8421   if (rhs.IsConstant()) {
8422     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
8423     Compare64BitValue(lhs_reg, value);
8424   } else if (rhs.IsDoubleStackSlot()) {
8425     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8426   } else {
8427     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
8428   }
8429 }
8430 
8431 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
8432                                           Location index,
8433                                           ScaleFactor scale,
8434                                           uint32_t data_offset) {
8435   return index.IsConstant()
8436       ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
8437       : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
8438 }
8439 
8440 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
8441   DCHECK(dest.IsDoubleStackSlot());
8442   if (IsInt<32>(value)) {
8443     // Can move directly as an int32 constant.
8444     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
8445             Immediate(static_cast<int32_t>(value)));
8446   } else {
8447     Load64BitValue(CpuRegister(TMP), value);
8448     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
8449   }
8450 }
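// Note: movq to memory only takes a sign-extended 32-bit immediate, hence the
// detour through TMP for larger values. Illustrative:
//   Store64BitValueToStack(slot, 1)            -> movq [rsp + off], 1
//   Store64BitValueToStack(slot, 0x123456789)  -> movq TMP, 0x123456789
//                                                 movq [rsp + off], TMP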
8451 
8452 /**
8453  * Class to handle late fixup of offsets into constant area.
8454  */
8455 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8456  public:
8457   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
8458       : codegen_(&codegen), offset_into_constant_area_(offset) {}
8459 
8460  protected:
8461   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8462 
8463   CodeGeneratorX86_64* codegen_;
8464 
8465  private:
8466   void Process(const MemoryRegion& region, int pos) override {
8467     // Patch the correct offset for the instruction.  We use the address of the
8468     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
8469     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8470     int32_t relative_position = constant_offset - pos;
8471 
8472     // Patch in the right value.
8473     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8474   }
8475 
8476   // Location in constant area that the fixup refers to.
8477   size_t offset_into_constant_area_;
8478 };
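// How the fixup resolves (sketch): an instruction such as "movsd xmm0, [rip + X]"
// is emitted with a placeholder 32-bit displacement and this fixup attached. When
// the assembler processes fixups, Process() rewrites the four bytes ending at
// 'pos' with (constant_area_start + offset_into_constant_area - pos), i.e. the
// displacement relative to the next instruction, which is what RIP-relative
// addressing encodes.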
8479 
8480 /**
8481  * Class to handle late fixup of offsets to a jump table that will be created in the
8482  * constant area.
8483  */
8484 class JumpTableRIPFixup : public RIPFixup {
8485  public:
8486   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
8487       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
8488 
8489   void CreateJumpTable() {
8490     X86_64Assembler* assembler = codegen_->GetAssembler();
8491 
8492     // Ensure that the reference to the jump table has the correct offset.
8493     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8494     SetOffset(offset_in_constant_table);
8495 
8496     // Compute the offset from the start of the function to this jump table.
8497     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
8498 
8499     // Populate the jump table with the offsets to each case target.
8500     int32_t num_entries = switch_instr_->GetNumEntries();
8501     HBasicBlock* block = switch_instr_->GetBlock();
8502     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8503     // The value that we want is the target offset - the position of the table.
8504     for (int32_t i = 0; i < num_entries; i++) {
8505       HBasicBlock* b = successors[i];
8506       Label* l = codegen_->GetLabelOf(b);
8507       DCHECK(l->IsBound());
8508       int32_t offset_to_block = l->Position() - current_table_offset;
8509       assembler->AppendInt32(offset_to_block);
8510     }
8511   }
8512 
8513  private:
8514   const HPackedSwitch* switch_instr_;
8515 };
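// Jump table layout (sketch): each entry is a signed 32-bit offset from the start
// of the table to the bound label of the matching successor. For example, with
// case targets bound at code positions 0x40, 0x64 and 0x80 and the table placed
// at 0x100, the emitted entries would be -0xC0, -0x9C and -0x80; VisitPackedSwitch
// rebuilds the absolute target with movsxd + addq against the table base.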
8516 
8517 void CodeGeneratorX86_64::Finalize() {
8518   // Generate the constant area if needed.
8519   X86_64Assembler* assembler = GetAssembler();
8520   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8521     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
8522     assembler->Align(4, 0);
8523     constant_area_start_ = assembler->CodeSize();
8524 
8525     // Populate any jump tables.
8526     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8527       jump_table->CreateJumpTable();
8528     }
8529 
8530     // And now add the constant area to the generated code.
8531     assembler->AddConstantArea();
8532   }
8533 
8534   // And finish up.
8535   CodeGenerator::Finalize();
8536 }
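// Resulting buffer layout after Finalize() (sketch):
//   [ method instructions ... ]
//   [ padding to a 4-byte boundary ]
//   [ constant area ]                 <- constant_area_start_
//       literals added during code generation (AddDouble/AddInt64/...)
//       jump table entries appended by CreateJumpTable()
// Each RIPFixup is then resolved relative to constant_area_start_.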
8537 
8538 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
8539   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
8540   return Address::RIP(fixup);
8541 }
8542 
8543 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
8544   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
8545   return Address::RIP(fixup);
8546 }
8547 
8548 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
8549   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
8550   return Address::RIP(fixup);
8551 }
8552 
8553 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
8554   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
8555   return Address::RIP(fixup);
8556 }
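// Typical use of these helpers (sketch, with a hypothetical xmm_dest register):
// an instruction needing a floating-point or 64-bit constant loads it RIP-relative
// from the constant area, e.g.
//   __ movsd(xmm_dest, LiteralDoubleAddress(3.14));
// The returned Address carries the RIPFixup, so the displacement is filled in once
// the constant area position is known in Finalize().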
8557 
8558 // TODO: trg as memory.
8559 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
8560   if (!trg.IsValid()) {
8561     DCHECK_EQ(type, DataType::Type::kVoid);
8562     return;
8563   }
8564 
8565   DCHECK_NE(type, DataType::Type::kVoid);
8566 
8567   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
8568   if (trg.Equals(return_loc)) {
8569     return;
8570   }
8571 
8572   // Let the parallel move resolver take care of all of this.
8573   HParallelMove parallel_move(GetGraph()->GetAllocator());
8574   parallel_move.AddMove(return_loc, trg, type, nullptr);
8575   GetMoveResolver()->EmitNativeCode(&parallel_move);
8576 }
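// Example (illustrative): for a call returning a float, the value arrives in XMM0;
// if the invoke's output was allocated to a different register, the parallel move
// resolver emits the single register move required, and the early return above
// emits nothing when trg already is the return location.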
8577 
8578 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
8579   // Create a fixup to be used to create and address the jump table.
8580   JumpTableRIPFixup* table_fixup =
8581       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8582 
8583   // We have to populate the jump tables.
8584   fixups_to_jump_tables_.push_back(table_fixup);
8585   return Address::RIP(table_fixup);
8586 }
8587 
8588 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
8589                                              const Address& addr_high,
8590                                              int64_t v,
8591                                              HInstruction* instruction) {
8592   if (IsInt<32>(v)) {
8593     int32_t v_32 = v;
8594     __ movq(addr_low, Immediate(v_32));
8595     MaybeRecordImplicitNullCheck(instruction);
8596   } else {
8597     // Doesn't fit in a 32-bit immediate. Store it in two 32-bit pieces.
8598     int32_t low_v = Low32Bits(v);
8599     int32_t high_v = High32Bits(v);
8600     __ movl(addr_low, Immediate(low_v));
8601     MaybeRecordImplicitNullCheck(instruction);
8602     __ movl(addr_high, Immediate(high_v));
8603   }
8604 }
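// Note: the implicit null check is recorded right after the first store, since
// that is the access that faults when the object is null. Illustrative split for
// a value that does not fit a 32-bit immediate:
//   MoveInt64ToAddress(lo, hi, 0x123456789, ins)  -> movl [lo], 0x23456789
//                                                    movl [hi], 0x1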
8605 
8606 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
8607                                           const uint8_t* roots_data,
8608                                           const PatchInfo<Label>& info,
8609                                           uint64_t index_in_table) const {
8610   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8611   uintptr_t address =
8612       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8613   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8614   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8615       dchecked_integral_cast<uint32_t>(address);
8616 }
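// Sketch of what gets patched: the GC-root load emitted for a JIT string, class or
// MethodType records a label; PatchJitRootUse then rewrites the 32-bit slot at a
// fixed adjustment before that label (the address operand of the load) with the
// address of the root's entry in the JIT roots table, so the compiled code reads
// the object through a GcRoot<mirror::Object> that the GC can update.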
8617 
8618 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8619   for (const PatchInfo<Label>& info : jit_string_patches_) {
8620     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8621     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8622     PatchJitRootUse(code, roots_data, info, index_in_table);
8623   }
8624 
8625   for (const PatchInfo<Label>& info : jit_class_patches_) {
8626     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8627     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8628     PatchJitRootUse(code, roots_data, info, index_in_table);
8629   }
8630 
8631   for (const PatchInfo<Label>& info : jit_method_type_patches_) {
8632     ProtoReference proto_reference(info.target_dex_file, dex::ProtoIndex(info.offset_or_index));
8633     uint64_t index_in_table = GetJitMethodTypeRootIndex(proto_reference);
8634     PatchJitRootUse(code, roots_data, info, index_in_table);
8635   }
8636 }
8637 
8638 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
8639   return codegen_->GetInstructionSetFeatures().HasAVX();
8640 }
8641 
8642 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
8643   return codegen_->GetInstructionSetFeatures().HasAVX2();
8644 }
8645 
8646 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
8647   return codegen_->GetInstructionSetFeatures().HasAVX();
8648 }
8649 
8650 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
8651   return codegen_->GetInstructionSetFeatures().HasAVX2();
8652 }
8653 
8654 void LocationsBuilderX86_64::VisitBitwiseNegatedRight(
8655     [[maybe_unused]] HBitwiseNegatedRight* instruction) {
8656   LOG(FATAL) << "Unimplemented";
8657 }
8658 
8659 void InstructionCodeGeneratorX86_64::VisitBitwiseNegatedRight(
8660     [[maybe_unused]] HBitwiseNegatedRight* instruction) {
8661   LOG(FATAL) << "Unimplemented";
8662 }
8663 
8664 #undef __
8665 
8666 }  // namespace x86_64
8667 }  // namespace art
8668