/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_

#include "arch/x86/instruction_set_features_x86.h"
#include "base/macros.h"
#include "base/pointer_size.h"
#include "code_generator.h"
#include "dex/dex_file_types.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86/assembler_x86.h"

namespace art HIDDEN {
namespace x86 {

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86WordSize = static_cast<size_t>(kX86PointerSize);

class CodeGeneratorX86;

static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

#define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \
  V(MathSignumFloat) \
  V(MathSignumDouble) \
  V(MathCopySignFloat) \
  V(MathCopySignDouble) \
  V(MathRoundDouble) \
  V(FloatIsInfinite) \
  V(DoubleIsInfinite) \
  V(IntegerHighestOneBit) \
  V(LongHighestOneBit) \
  V(LongDivideUnsigned) \
  V(IntegerRemainderUnsigned) \
  V(LongRemainderUnsigned) \
  V(CRC32Update) \
  V(CRC32UpdateBytes) \
  V(CRC32UpdateByteBuffer) \
  V(FP16ToFloat) \
  V(FP16ToHalf) \
  V(FP16Floor) \
  V(FP16Ceil) \
  V(FP16Rint) \
  V(FP16Greater) \
  V(FP16GreaterEquals) \
  V(FP16Less) \
  V(FP16LessEquals) \
  V(FP16Compare) \
  V(FP16Min) \
  V(FP16Max) \
  V(MathMultiplyHigh) \
  V(StringStringIndexOf) \
  V(StringStringIndexOfAfter) \
  V(StringBufferAppend) \
  V(StringBufferLength) \
  V(StringBufferToString) \
  V(StringBuilderAppendObject) \
  V(StringBuilderAppendString) \
  V(StringBuilderAppendCharSequence) \
  V(StringBuilderAppendCharArray) \
  V(StringBuilderAppendBoolean) \
  V(StringBuilderAppendChar) \
  V(StringBuilderAppendInt) \
  V(StringBuilderAppendLong) \
  V(StringBuilderAppendFloat) \
  V(StringBuilderAppendDouble) \
  V(StringBuilderLength) \
  V(StringBuilderToString) \
  V(UnsafeArrayBaseOffset) \
  /* 1.8 */ \
  V(MethodHandleInvokeExact) \
  V(MethodHandleInvoke) \
  /* OpenJDK 11 */ \
  V(JdkUnsafeArrayBaseOffset)
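
// Illustrative sketch (not part of the upstream header): an X-macro list such as
// UNIMPLEMENTED_INTRINSIC_LIST_X86 is consumed by passing it a one-argument macro,
// which is then applied to every intrinsic name in the list. For example, a
// hypothetical counting macro (COUNT_ONE and kUnimplementedIntrinsicsX86 are
// illustrative names only) could be expanded like this:
//
//   #define COUNT_ONE(Name) + 1
//   static constexpr size_t kUnimplementedIntrinsicsX86 =
//       0 UNIMPLEMENTED_INTRINSIC_LIST_X86(COUNT_ONE);
//   #undef COUNT_ONE
//
// The real consumers of this list live in the intrinsics code, which uses the same
// pattern to mark each listed intrinsic as unimplemented for x86.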

class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFpuRegisters,
      kParameterFpuRegistersLength,
      kX86PointerSize) {}

  RegisterPair GetRegisterPairAt(size_t argument_index) {
    DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
    return kParameterCorePairRegisters[argument_index];
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86() {}
  virtual ~InvokeDexCallingConventionVisitorX86() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
};

class CriticalNativeCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  size_t GetStackOffset() const { return stack_offset_; }

 private:
  // Register allocator does not support adjusting frame size, so we cannot provide final locations
  // of stack arguments for register allocation. We ask the register allocator for any location and
  // move these arguments to the right place after adjusting the SP when generating the call.
  const bool for_register_allocation_;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86);
};

class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(ECX);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(EAX);
  }
  Location GetReturnLocation(DataType::Type type) const override {
    return DataType::Is64BitType(type)
        ? Location::RegisterPairLocation(EAX, EDX)
        : Location::RegisterLocation(EAX);
  }
  Location GetSetValueLocation(DataType::Type type, bool is_instance) const override {
    return DataType::Is64BitType(type)
        ? (is_instance
            ? Location::RegisterPairLocation(EDX, EBX)
            : Location::RegisterPairLocation(ECX, EDX))
        : (is_instance
            ? Location::RegisterLocation(EDX)
            : Location::RegisterLocation(ECX));
  }
  Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86);
};

class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86Assembler* GetAssembler() const;

 private:
  void Exchange(Register reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory(int mem1, int mem2, int number_of_words);
  void MoveMemoryToMemory(int dst, int src, int number_of_words);

  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86);
};

class LocationsBuilderX86 : public HGraphVisitor {
 public:
  LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void HandleInvoke(HInvoke* invoke);
  void HandleCondition(HCondition* condition);
  void HandleRotate(HBinaryOperation* rotate);
  void HandleShift(HBinaryOperation* instruction);
  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86* const codegen_;
  InvokeDexCallingConventionVisitorX86 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
};
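
// Illustrative sketch (not part of the upstream header): the DECLARE_VISIT_INSTRUCTION
// macro used by LocationsBuilderX86 above, and defined again by InstructionCodeGeneratorX86
// below, declares one visitor per concrete HIR instruction via token pasting. For an
// instruction named Add (an entry in the common instruction list), the FOR_EACH_*
// invocations expand it to:
//
//   void VisitAdd(HAdd* instr) override;
//
// so each visitor class spells the declaration boilerplate only once.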

class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86Assembler* GetAssembler() const { return assembler_; }

  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
  // table version generates 7 instructions and num_entries literals. The compare/jump
  // sequence will generate less code/data with a small num_entries.
  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);

  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      DataType::Type type,
                      Address field_addr,
                      Register base,
                      bool is_volatile,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateRemFP(HRem* rem);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void GenerateShlLong(const Location& loc, Register shifter);
  void GenerateShrLong(const Location& loc, Register shifter);
  void GenerateUShrLong(const Location& loc, Register shifter);
  void GenerateShlLong(const Location& loc, int shift);
  void GenerateShrLong(const Location& loc, int shift);
  void GenerateUShrLong(const Location& loc, int shift);
  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleRotate(HBinaryOperation* rotate);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
  // `is_wide` specifies whether it is long/double or not.
  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_fp, bool is_wide);

  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
  template<class LabelType>
  void GenerateLongComparesAndJumps(HCondition* cond,
                                    LabelType* true_label,
                                    LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);
  void GenPackedSwitchWithCompares(Register value_reg,
                                   int32_t lower_bound,
                                   uint32_t num_entries,
                                   HBasicBlock* switch_block,
                                   HBasicBlock* default_block);

  void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  void GenerateMethodEntryExitHook(HInstruction* instruction);

  X86Assembler* const assembler_;
  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
};

class JumpTableRIPFixup;

class CodeGeneratorX86 : public CodeGenerator {
 public:
  CodeGeneratorX86(HGraph* graph,
                   const CompilerOptions& compiler_options,
                   OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 2 * kX86WordSize;  // 8 bytes == 2 words for each spill
  }

  size_t GetCalleePreservedFPWidth() const override {
    return 2 * kX86WordSize;
  }

  size_t GetSIMDRegisterWidth() const override {
    return 4 * kX86WordSize;
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86Assembler& GetAssembler() const override {
    return assembler_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;

  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;

  ParallelMoveResolverX86* GetMoveResolver() override {
    return &move_resolver_;
  }

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86;
  }

  const X86InstructionSetFeatures& GetInstructionSetFeatures() const;

  // Helper method to move a 32-bit value between two locations.
  void Move32(Location destination, Location source);
  // Helper method to move a 64-bit value between two locations.
  void Move64(Location destination, Location source);
  // Helper method to load a value from an address to a register.
  void LoadFromMemoryNoBarrier(DataType::Type dst_type,
                               Location dst,
                               Address src,
                               HInstruction* instr = nullptr,
                               XmmRegister temp = kNoXmmRegister,
                               bool is_atomic_load = false);
  // Helper method to move a primitive value from a location to an address.
  void MoveToMemory(DataType::Type src_type,
                    Location src,
                    Register dst_base,
                    Register dst_index = Register::kNoRegister,
                    ScaleFactor dst_scale = TIMES_1,
                    int32_t dst_disp = 0);

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  // Generate a call to a static or direct method.
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  // Generate a call to a virtual method.
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
                                     uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
                                 uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordAppImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(HLoadClass* load_class);
  void RecordAppImageTypePatch(HLoadClass* load_class);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);

  void LoadBootImageAddress(Register reg,
                            uint32_t boot_image_reference,
                            HInvokeStaticOrDirect* invoke);
  void LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke);

  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Emit linker patches.
  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;
  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Emit a write barrier if:
  // A) emit_null_check is false
  // B) emit_null_check is true, and value is not null.
  void MaybeMarkGCCard(
      Register temp, Register card, Register object, Register value, bool emit_null_check);

  // Emit a write barrier unconditionally.
  void MarkGCCard(Register temp, Register card, Register object);
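
  // Illustrative sketch (not part of the upstream header) of the intended call pattern for
  // the card-marking helpers above, assuming the caller has already allocated `temp` and
  // `card` as temporary registers and has just stored the reference `value` into a field of
  // `obj` (`value_can_be_null` is the caller's knowledge about the stored value):
  //
  //   // After emitting the store of `value` into a reference field of `obj`:
  //   codegen->MaybeMarkGCCard(temp, card, obj, value,
  //                            /* emit_null_check= */ value_can_be_null);
  //
  // MaybeMarkGCCard skips the card mark when `emit_null_check` is true and `value` turns out
  // to be null, whereas MarkGCCard always dirties the card covering `obj`.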

  // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
  // `(!clean || !self->is_gc_marking)`, since the card table should not be set to clean when
  // the CC GC is marking for eliminated write barriers.
  void CheckGCCardIsValid(Register temp, Register card, Register object);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters(DataType::Type type) const override {
    return type == DataType::Type::kInt64;
  }

  bool ShouldSplitLongMoves() const override { return true; }

  Label* GetFrameEntryLabel() { return &frame_entry_label_; }

  void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) {
    method_address_offset_.Put(method_base->GetId(), offset);
  }

  int32_t GetMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base) const {
    return method_address_offset_.Get(method_base->GetId());
  }

  int32_t ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralFloatAddress(float v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralInt32Address(int32_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralInt64Address(int64_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);

  // Load a 32-bit value into a register in the most efficient manner.
  void Load32BitValue(Register dest, int32_t value);

  // Compare a register with a 32-bit value in the most efficient manner.
  void Compare32BitValue(Register dest, int32_t value);

  // Compare int values. Supports only register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(Register lhs, Location rhs);

  // Construct address for array access.
  static Address ArrayAddress(Register obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);

  void Finalize() override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed. The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`). This operation
  // requires a temporary register, which must be provided as a
  // non-null pointer (`temp`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 Register* temp = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e. for array accesses), the offset
  // value passed to artReadBarrierSlow is adjusted to take `index`
  // into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
  void MemoryFence(bool non_temporal = false) {
    if (!non_temporal) {
      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;

  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass);
  void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
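
  // Illustrative note (not part of the upstream header): in AT&T syntax, the two paths of
  // MemoryFence() above correspond to
  //
  //   lock addl $0, (%esp)   // default path: orders prior stores before subsequent loads
  //   mfence                 // non_temporal path: additionally orders non-temporal stores
  //
  // The locked add is used by default because, as the comment on MemoryFence() notes, it
  // avoids serializing device memory and only touches (without changing) the word at the
  // top of the stack.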

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // The correct value will be inserted when processing Assembler fixups.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  struct X86PcRelativePatchInfo : PatchInfo<Label> {
    X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address,
                           const DexFile* target_dex_file,
                           uint32_t target_index)
        : PatchInfo(target_dex_file, target_index),
          method_address(address) {}
    HX86ComputeBaseMethodAddress* method_address;
  };

  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  void EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo>& infos,
                                   ArenaVector<linker::LinkerPatch>* linker_patches);

  Register GetInvokeExtraParameter(HInvoke* invoke, Register temp);
  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86 location_builder_;
  InstructionCodeGeneratorX86 instruction_visitor_;
  ParallelMoveResolverX86 move_resolver_;
  X86Assembler assembler_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
  // PC-relative method patch info for kAppImageRelRo.
  ArenaDeque<X86PcRelativePatchInfo> app_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
  // PC-relative type patch info for kAppImageRelRo.
  ArenaDeque<X86PcRelativePatchInfo> app_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<X86PcRelativePatchInfo> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<X86PcRelativePatchInfo> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_other_patches_;

  // Patches for string root accesses in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class root accesses in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int32_t constant_area_start_;

  // Fixups for jump tables that need to be patched after the constant table is generated.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  // Maps a HX86ComputeBaseMethodAddress instruction id to its offset in the
  // compiled code.
  ArenaSafeMap<uint32_t, int32_t> method_address_offset_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};

}  // namespace x86
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_