/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_
#define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_

#include <cstdint>
#include <tuple>
#include <variant>

#include "berberis/assembler/common.h"
#include "berberis/assembler/x86_64.h"
#include "berberis/base/checks.h"
#include "berberis/base/dependent_false.h"
#include "berberis/base/macros.h"
#include "berberis/decoder/riscv64/decoder.h"
#include "berberis/decoder/riscv64/semantics_player.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state.h"
#include "berberis/intrinsics/intrinsics.h"
#include "berberis/intrinsics/intrinsics_float.h"
#include "berberis/intrinsics/macro_assembler.h"
#include "berberis/lite_translator/lite_translate_region.h"
#include "berberis/runtime_primitives/platform.h"

#include "allocator.h"
#include "call_intrinsic.h"
#include "inline_intrinsic.h"
#include "register_maintainer.h"

namespace berberis {

class MachineCode;

class LiteTranslator {
 public:
  using Assembler = MacroAssembler<x86_64::Assembler>;
  using CsrName = berberis::CsrName;
  using Decoder = Decoder<SemanticsPlayer<LiteTranslator>>;
  using Register = Assembler::Register;
  static constexpr auto no_register = Assembler::no_register;
  // Note: on the RISC-V architecture FP registers and SIMD registers are disjoint, but on x86
  // they are the same.
  using FpRegister = Assembler::XMMRegister;
  static constexpr auto no_fp_register = Assembler::no_xmm_register;
  using SimdRegister = Assembler::XMMRegister;
  using Condition = Assembler::Condition;
  using Float32 = intrinsics::Float32;
  using Float64 = intrinsics::Float64;

  explicit LiteTranslator(MachineCode* machine_code,
                          GuestAddr pc,
                          LiteTranslateParams params = LiteTranslateParams{})
      : as_(machine_code),
        success_(true),
        pc_(pc),
        params_(params),
        is_region_end_reached_(false) {}
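
  // LiteTranslator implements the callback interface expected by SemanticsPlayer: the decoder
  // drives it one instruction at a time and it emits x86-64 code directly through the macro
  // assembler. Anything it cannot translate calls Undefined(), which clears success_ so the
  // caller can discard the region and fall back to another execution strategy.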
  //
  // Instruction implementations.
  //

  Register Op(Decoder::OpOpcode opcode, Register arg1, Register arg2);
  Register Op32(Decoder::Op32Opcode opcode, Register arg1, Register arg2);
  Register OpImm(Decoder::OpImmOpcode opcode, Register arg, int16_t imm);
  Register OpImm32(Decoder::OpImm32Opcode opcode, Register arg, int16_t imm);
  Register Slli(Register arg, int8_t imm);
  Register Srli(Register arg, int8_t imm);
  Register Srai(Register arg, int8_t imm);
  Register ShiftImm32(Decoder::ShiftImm32Opcode opcode, Register arg, uint16_t imm);
  Register Rori(Register arg, int8_t shamt);
  Register Roriw(Register arg, int8_t shamt);
  Register Lui(int32_t imm);
  Register Auipc(int32_t imm);
  void CompareAndBranch(Decoder::BranchOpcode opcode, Register arg1, Register arg2, int16_t offset);
  void Branch(int32_t offset);
  void BranchRegister(Register base, int16_t offset);
  void ExitGeneratedCode(GuestAddr target);
  void ExitRegion(GuestAddr target);
  void ExitRegionIndirect(Register target);
  void Store(Decoder::MemoryDataOperandType operand_type,
             Register arg,
             int16_t offset,
             Register data);
  Register Load(Decoder::LoadOperandType operand_type, Register arg, int16_t offset);

  Register Ecall(Register syscall_nr,
                 Register arg0,
                 Register arg1,
                 Register arg2,
                 Register arg3,
                 Register arg4,
                 Register arg5) {
    UNUSED(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5);
    Undefined();
    return Assembler::no_register;
  }

  void Fence(Decoder::FenceOpcode /*opcode*/,
             Register /*src*/,
             bool sw,
             bool sr,
             bool /*so*/,
             bool /*si*/,
             bool pw,
             bool pr,
             bool /*po*/,
             bool /*pi*/) {
    UNUSED(sw, sr, pw, pr);
    Undefined();
  }

  void Nop() {}

  //
  // Csr
  //

  Register UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Register csr);
  Register UpdateCsr(Decoder::CsrImmOpcode opcode, uint8_t imm, Register csr);

  //
  // F and D extensions.
  //

  template <typename DataType>
  FpRegister LoadFp(Register arg, int16_t offset) {
    FpRegister res = AllocTempSimdReg();
    as_.Movs<DataType>(res, {.base = arg, .disp = offset});
    return res;
  }

  template <typename DataType>
  void StoreFp(Register arg, int16_t offset, FpRegister data) {
    as_.Movs<DataType>({.base = arg, .disp = offset}, data);
  }

  FpRegister Fmv(FpRegister arg) {
    SimdRegister res = AllocTempSimdReg();
    if (host_platform::kHasAVX) {
      as_.Vmovapd(res, arg);
    } else {
      as_.Movaps(res, arg);
    }
    return res;
  }

  //
  // V extension.
  //

  template <typename VOpArgs, typename... ExtraArgs>
  void OpVector(const VOpArgs& /*args*/, ExtraArgs... /*extra_args*/) {
    // TODO(300690740): develop and implement a strategy which would allow us to support vector
    // intrinsics not just in the interpreter.
    Undefined();
  }

  //
  // Guest state getters/setters.
  //
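
  // Translated code keeps a pointer to the guest ThreadState in RBP, so every guest register,
  // FP register, and CSR access below is an RBP-relative load or store unless the value is
  // currently mapped to a host register. As an illustrative sketch (the exact opcode enum
  // values and the per-instruction temp lifetime are assumptions here), translating
  // `addi x3, x1, 42` amounts to roughly:
  //
  //   Register arg = GetReg(1);            // guest x1 -> host register
  //   Register res = OpImm(Decoder::OpImmOpcode::kAddi, arg, 42);
  //   SetReg(3, res);                      // host register -> guest x3
  //   FreeTempRegs();                      // temps live only for one instruction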

  GuestAddr GetInsnAddr() const { return pc_; }

  Register GetReg(uint8_t reg) {
    CHECK_GT(reg, 0);
    CHECK_LT(reg, std::size(ThreadState{}.cpu.x));
    if (IsRegMappingEnabled()) {
      auto [mapped_reg, is_new_mapping] = GetMappedRegisterOrMap(reg);
      if (is_new_mapping) {
        int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
        as_.Movq(mapped_reg, {.base = as_.rbp, .disp = offset});
      }
      return mapped_reg;
    }
    Register result = AllocTempReg();
    int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
    as_.Movq(result, {.base = as_.rbp, .disp = offset});
    return result;
  }

  void SetReg(uint8_t reg, Register value) {
    CHECK_GT(reg, 0);
    CHECK_LT(reg, std::size(ThreadState{}.cpu.x));
    CHECK_LE(reg, kNumGuestRegs);
    if (IsRegMappingEnabled()) {
      auto [mapped_reg, _] = GetMappedRegisterOrMap(reg);
      if (success()) {
        as_.Movq(mapped_reg, value);
        gp_maintainer_.NoticeModified(reg);
      }
      return;
    }
    int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
    as_.Movq({.base = as_.rbp, .disp = offset}, value);
  }

  void StoreMappedRegs() {
    if (!IsRegMappingEnabled()) {
      return;
    }
    for (int i = 0; i < int(kNumGuestRegs); i++) {
      if (gp_maintainer_.IsModified(i)) {
        auto mapped_reg = gp_maintainer_.GetMapped(i);
        int32_t offset = offsetof(ThreadState, cpu.x[0]) + i * 8;
        as_.Movq({.base = as_.rbp, .disp = offset}, mapped_reg);
      }
    }
    for (int i = 0; i < int(kNumGuestFpRegs); i++) {
      if (simd_maintainer_.IsModified(i)) {
        auto mapped_reg = simd_maintainer_.GetMapped(i);
        int32_t offset = offsetof(ThreadState, cpu.f) + i * sizeof(Float64);
        StoreFpReg(mapped_reg, offset);
      }
    }
  }

  FpRegister GetFpReg(uint8_t reg) {
    CHECK_LT(reg, std::size(ThreadState{}.cpu.f));
    CHECK_LE(reg, kNumGuestFpRegs);
    if (IsRegMappingEnabled()) {
      auto [mapped_reg, is_new_mapping] = GetMappedFpRegOrMap(reg);
      if (is_new_mapping) {
        int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
        as_.Movsd(mapped_reg, {.base = Assembler::rbp, .disp = offset});
      }
      return mapped_reg;
    }
    SimdRegister result = AllocTempSimdReg();
    int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
    as_.Movsd(result, {.base = Assembler::rbp, .disp = offset});
    return result;
  }

  template <typename FloatType>
  FpRegister GetFRegAndUnboxNan(uint8_t reg) {
    SimdRegister result = GetFpReg(reg);
    SimdRegister unboxed_result = AllocTempSimdReg();
    if (host_platform::kHasAVX) {
      as_.MacroUnboxNanAVX<FloatType>(unboxed_result, result);
    } else {
      as_.MacroUnboxNan<FloatType>(unboxed_result, result);
    }
    return unboxed_result;
  }

  template <typename FloatType>
  void NanBoxFpReg(FpRegister value) {
    if (host_platform::kHasAVX) {
      as_.MacroNanBoxAVX<FloatType>(value, value);
      return;
    }
    as_.MacroNanBox<FloatType>(value);
  }
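
  // RISC-V requires narrower FP values held in the 64-bit f registers to be NaN-boxed: the
  // unused upper bits must be all ones. The helpers above and below box values on the way into
  // the guest register file and unbox them on the way out; for Float64 both operations are
  // no-ops (see the specializations at the end of this file).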
  template <typename FloatType>
  void NanBoxAndSetFpReg(uint8_t reg, FpRegister value) {
    CHECK_LT(reg, std::size(ThreadState{}.cpu.f));
    int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
    NanBoxFpReg<FloatType>(value);

    if (IsRegMappingEnabled()) {
      auto [mapped_reg, _] = GetMappedFpRegOrMap(reg);
      if (success()) {
        // Operand type doesn't matter.
        MoveFpReg(mapped_reg, value);
        simd_maintainer_.NoticeModified(reg);
      }
      return;
    }

    StoreFpReg(value, offset);
  }

  //
  // Various helper methods.
  //

  template <CsrName kName>
  [[nodiscard]] Register GetCsr() {
    Register csr_reg = AllocTempReg();
    as_.Expand<uint64_t, CsrFieldType<kName>>(
        csr_reg, {.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>});
    return csr_reg;
  }

  template <CsrName kName>
  void SetCsr(uint8_t imm) {
    // Note: a csr immediate only has 5 bits in the RISC-V encoding, which guarantees that
    // “imm & kCsrMask<kName>” can be used as an 8-bit immediate.
    as_.Mov<CsrFieldType<kName>>({.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>},
                                 static_cast<int8_t>(imm & kCsrMask<kName>));
  }

  template <CsrName kName>
  void SetCsr(Register arg) {
    // Use RCX as a temporary register.
    as_.Mov<CsrFieldType<kName>>(Assembler::rcx, arg);
    if constexpr (sizeof(CsrFieldType<kName>) <= sizeof(int32_t)) {
      as_.And<CsrFieldType<kName>>(Assembler::rcx, kCsrMask<kName>);
    } else {
      as_.And<CsrFieldType<kName>>(Assembler::rcx,
                                   {.disp = constants_pool::kConst<uint64_t{kCsrMask<kName>}>});
    }
    as_.Mov<CsrFieldType<kName>>({.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>},
                                 Assembler::rcx);
  }

  [[nodiscard]] Register GetImm(uint64_t imm) {
    Register imm_reg = AllocTempReg();
    as_.Movq(imm_reg, imm);
    return imm_reg;
  }

  [[nodiscard]] Register Copy(Register value) {
    Register result = AllocTempReg();
    as_.Movq(result, value);
    return result;
  }

  void Undefined() { success_ = false; }

  RegisterFileMaintainer<Register, kNumGuestRegs>* gp_maintainer() { return &gp_maintainer_; }
  RegisterFileMaintainer<SimdRegister, kNumGuestFpRegs>* simd_maintainer() {
    return &simd_maintainer_;
  }
  [[nodiscard]] Assembler* as() { return &as_; }
  [[nodiscard]] bool success() const { return success_; }

  void FreeTempRegs() {
    gp_allocator_.FreeTemps();
    simd_allocator_.FreeTemps();
  }

  void StoreFpReg(FpRegister value, int32_t offset) {
    if (host_platform::kHasAVX) {
      as_.Vmovsd({.base = Assembler::rbp, .disp = offset}, value);
    } else {
      as_.Movsd({.base = Assembler::rbp, .disp = offset}, value);
    }
  }

  void MoveFpReg(FpRegister reg, FpRegister value) {
    if (host_platform::kHasAVX) {
      as_.Vmovsd(reg, value, value);
    } else {
      as_.Movsd(reg, value);
    }
  }

#include "berberis/intrinsics/translator_intrinsics_hooks-inl.h"

  bool is_region_end_reached() const { return is_region_end_reached_; }

  void IncrementInsnAddr(uint8_t insn_size) { pc_ += insn_size; }

  bool IsRegMappingEnabled() { return params_.enable_reg_mapping; }
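
  // Register mapping: when enabled, each guest register the region touches is pinned to a host
  // register for the whole region. The maintainers remember the mapping and which guest
  // registers were modified, so StoreMappedRegs() can write the dirty ones back to ThreadState
  // before the region exits. If the allocator runs out of host registers, the mapping helpers
  // below clear success_ and translation of the region is abandoned.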
  std::tuple<Register, bool> GetMappedRegisterOrMap(int reg) {
    if (gp_maintainer_.IsMapped(reg)) {
      return {gp_maintainer_.GetMapped(reg), false};
    }

    if (auto alloc_result = gp_allocator_.Alloc()) {
      gp_maintainer_.Map(reg, alloc_result.value());
      return {alloc_result.value(), true};
    }
    success_ = false;
    return {Assembler::no_register, false};
  }

  std::tuple<SimdRegister, bool> GetMappedFpRegOrMap(int reg) {
    if (simd_maintainer_.IsMapped(reg)) {
      return {simd_maintainer_.GetMapped(reg), false};
    }

    if (auto alloc_result = simd_allocator_.Alloc()) {
      simd_maintainer_.Map(reg, alloc_result.value());
      return {alloc_result.value(), true};
    }
    success_ = false;
    return {Assembler::no_xmm_register, false};
  }

  Register AllocTempReg() {
    if (auto reg_option = gp_allocator_.AllocTemp()) {
      return reg_option.value();
    }
    success_ = false;
    return Assembler::no_register;
  }

  SimdRegister AllocTempSimdReg() {
    if (auto reg_option = simd_allocator_.AllocTemp()) {
      return reg_option.value();
    }
    success_ = false;
    return Assembler::no_xmm_register;
  }

  template <typename IntType, bool aq, bool rl>
  Register Lr(Register /* addr */) {
    Undefined();
    return Assembler::no_register;
  }

  template <typename IntType, bool aq, bool rl>
  Register Sc(Register /* addr */, Register /* data */) {
    Undefined();
    return Assembler::no_register;
  }

 private:
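  // Intrinsic dispatch used by the hooks included above: first try to expand the intrinsic
  // inline into the instruction stream; if that is not possible, emit an out-of-line call.
  // For value-returning intrinsics a destination register (or register tuple) is allocated up
  // front so both paths can target it.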
  template <auto kFunction, typename AssemblerResType, typename... AssemblerArgType>
  AssemblerResType CallIntrinsic(AssemblerArgType... args) {
    if constexpr (std::is_same_v<AssemblerResType, void>) {
      if (inline_intrinsic::TryInlineIntrinsic<kFunction>(
              as_,
              [this]() { return AllocTempReg(); },
              [this]() { return AllocTempSimdReg(); },
              std::monostate{},
              args...)) {
        return;
      }
      call_intrinsic::CallIntrinsic<AssemblerResType>(as_, kFunction, args...);
    } else {
      AssemblerResType result = [this] {
        if constexpr (std::is_same_v<AssemblerResType, Register>) {
          return AllocTempReg();
        } else if constexpr (std::is_same_v<AssemblerResType, std::tuple<Register, Register>>) {
          return std::tuple{AllocTempReg(), AllocTempReg()};
        } else if constexpr (std::is_same_v<AssemblerResType, SimdRegister>) {
          return AllocTempSimdReg();
        } else {
          // This should not be reached by the compiler. If it is, there is a new result type
          // that needs to be supported.
          static_assert(kDependentTypeFalse<AssemblerResType>, "Unsupported result type");
        }
      }();

      if (inline_intrinsic::TryInlineIntrinsic<kFunction>(
              as_,
              [this]() { return AllocTempReg(); },
              [this]() { return AllocTempSimdReg(); },
              result,
              args...)) {
        return result;
      }

      call_intrinsic::CallIntrinsic<AssemblerResType>(as_, kFunction, result, args...);

      return result;
    }
  }

  Assembler as_;
  bool success_;
  GuestAddr pc_;
  Allocator<Register> gp_allocator_;
  RegisterFileMaintainer<Register, kNumGuestRegs> gp_maintainer_;
  RegisterFileMaintainer<SimdRegister, kNumGuestFpRegs> simd_maintainer_;
  Allocator<SimdRegister> simd_allocator_;
  const LiteTranslateParams params_;
  bool is_region_end_reached_;
};

template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kCycle>() {
  return CPUClockCount();
}

template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kFCsr>() {
  Register csr_reg = AllocTempReg();
  bool inline_successful = inline_intrinsic::TryInlineIntrinsic<&intrinsics::FeGetExceptions>(
      as_,
      [this]() { return AllocTempReg(); },
      [this]() { return AllocTempSimdReg(); },
      Assembler::rax);
  CHECK(inline_successful);
  as_.Expand<uint64_t, CsrFieldType<CsrName::kFrm>>(
      csr_reg, {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>});
  as_.Shl<uint8_t>(csr_reg, 5);
  as_.Or<uint8_t>(csr_reg, as_.rax);
  return csr_reg;
}

template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kFFlags>() {
  return FeGetExceptions();
}

template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVlenb>() {
  return GetImm(16);
}

template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVxrm>() {
  Register reg = AllocTempReg();
  as_.Expand<uint64_t, uint8_t>(reg,
                                {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>});
  as_.And<uint8_t>(reg, 0b11);
  return reg;
}

template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVxsat>() {
  Register reg = AllocTempReg();
  as_.Expand<uint64_t, uint8_t>(reg,
                                {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>});
  as_.Shr<uint8_t>(reg, 2);
  return reg;
}
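
// The fcsr CSR packs the rounding mode (frm) into bits 7:5 and the accrued exception flags
// (fflags) into bits 4:0. The specializations below therefore split or combine the two guest
// state fields whenever fcsr is read or written as a whole.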
template <>
inline void LiteTranslator::SetCsr<CsrName::kFCsr>(uint8_t imm) {
  // Note: the instructions Csrrci and Csrrsi couldn't affect Frm because the immediate only has
  // five bits. But these instructions don't pass their immediate-specified argument into
  // `SetCsr`; they combine it with the register first. Fixing that can only be done by changing
  // code in the semantics player.
  //
  // But Csrrwi may clear it. And we actually may only arrive here from Csrrwi.
  // Thus, technically, we know that imm >> 5 is always zero, but it doesn't look like a good
  // idea to rely on that: it's very subtle and it only affects code generation speed.
  as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
                   static_cast<int8_t>(imm >> 5));
  as_.MacroFeSetExceptionsAndRoundImmTranslate(
      {.base = Assembler::rbp,
       .disp = static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area))},
      imm);
}

template <>
inline void LiteTranslator::SetCsr<CsrName::kFCsr>(Register arg) {
  // Use RAX as a temporary register for exceptions and RCX for rm.
  // We know RCX would be used by FeSetRound, too.
  as_.Mov<uint8_t>(Assembler::rax, arg);
  as_.And<uint32_t>(Assembler::rax, 0b1'1111);
  as_.Shldl(Assembler::rcx, arg, int8_t{32 - 5});
  as_.And<uint8_t>(Assembler::rcx, kCsrMask<CsrName::kFrm>);
  as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
                   Assembler::rcx);
  as_.MacroFeSetExceptionsAndRoundTranslate(
      Assembler::rax,
      {.base = Assembler::rbp,
       .disp = static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area))},
      Assembler::rax);
}

template <>
inline void LiteTranslator::SetCsr<CsrName::kFFlags>(uint8_t imm) {
  FeSetExceptionsImm(static_cast<int8_t>(imm & 0b1'1111));
}

template <>
inline void LiteTranslator::SetCsr<CsrName::kFFlags>(Register arg) {
  // Use RAX as a temporary register.
  as_.Mov<uint8_t>(Assembler::rax, arg);
  as_.And<uint32_t>(Assembler::rax, 0b1'1111);
  FeSetExceptions(Assembler::rax);
}

template <>
inline void LiteTranslator::SetCsr<CsrName::kFrm>(uint8_t imm) {
  as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
                   static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>));
  FeSetRoundImm(static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>));
}

template <>
inline void LiteTranslator::SetCsr<CsrName::kFrm>(Register arg) {
  // Use RCX as a temporary register. We know it would be used by FeSetRound, too.
  as_.Mov<uint8_t>(Assembler::rcx, arg);
  as_.And<uint8_t>(Assembler::rcx, kCsrMask<CsrName::kFrm>);
  as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
                   Assembler::rcx);
  FeSetRound(Assembler::rcx);
}

template <>
inline void LiteTranslator::SetCsr<CsrName::kVxrm>(uint8_t imm) {
  imm &= 0b11;
  if (imm != 0b11) {
    as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  }
  if (imm != 0b00) {
    as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, imm);
  }
}

template <>
inline void LiteTranslator::SetCsr<CsrName::kVxrm>(Register arg) {
  as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  as_.And<uint8_t>(arg, 0b11);
  as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, arg);
}
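
// Note: the guest state keeps a combined vcsr field with vxrm in bits 1:0 and vxsat in bit 2,
// which is why the vxrm accessors above mask with 0b11 while the vxsat accessors below work
// with bit 0b100.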
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxsat>(uint8_t imm) {
  if (imm & 0b1) {
    as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  } else {
    as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b11);
  }
}

template <>
inline void LiteTranslator::SetCsr<CsrName::kVxsat>(Register arg) {
  as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b11);
  as_.Test<uint8_t>(arg, 1);
  // Use RCX as a temporary register.
  as_.Setcc(Condition::kNotZero, as_.rcx);
  as_.Shl<uint8_t>(as_.rcx, int8_t{2});
  as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, as_.rcx);
}

// There is no NaN boxing for Float64 except on CPUs with Float128 support.
template <>
inline LiteTranslator::FpRegister LiteTranslator::GetFRegAndUnboxNan<LiteTranslator::Float64>(
    uint8_t reg) {
  SimdRegister result = GetFpReg(reg);
  return result;
}

template <>
inline void LiteTranslator::NanBoxFpReg<LiteTranslator::Float64>(FpRegister) {}

}  // namespace berberis

#endif  // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_