1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_
18 #define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_
19 
20 #include <cstdint>
21 #include <tuple>
22 #include <variant>
23 
24 #include "berberis/assembler/common.h"
25 #include "berberis/assembler/x86_64.h"
26 #include "berberis/base/checks.h"
27 #include "berberis/base/dependent_false.h"
28 #include "berberis/base/macros.h"
29 #include "berberis/decoder/riscv64/decoder.h"
30 #include "berberis/decoder/riscv64/semantics_player.h"
31 #include "berberis/guest_state/guest_addr.h"
32 #include "berberis/guest_state/guest_state.h"
33 #include "berberis/intrinsics/intrinsics.h"
34 #include "berberis/intrinsics/intrinsics_float.h"
35 #include "berberis/intrinsics/macro_assembler.h"
36 #include "berberis/lite_translator/lite_translate_region.h"
37 #include "berberis/runtime_primitives/platform.h"
38 
39 #include "allocator.h"
40 #include "call_intrinsic.h"
41 #include "inline_intrinsic.h"
42 #include "register_maintainer.h"
43 
44 namespace berberis {
45 
// Forward declaration of the machine-code buffer the assembler emits into
// (fixes the previous "MachindeCode" typo, which declared an unrelated,
// never-used type; the constructor below takes a MachineCode*).
class MachineCode;
47 
48 class LiteTranslator {
49  public:
50   using Assembler = MacroAssembler<x86_64::Assembler>;
51   using CsrName = berberis::CsrName;
52   using Decoder = Decoder<SemanticsPlayer<LiteTranslator>>;
53   using Register = Assembler::Register;
54   static constexpr auto no_register = Assembler::no_register;
55   // Note: on RISC-V architecture FP register and SIMD registers are disjoint, but on x86 they are
56   // the same.
57   using FpRegister = Assembler::XMMRegister;
58   static constexpr auto no_fp_register = Assembler::no_xmm_register;
59   using SimdRegister = Assembler::XMMRegister;
60   using Condition = Assembler::Condition;
61   using Float32 = intrinsics::Float32;
62   using Float64 = intrinsics::Float64;
63 
64   explicit LiteTranslator(MachineCode* machine_code,
65                           GuestAddr pc,
66                           LiteTranslateParams params = LiteTranslateParams{})
as_(machine_code)67       : as_(machine_code),
68         success_(true),
69         pc_(pc),
70         params_(params),
71         is_region_end_reached_(false){};
72 
73   //
74   // Instruction implementations.
75   //
76 
77   Register Op(Decoder::OpOpcode opcode, Register arg1, Register arg2);
78   Register Op32(Decoder::Op32Opcode opcode, Register arg1, Register arg2);
79   Register OpImm(Decoder::OpImmOpcode opcode, Register arg, int16_t imm);
80   Register OpImm32(Decoder::OpImm32Opcode opcode, Register arg, int16_t imm);
81   Register Slli(Register arg, int8_t imm);
82   Register Srli(Register arg, int8_t imm);
83   Register Srai(Register arg, int8_t imm);
84   Register ShiftImm32(Decoder::ShiftImm32Opcode opcode, Register arg, uint16_t imm);
85   Register Rori(Register arg, int8_t shamt);
86   Register Roriw(Register arg, int8_t shamt);
87   Register Lui(int32_t imm);
88   Register Auipc(int32_t imm);
89   void CompareAndBranch(Decoder::BranchOpcode opcode, Register arg1, Register arg2, int16_t offset);
90   void Branch(int32_t offset);
91   void BranchRegister(Register base, int16_t offset);
92   void ExitGeneratedCode(GuestAddr target);
93   void ExitRegion(GuestAddr target);
94   void ExitRegionIndirect(Register target);
95   void Store(Decoder::MemoryDataOperandType operand_type,
96              Register arg,
97              int16_t offset,
98              Register data);
99   Register Load(Decoder::LoadOperandType operand_type, Register arg, int16_t offset);
100 
Ecall(Register syscall_nr,Register arg0,Register arg1,Register arg2,Register arg3,Register arg4,Register arg5)101   Register Ecall(Register syscall_nr,
102                  Register arg0,
103                  Register arg1,
104                  Register arg2,
105                  Register arg3,
106                  Register arg4,
107                  Register arg5) {
108     UNUSED(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5);
109     Undefined();
110     return Assembler::no_register;
111   }
112 
Fence(Decoder::FenceOpcode,Register,bool sw,bool sr,bool,bool,bool pw,bool pr,bool,bool)113   void Fence(Decoder::FenceOpcode /*opcode*/,
114              Register /*src*/,
115              bool sw,
116              bool sr,
117              bool /*so*/,
118              bool /*si*/,
119              bool pw,
120              bool pr,
121              bool /*po*/,
122              bool /*pi*/) {
123     UNUSED(sw, sr, pw, pr);
124     Undefined();
125   }
126 
Nop()127   void Nop() {}
128 
129   //
130   // Csr
131   //
132 
133   Register UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Register csr);
134   Register UpdateCsr(Decoder::CsrImmOpcode opcode, uint8_t imm, Register csr);
135 
136   //
137   // F and D extensions.
138   //
139 
140   template <typename DataType>
LoadFp(Register arg,int16_t offset)141   FpRegister LoadFp(Register arg, int16_t offset) {
142     FpRegister res = AllocTempSimdReg();
143     as_.Movs<DataType>(res, {.base = arg, .disp = offset});
144     return res;
145   }
146 
147   template <typename DataType>
StoreFp(Register arg,int16_t offset,FpRegister data)148   void StoreFp(Register arg, int16_t offset, FpRegister data) {
149     as_.Movs<DataType>({.base = arg, .disp = offset}, data);
150   }
151 
Fmv(FpRegister arg)152   FpRegister Fmv(FpRegister arg) {
153     SimdRegister res = AllocTempSimdReg();
154     if (host_platform::kHasAVX) {
155       as_.Vmovapd(res, arg);
156     } else {
157       as_.Vmovaps(res, arg);
158     }
159     return res;
160   }
161 
162   //
163   // V extension.
164   //
165 
166   template <typename VOpArgs, typename... ExtraAegs>
OpVector(const VOpArgs &,ExtraAegs...)167   void OpVector(const VOpArgs& /*args*/, ExtraAegs... /*extra_args*/) {
168     // TODO(300690740): develop and implement strategy which would allow us to support vector
169     // intrinsics not just in the interpreter.
170     Undefined();
171   }
172 
173   //
174   // Guest state getters/setters.
175   //
176 
GetInsnAddr()177   GuestAddr GetInsnAddr() const { return pc_; }
178 
GetReg(uint8_t reg)179   Register GetReg(uint8_t reg) {
180     CHECK_GT(reg, 0);
181     CHECK_LT(reg, std::size(ThreadState{}.cpu.x));
182     if (IsRegMappingEnabled()) {
183       auto [mapped_reg, is_new_mapping] = GetMappedRegisterOrMap(reg);
184       if (is_new_mapping) {
185         int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
186         as_.Movq(mapped_reg, {.base = as_.rbp, .disp = offset});
187       }
188       return mapped_reg;
189     }
190     Register result = AllocTempReg();
191     int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
192     as_.Movq(result, {.base = as_.rbp, .disp = offset});
193     return result;
194   }
195 
SetReg(uint8_t reg,Register value)196   void SetReg(uint8_t reg, Register value) {
197     CHECK_GT(reg, 0);
198     CHECK_LT(reg, std::size(ThreadState{}.cpu.x));
199     CHECK_LE(reg, kNumGuestRegs);
200     if (IsRegMappingEnabled()) {
201       auto [mapped_reg, _] = GetMappedRegisterOrMap(reg);
202       if (success()) {
203         as_.Movq(mapped_reg, value);
204         gp_maintainer_.NoticeModified(reg);
205       }
206       return;
207     }
208     int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
209     as_.Movq({.base = as_.rbp, .disp = offset}, value);
210   }
211 
StoreMappedRegs()212   void StoreMappedRegs() {
213     if (!IsRegMappingEnabled()) {
214       return;
215     }
216     for (int i = 0; i < int(kNumGuestRegs); i++) {
217       if (gp_maintainer_.IsModified(i)) {
218         auto mapped_reg = gp_maintainer_.GetMapped(i);
219         int32_t offset = offsetof(ThreadState, cpu.x[0]) + i * 8;
220         as_.Movq({.base = as_.rbp, .disp = offset}, mapped_reg);
221       }
222     }
223     for (int i = 0; i < int(kNumGuestFpRegs); i++) {
224       if (simd_maintainer_.IsModified(i)) {
225         auto mapped_reg = simd_maintainer_.GetMapped(i);
226         int32_t offset = offsetof(ThreadState, cpu.f) + i * sizeof(Float64);
227         StoreFpReg(mapped_reg, offset);
228       }
229     }
230   }
231 
GetFpReg(uint8_t reg)232   FpRegister GetFpReg(uint8_t reg) {
233     CHECK_LT(reg, std::size(ThreadState{}.cpu.f));
234     CHECK_LE(reg, kNumGuestFpRegs);
235     if (IsRegMappingEnabled()) {
236       auto [mapped_reg, is_new_mapping] = GetMappedFpRegOrMap(reg);
237       if (is_new_mapping) {
238         int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
239         as_.Movsd(mapped_reg, {.base = Assembler::rbp, .disp = offset});
240       }
241       return mapped_reg;
242     }
243     SimdRegister result = AllocTempSimdReg();
244     int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
245     as_.Movsd(result, {.base = Assembler::rbp, .disp = offset});
246     return result;
247   }
248 
249   template <typename FloatType>
GetFRegAndUnboxNan(uint8_t reg)250   FpRegister GetFRegAndUnboxNan(uint8_t reg) {
251     SimdRegister result = GetFpReg(reg);
252     SimdRegister unboxed_result = AllocTempSimdReg();
253     if (host_platform::kHasAVX) {
254       as_.MacroUnboxNanAVX<FloatType>(unboxed_result, result);
255     } else {
256       as_.MacroUnboxNan<FloatType>(unboxed_result, result);
257     }
258     return unboxed_result;
259   }
260 
261   template <typename FloatType>
NanBoxFpReg(FpRegister value)262   void NanBoxFpReg(FpRegister value) {
263     if (host_platform::kHasAVX) {
264       as_.MacroNanBoxAVX<FloatType>(value, value);
265       return;
266     }
267     as_.MacroNanBox<FloatType>(value);
268   }
269 
270   template <typename FloatType>
NanBoxAndSetFpReg(uint8_t reg,FpRegister value)271   void NanBoxAndSetFpReg(uint8_t reg, FpRegister value) {
272     CHECK_LT(reg, std::size(ThreadState{}.cpu.f));
273     int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
274     NanBoxFpReg<FloatType>(value);
275 
276     if (IsRegMappingEnabled()) {
277       auto [mapped_reg, _] = GetMappedFpRegOrMap(reg);
278       if (success()) {
279         // Operand type doesn't matter.
280         MoveFpReg(mapped_reg, value);
281         simd_maintainer_.NoticeModified(reg);
282       }
283       return;
284     }
285 
286     StoreFpReg(value, offset);
287   }
288 
289   //
290   // Various helper methods.
291   //
292 
293   template <CsrName kName>
GetCsr()294   [[nodiscard]] Register GetCsr() {
295     Register csr_reg = AllocTempReg();
296     as_.Expand<uint64_t, CsrFieldType<kName>>(
297         csr_reg, {.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>});
298     return csr_reg;
299   }
300 
301   template <CsrName kName>
SetCsr(uint8_t imm)302   void SetCsr(uint8_t imm) {
303     // Note: csr immediate only have 5 bits in RISC-V encoding which guarantess us that
304     // “imm & kCsrMask<kName>”can be used as 8-bit immediate.
305     as_.Mov<CsrFieldType<kName>>({.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>},
306                                  static_cast<int8_t>(imm & kCsrMask<kName>));
307   }
308 
309   template <CsrName kName>
SetCsr(Register arg)310   void SetCsr(Register arg) {
311     // Use RCX as temporary register.
312     as_.Mov<CsrFieldType<kName>>(Assembler::rcx, arg);
313     if constexpr (sizeof(CsrFieldType<kName>) <= sizeof(int32_t)) {
314       as_.And<CsrFieldType<kName>>(Assembler::rcx, kCsrMask<kName>);
315     } else {
316       as_.And<CsrFieldType<kName>>(Assembler::rcx,
317                                    {.disp = constants_pool::kConst<uint64_t{kCsrMask<kName>}>});
318     }
319     as_.Mov<CsrFieldType<kName>>({.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>},
320                                  Assembler::rcx);
321   }
322 
GetImm(uint64_t imm)323   [[nodiscard]] Register GetImm(uint64_t imm) {
324     Register imm_reg = AllocTempReg();
325     as_.Movq(imm_reg, imm);
326     return imm_reg;
327   }
328 
Copy(Register value)329   [[nodiscard]] Register Copy(Register value) {
330     Register result = AllocTempReg();
331     as_.Movq(result, value);
332     return result;
333   }
334 
Undefined()335   void Undefined() { success_ = false; }
336 
gp_maintainer()337   RegisterFileMaintainer<Register, kNumGuestRegs>* gp_maintainer() { return &gp_maintainer_; }
simd_maintainer()338   RegisterFileMaintainer<SimdRegister, kNumGuestFpRegs>* simd_maintainer() {
339     return &simd_maintainer_;
340   }
as()341   [[nodiscard]] Assembler* as() { return &as_; }
success()342   [[nodiscard]] bool success() const { return success_; }
343 
FreeTempRegs()344   void FreeTempRegs() {
345     gp_allocator_.FreeTemps();
346     simd_allocator_.FreeTemps();
347   }
348 
StoreFpReg(FpRegister value,int32_t offset)349   void StoreFpReg(FpRegister value, int32_t offset) {
350     if (host_platform::kHasAVX) {
351       as_.Vmovsd({.base = Assembler::rbp, .disp = offset}, value);
352     } else {
353       as_.Movsd({.base = Assembler::rbp, .disp = offset}, value);
354     }
355   }
356 
MoveFpReg(FpRegister reg,FpRegister value)357   void MoveFpReg(FpRegister reg, FpRegister value) {
358     if (host_platform::kHasAVX) {
359       as_.Vmovsd(reg, value, value);
360     } else {
361       as_.Movsd(reg, value);
362     }
363   }
364 
365 #include "berberis/intrinsics/translator_intrinsics_hooks-inl.h"
366 
is_region_end_reached()367   bool is_region_end_reached() const { return is_region_end_reached_; }
368 
IncrementInsnAddr(uint8_t insn_size)369   void IncrementInsnAddr(uint8_t insn_size) { pc_ += insn_size; }
370 
IsRegMappingEnabled()371   bool IsRegMappingEnabled() { return params_.enable_reg_mapping; }
372 
GetMappedRegisterOrMap(int reg)373   std::tuple<Register, bool> GetMappedRegisterOrMap(int reg) {
374     if (gp_maintainer_.IsMapped(reg)) {
375       return {gp_maintainer_.GetMapped(reg), false};
376     }
377 
378     if (auto alloc_result = gp_allocator_.Alloc()) {
379       gp_maintainer_.Map(reg, alloc_result.value());
380       return {alloc_result.value(), true};
381     }
382     success_ = false;
383     return {Assembler::no_register, false};
384   }
385 
GetMappedFpRegOrMap(int reg)386   std::tuple<SimdRegister, bool> GetMappedFpRegOrMap(int reg) {
387     if (simd_maintainer_.IsMapped(reg)) {
388       return {simd_maintainer_.GetMapped(reg), false};
389     }
390 
391     if (auto alloc_result = simd_allocator_.Alloc()) {
392       simd_maintainer_.Map(reg, alloc_result.value());
393       return {alloc_result.value(), true};
394     }
395     success_ = false;
396     return {Assembler::no_xmm_register, false};
397   }
398 
AllocTempReg()399   Register AllocTempReg() {
400     if (auto reg_option = gp_allocator_.AllocTemp()) {
401       return reg_option.value();
402     }
403     success_ = false;
404     return Assembler::no_register;
405   };
406 
AllocTempSimdReg()407   SimdRegister AllocTempSimdReg() {
408     if (auto reg_option = simd_allocator_.AllocTemp()) {
409       return reg_option.value();
410     }
411     success_ = false;
412     return Assembler::no_xmm_register;
413   };
414 
415   template <typename IntType, bool aq, bool rl>
Lr(Register)416   Register Lr(Register /* addr */) {
417     Undefined();
418     return Assembler::no_register;
419   }
420 
421   template <typename IntType, bool aq, bool rl>
Sc(Register,Register)422   Register Sc(Register /* addr */, Register /* data */) {
423     Undefined();
424     return Assembler::no_register;
425   }
426 
427  private:
428   template <auto kFunction, typename AssemblerResType, typename... AssemblerArgType>
CallIntrinsic(AssemblerArgType...args)429   AssemblerResType CallIntrinsic(AssemblerArgType... args) {
430     if constexpr (std::is_same_v<AssemblerResType, void>) {
431       if (inline_intrinsic::TryInlineIntrinsic<kFunction>(
432               as_,
433               [this]() { return AllocTempReg(); },
434               [this]() { return AllocTempSimdReg(); },
435               std::monostate{},
436               args...)) {
437         return;
438       }
439       call_intrinsic::CallIntrinsic<AssemblerResType>(as_, kFunction, args...);
440     } else {
441       AssemblerResType result = [this] {
442         if constexpr (std::is_same_v<AssemblerResType, Register>) {
443           return AllocTempReg();
444         } else if constexpr (std::is_same_v<AssemblerResType, std::tuple<Register, Register>>) {
445           return std::tuple{AllocTempReg(), AllocTempReg()};
446         } else if constexpr (std::is_same_v<AssemblerResType, SimdRegister>) {
447           return AllocTempSimdReg();
448         } else {
449           // This should not be reached by the compiler. If it is - there is a new result type that
450           // needs to be supported.
451           static_assert(kDependentTypeFalse<AssemblerResType>, "Unsupported result type");
452         }
453       }();
454 
455       if (inline_intrinsic::TryInlineIntrinsic<kFunction>(
456               as_,
457               [this]() { return AllocTempReg(); },
458               [this]() { return AllocTempSimdReg(); },
459               result,
460               args...)) {
461         return result;
462       }
463 
464       call_intrinsic::CallIntrinsic<AssemblerResType>(as_, kFunction, result, args...);
465 
466       return result;
467     }
468   }
469 
470   Assembler as_;
471   bool success_;
472   GuestAddr pc_;
473   Allocator<Register> gp_allocator_;
474   RegisterFileMaintainer<Register, kNumGuestRegs> gp_maintainer_;
475   RegisterFileMaintainer<SimdRegister, kNumGuestFpRegs> simd_maintainer_;
476   Allocator<SimdRegister> simd_allocator_;
477   const LiteTranslateParams params_;
478   bool is_region_end_reached_;
479 };
480 
// `cycle` CSR: delegate to the CPU clock-count intrinsic hook.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kCycle>() {
  return CPUClockCount();
}
485 
// `fcsr` CSR read: assemble the value as (frm << 5) | fflags.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kFCsr>() {
  Register csr_reg = AllocTempReg();
  // Fetch accumulated FP exception flags into RAX.  This intrinsic is expected
  // to always inline successfully, hence the CHECK below.
  bool inline_succeful = inline_intrinsic::TryInlineIntrinsic<&intrinsics::FeGetExceptions>(
      as_,
      [this]() { return AllocTempReg(); },
      [this]() { return AllocTempSimdReg(); },
      Assembler::rax);
  CHECK(inline_succeful);
  // Load frm from ThreadState, shift it into bits 7..5, then OR in the
  // exception flags (bits 4..0).
  as_.Expand<uint64_t, CsrFieldType<CsrName::kFrm>>(
      csr_reg, {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>});
  as_.Shl<uint8_t>(csr_reg, 5);
  as_.Or<uint8_t>(csr_reg, as_.rax);
  return csr_reg;
}
501 
// `fflags` CSR read: just the accumulated FP exception flags.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kFFlags>() {
  return FeGetExceptions();
}
506 
// `vlenb` CSR: vector register length in bytes — a constant 16 here
// (i.e. VLEN of 128 bits).
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVlenb>() {
  return GetImm(16);
}
511 
// `vxrm` CSR read: the rounding mode lives in the low two bits of the
// combined vcsr field in ThreadState.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVxrm>() {
  Register reg = AllocTempReg();
  as_.Expand<uint64_t, uint8_t>(reg,
                                {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>});
  as_.And<uint8_t>(reg, 0b11);
  return reg;
}
520 
// `vxsat` CSR read: the saturation flag is kept in bit 2 of vcsr, so shift
// it down into bit 0.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVxsat>() {
  Register reg = AllocTempReg();
  as_.Expand<uint64_t, uint8_t>(reg,
                                {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>});
  as_.Shr<uint8_t>(reg, 2);
  return reg;
}
529 
530 template <>
531 inline void LiteTranslator::SetCsr<CsrName::kFCsr>(uint8_t imm) {
532   // Note: instructions Csrrci or Csrrsi couldn't affect Frm because immediate only has five bits.
533   // But these instruction don't pass their immediate-specified argument into `SetCsr`, they combine
534   // it with register first. Fixing that can only be done by changing code in the semantics player.
535   //
536   // But Csrrwi may clear it.  And we actually may only arrive here from Csrrwi.
537   // Thus, technically, we know that imm >> 5 is always zero, but it doesn't look like a good idea
538   // to rely on that: it's very subtle and it only affects code generation speed.
539   as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
540                    static_cast<int8_t>(imm >> 5));
541   as_.MacroFeSetExceptionsAndRoundImmTranslate(
542       {Assembler::rbp, .disp = static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area))},
543       imm);
544 }
545 
546 template <>
547 inline void LiteTranslator::SetCsr<CsrName::kFCsr>(Register arg) {
548   // Use RAX as temporary register for exceptions and RCX for rm.
549   // We know RCX would be used by FeSetRound, too.
550   as_.Mov<uint8_t>(Assembler::rax, arg);
551   as_.And<uint32_t>(Assembler::rax, 0b1'1111);
552   as_.Shldl(Assembler::rcx, arg, int8_t{32 - 5});
553   as_.And<uint8_t>(Assembler::rcx, kCsrMask<CsrName::kFrm>);
554   as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
555                    Assembler::rcx);
556   as_.MacroFeSetExceptionsAndRoundTranslate(
557       Assembler::rax,
558       {Assembler::rbp, .disp = static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area))},
559       Assembler::rax);
560 }
561 
// `fflags` CSR write from an immediate: only the low five exception bits exist.
template <>
inline void LiteTranslator::SetCsr<CsrName::kFFlags>(uint8_t imm) {
  FeSetExceptionsImm(static_cast<int8_t>(imm & 0b1'1111));
}
566 
// `fflags` CSR write from a register: mask to the five exception bits and
// forward to the FP-environment intrinsic.
template <>
inline void LiteTranslator::SetCsr<CsrName::kFFlags>(Register arg) {
  // Use RAX as temporary register.
  as_.Mov<uint8_t>(Assembler::rax, arg);
  as_.And<uint32_t>(Assembler::rax, 0b1'1111);
  FeSetExceptions(Assembler::rax);
}
574 
// `frm` CSR write from an immediate: persist the masked rounding mode into
// ThreadState and apply it to the host FP environment.
template <>
inline void LiteTranslator::SetCsr<CsrName::kFrm>(uint8_t imm) {
  as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
                   static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>));
  FeSetRoundImm(static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>));
}
581 
// `frm` CSR write from a register: mask into RCX, persist to ThreadState,
// then apply to the host FP environment.
template <>
inline void LiteTranslator::SetCsr<CsrName::kFrm>(Register arg) {
  // Use RCX as temporary register. We know it would be used by FeSetRound, too.
  as_.Mov<uint8_t>(Assembler::rcx, arg);
  as_.And<uint8_t>(Assembler::rcx, kCsrMask<CsrName::kFrm>);
  as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
                   Assembler::rcx);
  FeSetRound(Assembler::rcx);
}
591 
// `vxrm` CSR write from an immediate: update the low two bits of vcsr while
// preserving the vxsat bit (bit 2), skipping whichever RMW step is a no-op.
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxrm>(uint8_t imm) {
  imm &= 0b11;
  if (imm != 0b11) {
    // Clear the old vxrm bits (keep only vxsat); unnecessary when both bits
    // are about to be set anyway.
    as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  }
  if (imm != 0b00) {
    as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, imm);
  }
}
602 
// `vxrm` CSR write from a register: clear old vxrm bits of vcsr (keeping
// vxsat in bit 2), then OR in the new two-bit rounding mode.
// NOTE(review): this clobbers `arg` with the And — presumably callers treat
// the source register as consumed here.
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxrm>(Register arg) {
  as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  as_.And<uint8_t>(arg, 0b11);
  as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, arg);
}
609 
// `vxsat` CSR write from an immediate: set or clear bit 2 of vcsr while
// leaving the vxrm bits (1..0) untouched.
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxsat>(uint8_t imm) {
  if (imm & 0b1) {
    as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  } else {
    as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b11);
  }
}
618 
// `vxsat` CSR write from a register: materialize bit 0 of `arg` into bit 2 of
// vcsr, preserving the vxrm bits.
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxsat>(Register arg) {
  // Clear the old vxsat bit, keeping vxrm (bits 1..0).
  as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b11);
  as_.Test<uint8_t>(arg, 1);
  // Use RCX as temporary register.
  as_.Setcc(Condition::kNotZero, as_.rcx);
  as_.Shl<uint8_t>(as_.rcx, int8_t{2});
  as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, as_.rcx);
}
628 
629 // There is no NanBoxing for Float64 except on CPUs with Float128 support.
630 template <>
631 inline LiteTranslator::FpRegister LiteTranslator::GetFRegAndUnboxNan<LiteTranslator::Float64>(
632     uint8_t reg) {
633   SimdRegister result = GetFpReg(reg);
634   return result;
635 }
636 
// NaN-boxing a Float64 is a no-op: the value already fills the register.
template <>
inline void LiteTranslator::NanBoxFpReg<LiteTranslator::Float64>(FpRegister) {}
639 
640 }  // namespace berberis
641 
642 #endif  // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_
643