1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
// This inline-header is intended to be included into a source file testing the correctness of
18 // riscv64 instructions execution by an interpreter or a jit-translator.
19 //
20 // Assumptions list:
21 //
22 // 1. Includes
23 //
24 // #include "gtest/gtest.h"
25 //
// #include <cstdint>
// #include <initializer_list>
// #include <iterator>
// #include <tuple>
// #include <vector>
30 //
31 // #include "berberis/base/bit_util.h"
32 // #include "berberis/guest_state/guest_addr.h"
33 // #include "berberis/guest_state/guest_state_riscv64.h"
34 //
35 // 2. RunOneInstruction is defined and implemented
36 //
37 // 3. TESTSUITE macro is defined
38 
39 #ifndef TESTSUITE
40 #error "TESTSUITE is undefined"
41 #endif
42 
43 #if !(defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) || \
44       defined(TESTING_HEAVY_OPTIMIZER))
45 #error "One of TESTING_INTERPRETER, TESTING_LITE_TRANSLATOR, TESTING_HEAVY_OPTIMIZER must be defined
46 #endif
47 
48 namespace {
49 
50 // TODO(b/276787675): remove these files from interpreter when they are no longer needed there.
// Maybe extract FPValueToFPReg and TupleMap to a separate header?
52 inline constexpr class FPValueToFPReg {
53  public:
54   uint64_t operator()(uint64_t value) const { return value; }
55   uint64_t operator()(float value) const {
56     return bit_cast<uint32_t>(value) | 0xffff'ffff'0000'0000;
57   }
58   uint64_t operator()(double value) const { return bit_cast<uint64_t>(value); }
59 } kFPValueToFPReg;
60 
// Helper function for the unit tests. Can be used to normalize values before processing.
//
// “container” is a multi-pass iterable of tuples, e.g. std::initializer_list<std::tuple<…>>.
// “transformer” is invoked once per tuple element, in element order, so it must be callable with
// every element type that occurs in the tuples. Typical use:
//
//   for (auto& [value1, value2, value3] : TupleMap(container, [](auto value){ return …; })) {
//     …
//   }
//
// Returns a std::vector with one std::tuple per input tuple, in input order; the element types of
// the resulting tuples are whatever “transformer” returns for the corresponding inputs.
template <typename ContainerType, typename Transformer>
decltype(auto) TupleMap(const ContainerType& container, const Transformer& transformer) {
  using std::begin;
  using std::end;

  auto transform_tuple_func = [&transformer](auto&&... value) {
    return std::tuple{transformer(value)...};
  };

  using ResultTuple = decltype(std::apply(transform_tuple_func, *begin(container)));
  std::vector<ResultTuple> result;
  // The number of outputs is known up front, so allocate once instead of growing repeatedly.
  result.reserve(static_cast<typename std::vector<ResultTuple>::size_type>(
      std::distance(begin(container), end(container))));

  for (const auto& tuple : container) {
    result.push_back(std::apply(transform_tuple_func, tuple));
  }

  return result;
}
87 
88 void RaiseFeExceptForGuestFlags(uint8_t riscv_fflags) {
89   EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0);
90   if (riscv_fflags & FPFlags::NX) {
91     EXPECT_EQ(feraiseexcept(FE_INEXACT), 0);
92   }
93   if (riscv_fflags & FPFlags::UF) {
94     EXPECT_EQ(feraiseexcept(FE_UNDERFLOW), 0);
95   }
96   if (riscv_fflags & FPFlags::OF) {
97     EXPECT_EQ(feraiseexcept(FE_OVERFLOW), 0);
98   }
99   if (riscv_fflags & FPFlags::DZ) {
100     EXPECT_EQ(feraiseexcept(FE_DIVBYZERO), 0);
101   }
102   if (riscv_fflags & FPFlags::NV) {
103     EXPECT_EQ(feraiseexcept(FE_INVALID), 0);
104   }
105 }
106 
107 void TestFeExceptForGuestFlags(uint8_t riscv_fflags) {
108   EXPECT_EQ(bool(riscv_fflags & FPFlags::NX), bool(fetestexcept(FE_INEXACT)));
109   EXPECT_EQ(bool(riscv_fflags & FPFlags::UF), bool(fetestexcept(FE_UNDERFLOW)));
110   EXPECT_EQ(bool(riscv_fflags & FPFlags::OF), bool(fetestexcept(FE_OVERFLOW)));
111   EXPECT_EQ(bool(riscv_fflags & FPFlags::DZ), bool(fetestexcept(FE_DIVBYZERO)));
112   EXPECT_EQ(bool(riscv_fflags & FPFlags::NV), bool(fetestexcept(FE_INVALID)));
113 }
114 
115 }  // namespace
116 
117 class TESTSUITE : public ::testing::Test {
118  public:
119   TESTSUITE()
120       : state_{
121             .cpu = {.vtype = uint64_t{1} << 63, .frm = intrinsics::GuestModeFromHostRounding()}} {}
122 
123   template <uint8_t kInsnSize = 4>
124   void RunInstruction(const uint32_t& insn_bytes) {
125     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
126     EXPECT_TRUE(RunOneInstruction<kInsnSize>(&state_, state_.cpu.insn_addr + kInsnSize));
127   }
128 
129   // Compressed Instructions.
130 
131   template <RegisterType register_type, uint64_t expected_result, uint8_t kTargetReg>
132   void TestCompressedStore(uint16_t insn_bytes, uint64_t offset) {
133     store_area_ = 0;
134     SetXReg<kTargetReg>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset));
135     SetReg<register_type, 9>(state_.cpu, kDataToLoad);
136     RunInstruction<2>(insn_bytes);
137     EXPECT_EQ(store_area_, expected_result);
138   }
139 
140   template <RegisterType register_type, uint64_t expected_result, uint8_t kSourceReg>
141   void TestCompressedLoad(uint16_t insn_bytes, uint64_t offset) {
142     SetXReg<kSourceReg>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
143     RunInstruction<2>(insn_bytes);
144     EXPECT_EQ((GetReg<register_type, 9>(state_.cpu)), expected_result);
145   }
146 
147   void TestCAddi(uint16_t insn_bytes, uint64_t expected_increment) {
148     SetXReg<2>(state_.cpu, 1);
149     RunInstruction<2>(insn_bytes);
150     EXPECT_EQ(GetXReg<2>(state_.cpu), 1 + expected_increment);
151   }
152 
153   void TestCAddi16sp(uint16_t insn_bytes, uint64_t expected_offset) {
154     SetXReg<2>(state_.cpu, 1);
155     RunInstruction<2>(insn_bytes);
156     EXPECT_EQ(GetXReg<2>(state_.cpu), 1 + expected_offset);
157   }
158 
159   void TestLi(uint32_t insn_bytes, uint64_t expected_result) {
160     RunInstruction<2>(insn_bytes);
161     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
162   }
163 
164   void TestCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) {
165     SetXReg<2>(state_.cpu, 1);
166     RunInstruction<2>(insn_bytes);
167     EXPECT_EQ(GetXReg<9>(state_.cpu), 1 + expected_offset);
168   }
169 
170   void TestCBeqzBnez(uint16_t insn_bytes, uint64_t value, int16_t expected_offset) {
171     auto code_start = ToGuestAddr(&insn_bytes);
172     state_.cpu.insn_addr = code_start;
173     if (expected_offset == 0) {
174       // Emit pending signal so we don't get stuck in an infinite loop.
175       state_.pending_signals_status = kPendingSignalsPresent;
176     } else {
177       state_.pending_signals_status = kPendingSignalsDisabled;
178     }
179     SetXReg<9>(state_.cpu, value);
180     EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + expected_offset));
181     EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
182   }
183 
184   void TestCMiscAlu(uint16_t insn_bytes,
185                     std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
186     for (auto [arg1, arg2, expected_result] : args) {
187       SetXReg<8>(state_.cpu, arg1);
188       SetXReg<9>(state_.cpu, arg2);
189       RunInstruction<2>(insn_bytes);
190       EXPECT_EQ(GetXReg<8>(state_.cpu), expected_result);
191     }
192   }
193 
194   void TestCMiscAluSingleInput(uint16_t insn_bytes,
195                                std::initializer_list<std::tuple<uint64_t, uint64_t>> args) {
196     for (auto [arg1, expected_result] : args) {
197       SetXReg<8>(state_.cpu, arg1);
198       RunInstruction<2>(insn_bytes);
199       EXPECT_EQ(GetXReg<8>(state_.cpu), expected_result);
200     }
201   }
202 
203   void TestCMiscAluImm(uint16_t insn_bytes, uint64_t value, uint64_t expected_result) {
204     SetXReg<9>(state_.cpu, value);
205     RunInstruction<2>(insn_bytes);
206     EXPECT_EQ(GetXReg<9>(state_.cpu), expected_result);
207   }
208 
209   void TestCJ(uint16_t insn_bytes, int16_t expected_offset) {
210     auto code_start = ToGuestAddr(&insn_bytes);
211     state_.cpu.insn_addr = code_start;
212     if (expected_offset == 0) {
213       // Emit pending signal so we don't get stuck in an infinite loop.
214       state_.pending_signals_status = kPendingSignalsPresent;
215     } else {
216       state_.pending_signals_status = kPendingSignalsDisabled;
217     }
218     EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + expected_offset));
219     EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
220   }
221 
222   void TestCOp(uint32_t insn_bytes,
223                std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
224     for (auto [arg1, arg2, expected_result] : args) {
225       SetXReg<1>(state_.cpu, arg1);
226       SetXReg<2>(state_.cpu, arg2);
227       RunInstruction<2>(insn_bytes);
228       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
229     }
230   }
231 
232   // Non-Compressed Instructions.
233 
234   void TestFFlagsOnGuestAndHost(uint8_t expected_guest_fflags) {
235     // Read fflags register.
236     RunInstruction(0x00102173);  // frflags x2
237     EXPECT_EQ(GetXReg<2>(state_.cpu), expected_guest_fflags);
238 
239     // Check corresponding fenv exception flags on host.
240     TestFeExceptForGuestFlags(expected_guest_fflags);
241   }
242 
243   void TestFCsr(uint32_t insn_bytes,
244                 uint8_t fcsr_to_set,
245                 uint8_t expected_fcsr,
246                 uint8_t expected_cpustate_frm) {
247     state_.cpu.frm =
248         0b100u;  // Pass non-zero frm to ensure that we don't accidentally rely on it being zero.
249     SetXReg<3>(state_.cpu, fcsr_to_set);
250     RunInstruction(insn_bytes);
251     EXPECT_EQ(GetXReg<2>(state_.cpu), 0b1000'0000ULL | expected_fcsr);
252     EXPECT_EQ(state_.cpu.frm, expected_cpustate_frm);
253   }
254 
255   void TestFrm(uint32_t insn_bytes, uint8_t frm_to_set, uint8_t expected_rm) {
256     state_.cpu.frm = 0b001u;
257     SetXReg<3>(state_.cpu, frm_to_set);
258     RunInstruction(insn_bytes);
259     EXPECT_EQ(GetXReg<2>(state_.cpu), 0b001u);
260     EXPECT_EQ(state_.cpu.frm, expected_rm);
261   }
262 
263   void TestOp(uint32_t insn_bytes,
264               std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
265     for (auto [arg1, arg2, expected_result] : args) {
266       SetXReg<2>(state_.cpu, arg1);
267       SetXReg<3>(state_.cpu, arg2);
268       RunInstruction(insn_bytes);
269       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
270     }
271   }
272 
273   template <typename... Types>
274   void TestOpFp(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) {
275     for (auto [arg1, arg2, expected_result] : TupleMap(args, kFPValueToFPReg)) {
276       SetFReg<2>(state_.cpu, arg1);
277       SetFReg<3>(state_.cpu, arg2);
278       RunInstruction(insn_bytes);
279       EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
280     }
281   }
282 
283   void TestOpImm(uint32_t insn_bytes,
284                  std::initializer_list<std::tuple<uint64_t, uint16_t, uint64_t>> args) {
285     for (auto [arg1, imm, expected_result] : args) {
286       CHECK_LE(imm, 63);
287       uint32_t insn_bytes_with_immediate = insn_bytes | imm << 20;
288       SetXReg<2>(state_.cpu, arg1);
289       RunInstruction(insn_bytes_with_immediate);
290       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
291     }
292   }
293 
294   void TestAuipc(uint32_t insn_bytes, uint64_t expected_offset) {
295     RunInstruction(insn_bytes);
296     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_offset + ToGuestAddr(&insn_bytes));
297   }
298 
299   void TestLui(uint32_t insn_bytes, uint64_t expected_result) {
300     RunInstruction(insn_bytes);
301     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
302   }
303 
304   void TestBranch(uint32_t insn_bytes,
305                   std::initializer_list<std::tuple<uint64_t, uint64_t, int8_t>> args) {
306     auto code_start = ToGuestAddr(&insn_bytes);
307     for (auto [arg1, arg2, expected_offset] : args) {
308       state_.cpu.insn_addr = code_start;
309       SetXReg<1>(state_.cpu, arg1);
310       SetXReg<2>(state_.cpu, arg2);
311       EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + expected_offset));
312       EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
313     }
314   }
315 
316   void TestJumpAndLink(uint32_t insn_bytes, int8_t expected_offset) {
317     auto code_start = ToGuestAddr(&insn_bytes);
318     state_.cpu.insn_addr = code_start;
319     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + expected_offset));
320     EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
321     EXPECT_EQ(GetXReg<1>(state_.cpu), code_start + 4);
322   }
323 
324   void TestLoad(uint32_t insn_bytes, uint64_t expected_result) {
325     // Offset is always 8.
326     SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - 8));
327     RunInstruction(insn_bytes);
328     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
329   }
330 
331   // kLinkRegisterOffsetIfUsed is size of instruction or 0 if instruction does not link register.
332   template <uint8_t kLinkRegisterOffsetIfUsed>
333   void TestJumpAndLinkRegister(uint32_t insn_bytes, uint64_t base_disp, int64_t expected_offset) {
334     auto code_start = ToGuestAddr(&insn_bytes);
335     state_.cpu.insn_addr = code_start;
336     SetXReg<1>(state_.cpu, 0);
337     SetXReg<2>(state_.cpu, code_start + base_disp);
338     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + expected_offset));
339     EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
340     if constexpr (kLinkRegisterOffsetIfUsed == 0) {
341       EXPECT_EQ(GetXReg<1>(state_.cpu), 0UL);
342     } else {
343       EXPECT_EQ(GetXReg<1>(state_.cpu), code_start + kLinkRegisterOffsetIfUsed);
344     }
345   }
346 
347   void TestStore(uint32_t insn_bytes, uint64_t expected_result) {
348     // Offset is always 8.
349     SetXReg<1>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - 8));
350     SetXReg<2>(state_.cpu, kDataToStore);
351     store_area_ = 0;
352     RunInstruction(insn_bytes);
353     EXPECT_EQ(store_area_, expected_result);
354   }
355 
356   template <typename... Types>
357   void TestFma(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) {
358     for (auto [arg1, arg2, arg3, expected_result] : TupleMap(args, kFPValueToFPReg)) {
359       SetFReg<2>(state_.cpu, arg1);
360       SetFReg<3>(state_.cpu, arg2);
361       SetFReg<4>(state_.cpu, arg3);
362       RunInstruction(insn_bytes);
363       EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
364     }
365   }
366 
367 #if (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
368 
369   void TestAtomicLoad(uint32_t insn_bytes,
370                       const uint64_t* const data_to_load,
371                       uint64_t expected_result) {
372     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
373     SetXReg<1>(state_.cpu, ToGuestAddr(data_to_load));
374     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
375     EXPECT_EQ(GetXReg<2>(state_.cpu), expected_result);
376     EXPECT_EQ(state_.cpu.reservation_address, ToGuestAddr(data_to_load));
377     // We always reserve the full 64-bit range of the reservation address.
378     EXPECT_EQ(state_.cpu.reservation_value, *data_to_load);
379   }
380 
381   template <typename T>
382   void TestAtomicStore(uint32_t insn_bytes, T expected_result) {
383     store_area_ = ~uint64_t{0};
384     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
385     SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
386     SetXReg<2>(state_.cpu, kDataToStore);
387     SetXReg<3>(state_.cpu, 0xdeadbeef);
388     state_.cpu.reservation_address = ToGuestAddr(&store_area_);
389     state_.cpu.reservation_value = store_area_;
390     MemoryRegionReservation::SetOwner(ToGuestAddr(&store_area_), &state_.cpu);
391     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
392     EXPECT_EQ(static_cast<T>(store_area_), expected_result);
393     EXPECT_EQ(GetXReg<3>(state_.cpu), 0u);
394   }
395 
396   void TestAtomicStoreNoLoadFailure(uint32_t insn_bytes) {
397     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
398     SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
399     SetXReg<2>(state_.cpu, kDataToStore);
400     SetXReg<3>(state_.cpu, 0xdeadbeef);
401     store_area_ = 0;
402     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
403     EXPECT_EQ(store_area_, 0u);
404     EXPECT_EQ(GetXReg<3>(state_.cpu), 1u);
405   }
406 
407   void TestAtomicStoreDifferentLoadFailure(uint32_t insn_bytes) {
408     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
409     SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
410     SetXReg<2>(state_.cpu, kDataToStore);
411     SetXReg<3>(state_.cpu, 0xdeadbeef);
412     state_.cpu.reservation_address = ToGuestAddr(&kDataToStore);
413     state_.cpu.reservation_value = 0;
414     MemoryRegionReservation::SetOwner(ToGuestAddr(&kDataToStore), &state_.cpu);
415     store_area_ = 0;
416     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
417     EXPECT_EQ(store_area_, 0u);
418     EXPECT_EQ(GetXReg<3>(state_.cpu), 1u);
419   }
420 
421 #endif  // (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
422 
423   void TestAmo(uint32_t insn_bytes,
424                uint64_t arg1,
425                uint64_t arg2,
426                uint64_t expected_result,
427                uint64_t expected_memory) {
428     // Copy arg1 into store_area_
429     store_area_ = arg1;
430     SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_)));
431     SetXReg<3>(state_.cpu, arg2);
432     RunInstruction(insn_bytes);
433     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
434     EXPECT_EQ(store_area_, expected_memory);
435   }
436 
437   void TestAmo(uint32_t insn_bytes32, uint32_t insn_bytes64, uint64_t expected_memory) {
438     TestAmo(insn_bytes32,
439             0xffff'eeee'dddd'ccccULL,
440             0xaaaa'bbbb'cccc'ddddULL,
441             0xffff'ffff'dddd'ccccULL,
442             0xffff'eeee'0000'0000 | uint32_t(expected_memory));
443     TestAmo(insn_bytes64,
444             0xffff'eeee'dddd'ccccULL,
445             0xaaaa'bbbb'cccc'ddddULL,
446             0xffff'eeee'dddd'ccccULL,
447             expected_memory);
448   }
449 
450   template <typename... Types>
451   void TestFmvFloatToInteger(uint32_t insn_bytes,
452                              std::initializer_list<std::tuple<Types...>> args) {
453     for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) {
454       SetFReg<1>(state_.cpu, arg);
455       RunInstruction(insn_bytes);
456       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
457     }
458   }
459 
460   template <typename... Types>
461   void TestFmvIntegerToFloat(uint32_t insn_bytes,
462                              std::initializer_list<std::tuple<Types...>> args) {
463     for (auto [arg, expected_result] : args) {
464       SetXReg<1>(state_.cpu, arg);
465       RunInstruction(insn_bytes);
466       EXPECT_EQ(GetFReg<1>(state_.cpu), kFPValueToFPReg(expected_result));
467     }
468   }
469 
470   template <typename... Types>
471   void TestOpFpGpRegisterTarget(uint32_t insn_bytes,
472                                 std::initializer_list<std::tuple<Types...>> args) {
473     for (auto [arg1, arg2, expected_result] : TupleMap(args, kFPValueToFPReg)) {
474       SetFReg<2>(state_.cpu, arg1);
475       SetFReg<3>(state_.cpu, arg2);
476       RunInstruction(insn_bytes);
477       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
478     }
479   }
480 
481   template <typename... Types>
482   void TestOpFpGpRegisterTargetSingleInput(uint32_t insn_bytes,
483                                            std::initializer_list<std::tuple<Types...>> args) {
484     for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) {
485       SetFReg<2>(state_.cpu, arg);
486       RunInstruction(insn_bytes);
487       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
488     }
489   }
490 
491   template <typename... Types>
492   void TestOpFpGpRegisterSourceSingleInput(uint32_t insn_bytes,
493                                            std::initializer_list<std::tuple<Types...>> args) {
494     for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) {
495       SetXReg<2>(state_.cpu, arg);
496       RunInstruction(insn_bytes);
497       EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
498     }
499   }
500 
501   template <typename... Types>
502   void TestOpFpSingleInput(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) {
503     for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) {
504       SetFReg<2>(state_.cpu, arg);
505       RunInstruction(insn_bytes);
506       EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
507     }
508   }
509 
510   void TestLoadFp(uint32_t insn_bytes, uint64_t expected_result) {
511     // Offset is always 8.
512     SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - 8));
513     RunInstruction(insn_bytes);
514     EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
515   }
516 
517   void TestStoreFp(uint32_t insn_bytes, uint64_t expected_result) {
518     // Offset is always 8.
519     SetXReg<1>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - 8));
520     SetFReg<2>(state_.cpu, kDataToStore);
521     store_area_ = 0;
522     RunInstruction(insn_bytes);
523     EXPECT_EQ(store_area_, expected_result);
524   }
525 
526   void TestVsetvl(
527       uint32_t insn_bytes,
528       std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>>
529           args) {
530     for (auto [vl_orig, vtype_orig, avl, vtype_new, vl_expected, vtype_expected] : args) {
531       state_.cpu.vl = vl_orig;
532       state_.cpu.vtype = vtype_orig;
533       SetXReg<1>(state_.cpu, ~0ULL);
534       SetXReg<2>(state_.cpu, avl);
535       SetXReg<3>(state_.cpu, vtype_new);
536       RunInstruction(insn_bytes);
537       if (insn_bytes & 0b11111'0000000) {
538         EXPECT_EQ(GetXReg<1>(state_.cpu), vl_expected);
539       } else {
540         EXPECT_EQ(GetXReg<1>(state_.cpu), ~0ULL);
541       }
542       EXPECT_EQ(state_.cpu.vl, vl_expected);
543       EXPECT_EQ(state_.cpu.vtype, vtype_expected);
544     }
545   }
546 
547  protected:
548   static constexpr uint64_t kDataToLoad{0xffffeeeeddddccccULL};
549   static constexpr uint64_t kDataToStore = kDataToLoad;
550   uint64_t store_area_;
551   ThreadState state_;
552 };
553 
554 // Tests for Compressed Instructions.
555 template <uint16_t opcode, auto execute_instruction_func>
556 void TestCompressedLoadOrStore32bit(TESTSUITE* that) {
557   union {
558     uint16_t offset;
559     struct [[gnu::packed]] {
560       uint8_t : 2;
561       uint8_t i2 : 1;
562       uint8_t i3_i5 : 3;
563       uint8_t i6 : 1;
564     } i_bits;
565   };
566   for (offset = uint8_t{0}; offset < uint8_t{128}; offset += 4) {
567     union {
568       int16_t parcel;
569       struct [[gnu::packed]] {
570         uint8_t low_opcode : 2;
571         uint8_t rd : 3;
572         uint8_t i6 : 1;
573         uint8_t i2 : 1;
574         uint8_t rs : 3;
575         uint8_t i3_i5 : 3;
576         uint8_t high_opcode : 3;
577       } __attribute__((__packed__));
578     } o_bits = {
579         .low_opcode = 0b00,
580         .rd = 1,
581         .i6 = i_bits.i6,
582         .i2 = i_bits.i2,
583         .rs = 0,
584         .i3_i5 = i_bits.i3_i5,
585         .high_opcode = 0b000,
586     };
587     (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
588   }
589 }
590 
591 TEST_F(TESTSUITE, CompressedLoadAndStores32bit) {
592   // c.Lw
593   TestCompressedLoadOrStore32bit<
594       0b010'000'000'00'000'00,
595       &TESTSUITE::TestCompressedLoad<RegisterType::kReg,
596                                      static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad)),
597                                      8>>(this);
598   // c.Sw
599   TestCompressedLoadOrStore32bit<
600       0b110'000'000'00'000'00,
601       &TESTSUITE::TestCompressedStore<RegisterType::kReg,
602                                       static_cast<uint64_t>(static_cast<uint32_t>(kDataToLoad)),
603                                       8>>(this);
604 }
605 
606 template <uint16_t opcode, auto execute_instruction_func>
607 void TestCompressedLoadOrStore64bit(TESTSUITE* that) {
608   union {
609     uint16_t offset;
610     struct [[gnu::packed]] {
611       uint8_t : 3;
612       uint8_t i3_i5 : 3;
613       uint8_t i6_i7 : 2;
614     } i_bits;
615   };
616   for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) {
617     union {
618       int16_t parcel;
619       struct [[gnu::packed]] {
620         uint8_t low_opcode : 2;
621         uint8_t rd : 3;
622         uint8_t i6_i7 : 2;
623         uint8_t rs : 3;
624         uint8_t i3_i5 : 3;
625         uint8_t high_opcode : 3;
626       };
627     } o_bits = {
628         .low_opcode = 0b00,
629         .rd = 1,
630         .i6_i7 = i_bits.i6_i7,
631         .rs = 0,
632         .i3_i5 = i_bits.i3_i5,
633         .high_opcode = 0b000,
634     };
635     (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
636   }
637 }
638 
639 TEST_F(TESTSUITE, CompressedLoadAndStores) {
640   // c.Ld
641   TestCompressedLoadOrStore64bit<
642       0b011'000'000'00'000'00,
643       &TESTSUITE::TestCompressedLoad<RegisterType::kReg, kDataToLoad, 8>>(this);
644   // c.Sd
645   TestCompressedLoadOrStore64bit<
646       0b111'000'000'00'000'00,
647       &TESTSUITE::TestCompressedStore<RegisterType::kReg, kDataToLoad, 8>>(this);
648   // c.Fld
649   TestCompressedLoadOrStore64bit<
650       0b001'000'000'00'000'00,
651       &TESTSUITE::TestCompressedLoad<RegisterType::kFpReg, kDataToLoad, 8>>(this);
652   // c.Fsd
653   TestCompressedLoadOrStore64bit<
654       0b101'000'000'00'000'00,
655       &TESTSUITE::TestCompressedStore<RegisterType::kFpReg, kDataToLoad, 8>>(this);
656 }
657 
658 TEST_F(TESTSUITE, TestCompressedStore32bitsp) {
659   union {
660     uint16_t offset;
661     struct [[gnu::packed]] {
662       uint8_t : 2;
663       uint8_t i2_i5 : 4;
664       uint8_t i6_i7 : 2;
665     } i_bits;
666   };
667   for (offset = uint16_t{0}; offset < uint16_t{256}; offset += 4) {
668     union {
669       int16_t parcel;
670       struct [[gnu::packed]] {
671         uint8_t low_opcode : 2;
672         uint8_t rs2 : 5;
673         uint8_t i6_i7 : 2;
674         uint8_t i2_i5 : 4;
675         uint8_t high_opcode : 3;
676       };
677     } o_bits = {
678         .low_opcode = 0b10,
679         .rs2 = 9,
680         .i6_i7 = i_bits.i6_i7,
681         .i2_i5 = i_bits.i2_i5,
682         .high_opcode = 0b110,
683     };
684     // c.Swsp
685     TestCompressedStore<RegisterType::kReg,
686                         static_cast<uint64_t>(static_cast<uint32_t>(kDataToStore)),
687                         2>(o_bits.parcel, offset);
688   }
689 }
690 
691 template <uint16_t opcode, auto execute_instruction_func>
692 void TestCompressedStore64bitsp(TESTSUITE* that) {
693   union {
694     uint16_t offset;
695     struct [[gnu::packed]] {
696       uint8_t : 3;
697       uint8_t i3_i5 : 3;
698       uint8_t i6_i8 : 3;
699     } i_bits;
700   };
701   for (offset = uint16_t{0}; offset < uint16_t{512}; offset += 8) {
702     union {
703       int16_t parcel;
704       struct [[gnu::packed]] {
705         uint8_t low_opcode : 2;
706         uint8_t rs2 : 5;
707         uint8_t i6_i8 : 3;
708         uint8_t i3_i5 : 3;
709         uint8_t high_opcode : 3;
710       };
711     } o_bits = {
712         .low_opcode = 0b10,
713         .rs2 = 9,
714         .i6_i8 = i_bits.i6_i8,
715         .i3_i5 = i_bits.i3_i5,
716         .high_opcode = 0b101,
717     };
718     (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
719   }
720 }
721 
722 TEST_F(TESTSUITE, TestCompressedStore64bitsp) {
723   // c.Sdsp
724   TestCompressedStore64bitsp<0b011'000'000'00'000'00,
725                              &TESTSUITE::TestCompressedStore<RegisterType::kReg, kDataToStore, 2>>(
726       this);
727   // c.Fsdsp
728   TestCompressedStore64bitsp<
729       0b001'000'000'00'000'00,
730       &TESTSUITE::TestCompressedStore<RegisterType::kFpReg, kDataToStore, 2>>(this);
731 }
732 
733 TEST_F(TESTSUITE, TestCompressedLoad32bitsp) {
734   union {
735     uint16_t offset;
736     struct [[gnu::packed]] {
737       uint8_t : 2;
738       uint8_t i2_i4 : 3;
739       uint8_t i5 : 1;
740       uint8_t i6_i7 : 2;
741     } i_bits;
742   };
743   for (offset = uint16_t{0}; offset < uint16_t{256}; offset += 4) {
744     union {
745       int16_t parcel;
746       struct [[gnu::packed]] {
747         uint8_t low_opcode : 2;
748         uint8_t i6_i7 : 2;
749         uint8_t i2_i4 : 3;
750         uint8_t rd : 5;
751         uint8_t i5 : 1;
752         uint8_t high_opcode : 3;
753       };
754     } o_bits = {
755         .low_opcode = 0b10,
756         .i6_i7 = i_bits.i6_i7,
757         .i2_i4 = i_bits.i2_i4,
758         .rd = 9,
759         .i5 = i_bits.i5,
760         .high_opcode = 0b010,
761     };
762     // c.Lwsp
763     TestCompressedLoad<RegisterType::kReg,
764                        static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad)),
765                        2>(o_bits.parcel, offset);
766   }
767 }
768 
769 template <uint16_t opcode, auto execute_instruction_func>
770 void TestCompressedLoad64bitsp(TESTSUITE* that) {
771   union {
772     uint16_t offset;
773     struct [[gnu::packed]] {
774       uint8_t : 3;
775       uint8_t i3_i4 : 2;
776       uint8_t i5 : 1;
777       uint8_t i6_i8 : 3;
778     } i_bits;
779   };
780   for (offset = uint16_t{0}; offset < uint16_t{512}; offset += 8) {
781     union {
782       int16_t parcel;
783       struct [[gnu::packed]] {
784         uint8_t low_opcode : 2;
785         uint8_t i6_i8 : 3;
786         uint8_t i3_i4 : 2;
787         uint8_t rd : 5;
788         uint8_t i5 : 1;
789         uint8_t high_opcode : 3;
790       };
791     } o_bits = {
792         .low_opcode = 0b10,
793         .i6_i8 = i_bits.i6_i8,
794         .i3_i4 = i_bits.i3_i4,
795         .rd = 9,
796         .i5 = i_bits.i5,
797         .high_opcode = 0b001,
798     };
799     (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
800   }
801 }
802 
803 TEST_F(TESTSUITE, TestCompressedLoad64bitsp) {
804   // c.Ldsp
805   TestCompressedLoad64bitsp<0b011'000'000'00'000'00,
806                             &TESTSUITE::TestCompressedLoad<RegisterType::kReg, kDataToLoad, 2>>(
807       this);
808   // c.Fldsp
809   TestCompressedLoad64bitsp<0b001'000'000'00'000'00,
810                             &TESTSUITE::TestCompressedLoad<RegisterType::kFpReg, kDataToLoad, 2>>(
811       this);
812 }
813 
814 TEST_F(TESTSUITE, CAddi) {
815   union {
816     int8_t offset;
817     struct [[gnu::packed]] {
818       uint8_t i4_i0 : 5;
819       uint8_t i5 : 1;
820     } i_bits;
821   };
822   for (offset = int8_t{-32}; offset < int8_t{31}; offset++) {
823     union {
824       int16_t parcel;
825       struct [[gnu::packed]] {
826         uint8_t low_opcode : 2;
827         uint8_t i4_i0 : 5;
828         uint8_t r : 5;
829         uint8_t i5 : 1;
830         uint8_t high_opcode : 3;
831       } __attribute__((__packed__));
832     } o_bits = {
833         .low_opcode = 0,
834         .i4_i0 = i_bits.i4_i0,
835         .r = 2,
836         .i5 = i_bits.i5,
837         .high_opcode = 0,
838     };
839     // c.Addi
840     TestCAddi(o_bits.parcel | 0b0000'0000'0000'0001, offset);
841     // c.Addiw
842     TestCAddi(o_bits.parcel | 0b0010'0000'0000'0001, offset);
843   }
844 }
845 
// Checks c.Addi16sp for offsets from -512 up to (but excluding) 512 in steps of 16.
TEST_F(TESTSUITE, CAddi16sp) {
  // `offset` and `i_bits` share storage, so each immediate bit (4 through 9) of the current offset
  // can be read individually. The low four bits are skipped by the unnamed bit-field.
  union {
    int16_t offset;
    struct [[gnu::packed]] {
      uint8_t : 4;
      uint8_t i4 : 1;
      uint8_t i5 : 1;
      uint8_t i6 : 1;
      uint8_t i7 : 1;
      uint8_t i8 : 1;
      uint8_t i9 : 1;
    } i_bits;
  };
  for (offset = int16_t{-512}; offset < int16_t{512}; offset += 16) {
    // Bit-field view of the 16-bit parcel, listed from the least significant bit upwards. The
    // immediate bits are scattered, hence the non-sequential field order.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i5 : 1;
        uint8_t i7 : 1;
        uint8_t i8 : 1;
        uint8_t i6 : 1;
        uint8_t i4 : 1;
        uint8_t rd : 5;
        uint8_t i9 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0b01,
        .i5 = i_bits.i5,
        .i7 = i_bits.i7,
        .i8 = i_bits.i8,
        .i6 = i_bits.i6,
        .i4 = i_bits.i4,
        .rd = 2,
        .i9 = i_bits.i9,
        .high_opcode = 0b011,
    };
    TestCAddi16sp(o_bits.parcel, offset);
  }
}
887 
// Checks c.Lui for immediates from -131072 up to (but excluding) 131072 in steps of 4096.
TEST_F(TESTSUITE, CLui) {
  // `offset` and `i_bits` share storage; the unnamed 12-bit field skips the low bits so that
  // bits 12-16 and bit 17 of the current value can be read out.
  union {
    int32_t offset;
    struct [[gnu::packed]] {
      uint8_t : 12;
      uint8_t i12_i16 : 5;
      uint8_t i17 : 1;
    } i_bits;
  };
  for (offset = int32_t{-131072}; offset < int32_t{131072}; offset += 4096) {
    // Bit-field view of the 16-bit parcel, listed from the least significant bit upwards.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i12_i16 : 5;
        uint8_t rd : 5;
        uint8_t i17 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0b01,
        .i12_i16 = i_bits.i12_i16,
        .rd = 1,
        .i17 = i_bits.i17,
        .high_opcode = 0b011,
    };
    // Verified through the same TestLi helper that the CLi test uses.
    TestLi(o_bits.parcel, offset);
  }
}
917 
// Checks c.Li for every immediate from -32 through 31.
TEST_F(TESTSUITE, CLi) {
  // `offset` and `i_bits` share storage, exposing bits 0-4 and bit 5 of the current immediate.
  union {
    int8_t offset;
    struct [[gnu::packed]] {
      uint8_t i0_i4 : 5;
      uint8_t i5 : 1;
    } i_bits;
  };
  for (offset = int8_t{-32}; offset < int8_t{32}; offset++) {
    // Bit-field view of the 16-bit parcel, listed from the least significant bit upwards.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i0_i4 : 5;
        uint8_t rd : 5;
        uint8_t i5 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0b01,
        .i0_i4 = i_bits.i0_i4,
        .rd = 1,
        .i5 = i_bits.i5,
        .high_opcode = 0b010,
    };
    TestLi(o_bits.parcel, offset);
  }
}
946 
// Checks c.Addi4spn for offsets from 4 up to (but excluding) 1024 in steps of 4. The loop starts
// at 4, so a zero immediate is never encoded.
TEST_F(TESTSUITE, CAddi4spn) {
  // `offset` and `i_bits` share storage, so each immediate bit (2 through 9) of the current offset
  // can be read individually. The low two bits are skipped by the unnamed bit-field.
  union {
    int16_t offset;
    struct [[gnu::packed]] {
      uint8_t : 2;
      uint8_t i2 : 1;
      uint8_t i3 : 1;
      uint8_t i4 : 1;
      uint8_t i5 : 1;
      uint8_t i6 : 1;
      uint8_t i7 : 1;
      uint8_t i8 : 1;
      uint8_t i9 : 1;
    } i_bits;
  };
  for (offset = int16_t{4}; offset < int16_t{1024}; offset += 4) {
    // Bit-field view of the 16-bit parcel, listed from the least significant bit upwards. The
    // immediate bits are scattered, hence the non-sequential field order.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t rd : 3;
        uint8_t i3 : 1;
        uint8_t i2 : 1;
        uint8_t i6 : 1;
        uint8_t i7 : 1;
        uint8_t i8 : 1;
        uint8_t i9 : 1;
        uint8_t i4 : 1;
        uint8_t i5 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0b00,
        .rd = 1,
        .i3 = i_bits.i3,
        .i2 = i_bits.i2,
        .i6 = i_bits.i6,
        .i7 = i_bits.i7,
        .i8 = i_bits.i8,
        .i9 = i_bits.i9,
        .i4 = i_bits.i4,
        .i5 = i_bits.i5,
        .high_opcode = 0b000,
    };
    TestCAddi4spn(o_bits.parcel, offset);
  }
}
994 
// Checks the compressed conditional branches for offsets from -256 up to (but excluding) 256.
// Offsets advance in steps of 8, so i1 and i2 are always zero in the encodings generated here.
TEST_F(TESTSUITE, CBeqzBnez) {
  // `offset` and `i_bits` share storage, so each immediate bit (1 through 8) of the current offset
  // can be read individually. Bit 0 is skipped by the unnamed bit-field.
  union {
    int16_t offset;
    struct [[gnu::packed]] {
      uint8_t : 1;
      uint8_t i1 : 1;
      uint8_t i2 : 1;
      uint8_t i3 : 1;
      uint8_t i4 : 1;
      uint8_t i5 : 1;
      uint8_t i6 : 1;
      uint8_t i7 : 1;
      uint8_t i8 : 1;
    } i_bits;
  };
  for (offset = int16_t{-256}; offset < int16_t{256}; offset += 8) {
    // Bit-field view of the 16-bit parcel, listed from the least significant bit upwards. The
    // immediate bits are scattered, hence the non-sequential field order.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i5 : 1;
        uint8_t i1 : 1;
        uint8_t i2 : 1;
        uint8_t i6 : 1;
        uint8_t i7 : 1;
        uint8_t rs : 3;
        uint8_t i3 : 1;
        uint8_t i4 : 1;
        uint8_t i8 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0,
        .i5 = i_bits.i5,
        .i1 = i_bits.i1,
        .i2 = i_bits.i2,
        .i6 = i_bits.i6,
        .i7 = i_bits.i7,
        .rs = 1,
        .i3 = i_bits.i3,
        .i4 = i_bits.i4,
        .i8 = i_bits.i8,
        .high_opcode = 0,
    };
    // Both opcode fields are left zero above; the per-instruction opcode bits are OR-ed in here.
    // c.Beqz (high opcode 0b110); the second argument is presumably the register value under
    // test - confirm against TestCBeqzBnez.
    TestCBeqzBnez(o_bits.parcel | 0b1100'0000'0000'0001, 0, offset);
    // c.Bnez (high opcode 0b111).
    TestCBeqzBnez(o_bits.parcel | 0b1110'0000'0000'0001, 1, offset);
  }
}
1043 
// Checks the compressed register-register ALU instructions. Judging by the data (e.g. c.Sub with
// {42, 23, 19}), each tuple is {first operand, second operand, expected result} - confirm against
// TestCMiscAlu.
TEST_F(TESTSUITE, CMiscAluInstructions) {
  // c.Sub
  TestCMiscAlu(0x8c05, {{42, 23, 19}});
  // c.Xor
  TestCMiscAlu(0x8c25, {{0b0101, 0b0011, 0b0110}});
  // c.Or
  TestCMiscAlu(0x8c45, {{0b0101, 0b0011, 0b0111}});
  // c.And
  TestCMiscAlu(0x8c65, {{0b0101, 0b0011, 0b0001}});
  // c.SubW
  TestCMiscAlu(0x9c05, {{42, 23, 19}});
  // c.AddW
  TestCMiscAlu(0x9c25, {{19, 23, 42}});
}
1058 
// Checks the compressed zero-/sign-extension instructions. Judging by the data, each tuple is
// {input value, expected result} - confirm against TestCMiscAluSingleInput.
TEST_F(TESTSUITE, CBitManipInstructions) {
  // c.zext.h
  TestCMiscAluSingleInput(0x9c69, {{0xffff'ffff'ffff'fffe, 0xfffe}});
  // c.zext.w
  TestCMiscAluSingleInput(0x9c71, {{0xffff'ffff'ffff'fffe, 0xffff'fffe}});
  // c.zext.b
  TestCMiscAluSingleInput(0x9c61, {{0xffff'ffff'ffff'fffe, 0xfe}});
  // c.sext.b
  TestCMiscAluSingleInput(0x9c65, {{0b1111'1110, 0xffff'ffff'ffff'fffe}});
  // c.sext.h
  TestCMiscAluSingleInput(0x9c6d, {{0b1111'1111'1111'1110, 0xffff'ffff'ffff'fffe}});
}
1071 
// Checks c.Srli, c.Srai, c.Andi and c.Slli for every 6-bit immediate value (0 through 63).
TEST_F(TESTSUITE, CMiscAluImm) {
  // All three members share storage: `uimm` drives the loop, `imm` re-reads the same six bits as
  // a signed value, and `i_bits` exposes the two pieces stored separately in the encoding.
  union {
    uint8_t uimm;
    // Note: c.Andi uses sign-extended immediate while c.Srli/c.Srai need zero-extended one.
    // If we store the value into uimm and read from imm compiler would do correct conversion.
    int8_t imm : 6;
    struct [[gnu::packed]] {
      uint8_t i0_i4 : 5;
      uint8_t i5 : 1;
    } i_bits;
  };
  for (uimm = uint8_t{0}; uimm < uint8_t{64}; uimm++) {
    // Bit-field view of the 16-bit parcel, listed from the least significant bit upwards.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i0_i4 : 5;
        uint8_t r : 3;
        uint8_t mid_opcode : 2;
        uint8_t i5 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0,
        .i0_i4 = i_bits.i0_i4,
        .r = 1,
        .mid_opcode = 0,
        .i5 = i_bits.i5,
        .high_opcode = 0,
    };
    // The o_bits.parcel here doesn't include opcodes and we are adding it in the function call.
    // c.Srli
    TestCMiscAluImm(o_bits.parcel | 0b1000'0000'0000'0001,
                    0x8000'0000'0000'0000ULL,
                    0x8000'0000'0000'0000ULL >> uimm);
    // c.Srai
    // The expected value is built by inverting the input, shifting it as an unsigned value and
    // inverting the result again, which fills the vacated high bits with ones.
    TestCMiscAluImm(o_bits.parcel | 0b1000'0100'0000'0001,
                    0x8000'0000'0000'0000LL,
                    ~0 ^ ((0x8000'0000'0000'0000 ^ ~0) >>
                          uimm));  // Avoid shifting negative numbers to avoid UB
    // c.Andi
    TestCMiscAluImm(o_bits.parcel | 0b1000'1000'0000'0001,
                    0xffff'ffff'ffff'ffffULL,
                    0xffff'ffff'ffff'ffffULL & imm);

    // Previous instructions use 3-bit register encoding where 0b000 is r8, 0b001 is r9, etc.
    // c.Slli uses 5-bit register encoding. Since we want it to also work with r9 in the test body
    // we add 0b01000 to register bits to mimic that shift-by-8.
    // c.Slli                                   vvvvvv adds 8 to r to handle rd' vs rd difference.
    TestCMiscAluImm(o_bits.parcel | 0b0000'0100'0000'0010,
                    0x0000'0000'0000'0001ULL,
                    0x0000'0000'0000'0001ULL << uimm);
  }
}
1126 
// Checks c.J for every even offset from -2048 up to (but excluding) 2048.
TEST_F(TESTSUITE, CJ) {
  // `offset` and `i_bits` share storage, so each immediate bit (1 through 11) of the current
  // offset can be read individually. Bit 0 is skipped by the unnamed bit-field.
  union {
    int16_t offset;
    struct [[gnu::packed]] {
      uint8_t : 1;
      uint8_t i1 : 1;
      uint8_t i2 : 1;
      uint8_t i3 : 1;
      uint8_t i4 : 1;
      uint8_t i5 : 1;
      uint8_t i6 : 1;
      uint8_t i7 : 1;
      uint8_t i8 : 1;
      uint8_t i9 : 1;
      uint8_t i10 : 1;
      uint8_t i11 : 1;
    } i_bits;
  };
  for (offset = int16_t{-2048}; offset < int16_t{2048}; offset += 2) {
    // Bit-field view of the 16-bit parcel, listed from the least significant bit upwards. The
    // immediate bits are scattered, hence the non-sequential field order.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i5 : 1;
        uint8_t i1 : 1;
        uint8_t i2 : 1;
        uint8_t i3 : 1;
        uint8_t i7 : 1;
        uint8_t i6 : 1;
        uint8_t i10 : 1;
        uint8_t i8 : 1;
        uint8_t i9 : 1;
        uint8_t i4 : 1;
        uint8_t i11 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0b01,
        .i5 = i_bits.i5,
        .i1 = i_bits.i1,
        .i2 = i_bits.i2,
        .i3 = i_bits.i3,
        .i7 = i_bits.i7,
        .i6 = i_bits.i6,
        .i10 = i_bits.i10,
        .i8 = i_bits.i8,
        .i9 = i_bits.i9,
        .i4 = i_bits.i4,
        .i11 = i_bits.i11,
        .high_opcode = 0b101,
    };
    TestCJ(o_bits.parcel, offset);
  }
}
1181 
// Checks the four compressed instructions that share the 0b100 high opcode in quadrant 2:
// c.Jr, c.Mv, c.Jalr and c.Add.
TEST_F(TESTSUITE, CJalr) {
  // C.Jr
  TestJumpAndLinkRegister<0>(0x8102, 42, 42);
  // C.Mv
  TestCOp(0x808a, {{0, 1, 1}});
  // C.Jalr
  TestJumpAndLinkRegister<2>(0x9102, 42, 42);
  // C.Add
  TestCOp(0x908a, {{1, 2, 3}});
}
1192 
1193 // Tests for Non-Compressed Instructions.
1194 
// Checks the immediate-form CSR instructions against the frm register. All three encodings use
// the immediate variants (funct3 0b101, 0b110 and 0b111).
TEST_F(TESTSUITE, CsrInstructions) {
  // Restores the rounding mode when the test ends, presumably because writing frm changes it -
  // see ScopedRoundingMode.
  ScopedRoundingMode scoped_rounding_mode;
  // Csrrwi x2, frm, 2
  TestFrm(0x00215173, 0, 2);
  // Csrrsi x2, frm, 2
  TestFrm(0x00216173, 0, 3);
  // Csrrci x2, frm, 1
  TestFrm(0x0020f173, 0, 0);
}
1204 
1205 constexpr uint8_t kFPFlagsAll = FPFlags::NX | FPFlags::UF | FPFlags::OF | FPFlags::DZ | FPFlags::NV;
1206 // Ensure all trailing bits are set in kFPFlagsAll so that all combinations are possible.
1207 static_assert(__builtin_ctz(~kFPFlagsAll) == 5);
1208 
1209 // Automatically saves and restores fenv throughout the lifetime of a parent scope.
1210 class ScopedFenv {
1211  public:
1212   ScopedFenv() { EXPECT_EQ(fegetenv(&env_), 0); }
1213   ~ScopedFenv() { EXPECT_EQ(fesetenv(&env_), 0); }
1214 
1215  private:
1216   fenv_t env_;
1217 };
1218 
1219 TEST_F(TESTSUITE, FFlagsRead) {
1220   ScopedFenv fenv;
1221   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1222     RaiseFeExceptForGuestFlags(fflags);
1223     RunInstruction(0x00102173);  // frflags x2
1224     EXPECT_EQ(GetXReg<2>(state_.cpu), fflags);
1225   }
1226 }
1227 
1228 TEST_F(TESTSUITE, FFlagsSwap) {
1229   ScopedFenv fenv;
1230   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1231     RaiseFeExceptForGuestFlags(fflags);
1232     // After swapping in 0 for flags, read fflags to verify.
1233     SetXReg<3>(state_.cpu, 0);
1234     RunInstruction(0x00119173);  // fsflags x2, x3
1235     EXPECT_EQ(GetXReg<2>(state_.cpu), fflags);
1236     TestFFlagsOnGuestAndHost(0u);
1237   }
1238 }
1239 
1240 TEST_F(TESTSUITE, FFlagsSwapImmediate) {
1241   ScopedFenv fenv;
1242   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1243     RaiseFeExceptForGuestFlags(fflags);
1244     // After swapping in 0 for flags, read fflags to verify.
1245     RunInstruction(0x00105173);  // fsflags x2, 0
1246     EXPECT_EQ(GetXReg<2>(state_.cpu), fflags);
1247     TestFFlagsOnGuestAndHost(0u);
1248   }
1249 }
1250 
1251 TEST_F(TESTSUITE, FFlagsWrite) {
1252   ScopedFenv fenv;
1253   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1254     SetXReg<3>(state_.cpu, fflags);
1255     RunInstruction(0x00119073);  // fsflags x3
1256     TestFFlagsOnGuestAndHost(fflags);
1257   }
1258 }
1259 
1260 TEST_F(TESTSUITE, FFlagsWriteImmediate) {
1261   ScopedFenv fenv;
1262   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1263     RunInstruction(0x00105073 | fflags << 15);  // fsflagsi 0 (+ fflags)
1264     TestFFlagsOnGuestAndHost(fflags);
1265   }
1266 }
1267 
1268 TEST_F(TESTSUITE, FFlagsClearBits) {
1269   ScopedFenv fenv;
1270   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1271     RaiseFeExceptForGuestFlags(kFPFlagsAll);
1272     SetXReg<3>(state_.cpu, fflags);
1273     RunInstruction(0x0011b073);  // csrc fflags, x3
1274     // Read fflags to verify previous bitwise clear operation.
1275     TestFFlagsOnGuestAndHost(static_cast<uint8_t>(~fflags & kFPFlagsAll));
1276   }
1277 }
1278 
1279 TEST_F(TESTSUITE, FFlagsClearBitsImmediate) {
1280   ScopedFenv fenv;
1281   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1282     RaiseFeExceptForGuestFlags(kFPFlagsAll);
1283     RunInstruction(0x00107073 | fflags << 15);  // csrci fflags, 0 (+ fflags)
1284     // Read fflags to verify previous bitwise clear operation.
1285     TestFFlagsOnGuestAndHost(static_cast<uint8_t>(~fflags & kFPFlagsAll));
1286   }
1287 }
1288 
1289 TEST_F(TESTSUITE, FCsrRegister) {
1290   ScopedFenv fenv;
1291   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1292     RaiseFeExceptForGuestFlags(fflags);
1293 
1294     // Read and verify fflags, then replace with all flags.
1295     TestFCsr(0x00319173 /* fscsr x2,x3 */, fflags, fflags, 0);
1296 
1297     // Only read fcsr and verify fflags.
1298     TestFCsr(0x00302173 /* frcsr x2 */, /* ignored */ 0, fflags, /* expected_frm= */ 0b100u);
1299   }
1300 
1301   for (bool immediate_source : {true, false}) {
1302     for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1303       EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0);
1304       if (immediate_source) {
1305         TestFCsr(0x00305173 /* csrrwi x2,fcsr,0 */ | (fflags << 15), 0, 0, 0);
1306       } else {
1307         TestFCsr(0x00319173 /* fscsr x2,x3 */, 0b100'0000 | fflags, 0, /* expected_frm= */ 0b010u);
1308       }
1309       TestFFlagsOnGuestAndHost(fflags);
1310     }
1311   }
1312 }
1313 
1314 TEST_F(TESTSUITE, FsrRegister) {
1315   ScopedRoundingMode scoped_rounding_mode;
1316   int rounding[][2] = {{0, FE_TONEAREST},
1317                        {1, FE_TOWARDZERO},
1318                        {2, FE_DOWNWARD},
1319                        {3, FE_UPWARD},
1320                        {4, FE_TOWARDZERO},
1321                        // Only low three bits must be affecting output (for forward compatibility).
1322                        {8, FE_TONEAREST},
1323                        {9, FE_TOWARDZERO},
1324                        {10, FE_DOWNWARD},
1325                        {11, FE_UPWARD},
1326                        {12, FE_TOWARDZERO}};
1327   for (bool immediate_source : {true, false}) {
1328     for (auto [guest_rounding, host_rounding] : rounding) {
1329       if (immediate_source) {
1330         TestFrm(0x00205173 | (guest_rounding << 15), 0, guest_rounding & 0b111);
1331       } else {
1332         TestFrm(0x00219173, guest_rounding, guest_rounding & 0b111);
1333       }
1334       EXPECT_EQ(std::fegetround(), host_rounding);
1335     }
1336   }
1337 }
1338 
// Checks the 64-bit register-register instructions. Judging by the data (e.g. Add with
// {19, 23, 42} and Sub with {42, 23, 19}), each tuple is {first source, second source, expected
// result} - confirm against TestOp.
TEST_F(TESTSUITE, OpInstructions) {
  // Add
  TestOp(0x003100b3, {{19, 23, 42}});
  // Sub
  TestOp(0x403100b3, {{42, 23, 19}});
  // And
  TestOp(0x003170b3, {{0b0101, 0b0011, 0b0001}});
  // Or
  TestOp(0x003160b3, {{0b0101, 0b0011, 0b0111}});
  // Xor
  TestOp(0x003140b3, {{0b0101, 0b0011, 0b0110}});
  // Sll
  TestOp(0x003110b3, {{0b1010, 3, 0b0101'0000}});
  // Srl
  TestOp(0x003150b3, {{0xf000'0000'0000'0000ULL, 12, 0x000f'0000'0000'0000ULL}});
  // Sra
  TestOp(0x403150b3, {{0xf000'0000'0000'0000ULL, 12, 0xffff'0000'0000'0000ULL}});
  // Slt
  // The last case differs from Sltu below: all-ones is expected to compare as less than zero.
  TestOp(0x003120b3,
         {
             {19, 23, 1},
             {23, 19, 0},
             {~0ULL, 0, 1},
         });
  // Sltu
  TestOp(0x003130b3,
         {
             {19, 23, 1},
             {23, 19, 0},
             {~0ULL, 0, 0},
         });
  // Mul
  TestOp(0x023100b3, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0x0a3d'70a3'd70a'3d71}});
  // Mulh
  TestOp(0x23110b3, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0x28f5'c28f'5c28'f5c3}});
  // Mulhsu
  TestOp(0x23120b3, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0xc28f'5c28'f5c2'8f5c}});
  // Mulhu
  TestOp(0x23130b3, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0x5c28'f5c2'8f5c'28f5}});
  // Div
  // Includes a zero divisor (expected result -1) and the most negative dividend with divisor -1
  // (expected result equals the dividend).
  TestOp(0x23140b3, {{0x9999'9999'9999'9999, 0x3333, 0xfffd'fffd'fffd'fffe}});
  TestOp(0x23140b3, {{42, 2, 21}});
  TestOp(0x23140b3, {{42, 0, -1}});
  TestOp(0x23140b3, {{-2147483648, -1, 2147483648}});
  TestOp(0x23140b3, {{0x8000'0000'0000'0000, -1, 0x8000'0000'0000'0000}});

  // Divu
  // Includes a zero divisor (expected result is all ones).
  TestOp(0x23150b3, {{0x9999'9999'9999'9999, 0x3333, 0x0003'0003'0003'0003}});
  TestOp(0x23150b3, {{42, 2, 21}});
  TestOp(0x23150b3, {{42, 0, 0xffff'ffff'ffff'ffffULL}});
  // Rem
  // Includes a zero divisor (expected result equals the dividend).
  TestOp(0x23160b3, {{0x9999'9999'9999'9999, 0x3333, 0xffff'ffff'ffff'ffff}});
  TestOp(0x23160b3, {{0x9999'9999'9999'9999, 0, 0x9999'9999'9999'9999}});
  // Remu
  // Includes a zero divisor (expected result equals the dividend).
  TestOp(0x23170b3, {{0x9999'9999'9999'9999, 0x3333, 0}});
  TestOp(0x23170b3, {{0x9999'9999'9999'9999, 0, 0x9999'9999'9999'9999}});
  // Andn
  TestOp(0x403170b3, {{0b0101, 0b0011, 0b0100}});
  // Orn
  TestOp(0x403160b3, {{0b0101, 0b0011, 0xffff'ffff'ffff'fffd}});
  // Xnor
  TestOp(0x403140b3, {{0b0101, 0b0011, 0xffff'ffff'ffff'fff9}});
  // Max
  TestOp(0x0a3160b3, {{bit_cast<uint64_t>(int64_t{-5}), 4, 4}});
  TestOp(0x0a3160b3,
         {{bit_cast<uint64_t>(int64_t{-5}),
           bit_cast<uint64_t>(int64_t{-10}),
           bit_cast<uint64_t>(int64_t{-5})}});
  // Maxu
  TestOp(0x0a3170b3, {{50, 1, 50}});
  // Min
  TestOp(0x0a3140b3, {{bit_cast<uint64_t>(int64_t{-5}), 4, bit_cast<uint64_t>(int64_t{-5})}});
  TestOp(0x0a3140b3,
         {{bit_cast<uint64_t>(int64_t{-5}),
           bit_cast<uint64_t>(int64_t{-10}),
           bit_cast<uint64_t>(int64_t{-10})}});
  // Minu
  TestOp(0x0a3150b3, {{50, 1, 1}});

  // Ror
  TestOp(0x603150b3, {{0xf000'0000'0000'000fULL, 4, 0xff00'0000'0000'0000ULL}});
  TestOp(0x603150b3, {{0xf000'0000'0000'000fULL, 8, 0x0ff0'0000'0000'0000ULL}});
  // Rol
  TestOp(0x603110b3, {{0xff00'0000'0000'0000ULL, 4, 0xf000'0000'0000'000fULL}});
  TestOp(0x603110b3, {{0x000f'ff00'0000'000fULL, 8, 0x0fff'0000'0000'0f00ULL}});
  // Sh1add
  TestOp(0x203120b3, {{0x0008'0000'0000'0001, 0x1001'0001'0000'0000ULL, 0x1011'0001'0000'0002ULL}});
  // Sh2add
  TestOp(0x203140b3, {{0x0008'0000'0000'0001, 0x0001'0001'0000'0000ULL, 0x0021'0001'0000'0004ULL}});
  // Sh3add
  TestOp(0x203160b3, {{0x0008'0000'0000'0001, 0x1001'0011'0000'0000ULL, 0x1041'0011'0000'0008ULL}});
  // Bclr
  TestOp(0x483110b3, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
  TestOp(0x483110b3, {{0b1000'0001'0000'0001ULL, 8, 0b1000'0000'0000'0001ULL}});
  // Bext
  TestOp(0x483150b3, {{0b1000'0001'0000'0001ULL, 0, 0b0000'0000'0000'0001ULL}});
  TestOp(0x483150b3, {{0b1000'0001'0000'0001ULL, 8, 0b0000'0000'0000'0001ULL}});
  TestOp(0x483150b3, {{0b1000'0001'0000'0001ULL, 7, 0b0000'0000'0000'0000ULL}});
  // Binv
  TestOp(0x683110b3, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
  TestOp(0x683110b3, {{0b1000'0001'0000'0001ULL, 1, 0b1000'0001'0000'0011ULL}});
  // Bset
  TestOp(0x283110b3, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0001ULL}});
  TestOp(0x283110b3, {{0b1000'0001'0000'0001ULL, 1, 0b1000'0001'0000'0011ULL}});
}
1444 
// Checks the 32-bit ("W" / ".uw") register-register instructions. Judging by the data, each
// tuple is {first source, second source, expected result} - confirm against TestOp. Several
// cases use inputs with non-zero upper halves or results with bit 31 set, so the expected values
// show how the upper 32 bits of the result are filled.
TEST_F(TESTSUITE, Op32Instructions) {
  // Addw
  TestOp(0x003100bb, {{19, 23, 42}, {0x8000'0000, 0, 0xffff'ffff'8000'0000}});
  // Add.uw
  TestOp(0x083100bb, {{19, 23, 42}, {0x8000'0000'8000'0000, 1, 0x0000'0000'8000'0001}});
  // Subw
  TestOp(0x403100bb, {{42, 23, 19}, {0x8000'0000, 0, 0xffff'ffff'8000'0000}});
  // Sllw
  TestOp(0x003110bb, {{0b1010, 3, 0b1010'000}});
  // Srlw
  TestOp(0x003150bb, {{0x0000'0000'f000'0000ULL, 12, 0x0000'0000'000f'0000ULL}});
  // Sraw
  TestOp(0x403150bb, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}});
  // Mulw
  TestOp(0x023100bb, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0xffff'ffff'd70a'3d71}});
  // Divw
  // Includes a zero divisor (expected result -1) and -2147483648 / -1 (expected result equals the
  // dividend).
  TestOp(0x23140bb, {{0x9999'9999'9999'9999, 0x3333, 0xffff'ffff'fffd'fffe}});
  TestOp(0x23140bb, {{0x9999'9999'9999'9999, 0, -1}});
  TestOp(0x23140bb, {{-2147483648, -1, -2147483648}});

  // Divuw
  TestOp(0x23150bb,
         {{0x9999'9999'9999'9999, 0x3333, 0x0000'0000'0003'0003},
          {0xffff'ffff'8000'0000, 1, 0xffff'ffff'8000'0000}});
  // Remw
  TestOp(0x23160bb, {{0x9999'9999'9999'9999, 0x3333, 0xffff'ffff'ffff'ffff}});
  // Remuw
  TestOp(0x23170bb,
         {{0x9999'9999'9999'9999, 0x3333, 0},
          {0xffff'ffff'8000'0000, 0xffff'ffff'8000'0001, 0xffff'ffff'8000'0000}});
  // Zext.h
  TestOp(0x080140bb, {{0xffff'ffff'ffff'fffeULL, 0, 0xfffe}});
  // Zext.b
  // This encoding is "andi x1, x2, 0xff" (an OP-IMM instruction), run through TestOp like the
  // others.
  TestOp(0x0ff17093, {{0xffff'ffff'ffff'fffeULL, 0, 0xfe}});
  // Zext.w
  // This encoding is "add.uw x1, x2, x0".
  TestOp(0x080100bb, {{0xffff'ffff'ffff'fffeULL, 0, 0xffff'fffe}});
  // Rorw
  TestOp(0x603150bb, {{0x0000'0000'f000'000fULL, 4, 0xffff'ffff'ff00'0000}});
  TestOp(0x603150bb, {{0x0000'0000'f000'0000ULL, 4, 0x0000'0000'0f00'0000}});
  TestOp(0x603150bb, {{0x0000'0000'0f00'000fULL, 4, 0xffff'ffff'f0f0'0000}});
  // Rolw
  TestOp(0x603110bb, {{0x0000'0000'f000'000fULL, 4, 0x0000'0000'0000'00ff}});
  TestOp(0x603110bb, {{0x0000'0000'0ff0'0000ULL, 4, 0xffff'ffff'ff00'0000}});
  // Sh1add.uw
  TestOp(0x203120bb, {{0xf0ff'0000'8000'0001, 0x8000'0000, 0x0000'0001'8000'0002}});
  // Sh2add.uw
  TestOp(0x203140bb, {{0xf0ff'00ff'8000'0001, 0x8000'0000, 0x0000'0002'8000'0004}});
  // Sh3add.uw
  TestOp(0x203160bb, {{0xf0ff'0f00'8000'0001, 0x8000'0000, 0x0000'0004'8000'0008}});
}
1495 
// Checks the 64-bit register-immediate instructions. Judging by the data (e.g. Addi with
// {19, 23, 42}), each tuple is {source value, immediate, expected result}; the encodings carry a
// zero immediate, so the helper presumably merges the tuple's immediate in - confirm against
// TestOpImm. For the single-operand instructions (Clz, Ctz, Cpop, Rev8, Sext.*, Orc.b) the middle
// element is 0.
TEST_F(TESTSUITE, OpImmInstructions) {
  // Addi
  TestOpImm(0x00010093, {{19, 23, 42}});
  // Slti
  // The last case differs from Sltiu below: all-ones is expected to compare as less than zero.
  TestOpImm(0x00012093,
            {
                {19, 23, 1},
                {23, 19, 0},
                {~0ULL, 0, 1},
            });
  // Sltiu
  TestOpImm(0x00013093,
            {
                {19, 23, 1},
                {23, 19, 0},
                {~0ULL, 0, 0},
            });
  // Xori
  TestOpImm(0x00014093, {{0b0101, 0b0011, 0b0110}});
  // Ori
  TestOpImm(0x00016093, {{0b0101, 0b0011, 0b0111}});
  // Andi
  TestOpImm(0x00017093, {{0b0101, 0b0011, 0b0001}});
  // Slli
  TestOpImm(0x00011093, {{0b1010, 3, 0b1010'000}});
  // Srli
  TestOpImm(0x00015093, {{0xf000'0000'0000'0000ULL, 12, 0x000f'0000'0000'0000ULL}});
  // Srai
  TestOpImm(0x40015093, {{0xf000'0000'0000'0000ULL, 12, 0xffff'0000'0000'0000ULL}});
  // Rori
  TestOpImm(0x60015093, {{0xf000'0000'0000'000fULL, 4, 0xff00'0000'0000'0000ULL}});
  // Clz
  // A zero input is expected to yield the full register width, 64.
  TestOpImm(0x60011093, {{0, 0, 64}});
  TestOpImm(0x60011093, {{123, 0, 57}});
  // Ctz
  // A zero input is expected to yield the full register width, 64.
  TestOpImm(0x60111093, {{0, 0, 64}});
  TestOpImm(0x60111093, {{0x01000000'0000, 0, 40}});
  // Cpop
  TestOpImm(0x60211093, {{0xf000'0000'0000'000fULL, 0, 8}});
  // Rev8
  TestOpImm(0x6b815093, {{0x0000'0000'0000'000fULL, 0, 0x0f00'0000'0000'0000ULL}});
  TestOpImm(0x6b815093, {{0xf000'0000'0000'0000ULL, 0, 0x0000'0000'0000'00f0ULL}});
  TestOpImm(0x6b815093, {{0x00f0'0000'0000'0000ULL, 0, 0x0000'0000'0000'f000ULL}});
  TestOpImm(0x6b815093, {{0x0000'000f'0000'0000ULL, 0, 0x0000'0000'0f00'0000ULL}});

  // Sext.b
  TestOpImm(0x60411093, {{0b1111'1110, 0, 0xffff'ffff'ffff'fffe}});  // -2
  // Sext.h
  // The first case has bit 15 clear, so the value is expected to come back unchanged.
  TestOpImm(0x60511093, {{0b1111'1110, 0, 0xfe}});
  TestOpImm(0x60511093, {{0b1111'1111'1111'1110, 0, 0xffff'ffff'ffff'fffe}});
  // Orc.b
  TestOpImm(0x28715093, {{0xfe00'f0ff'fa00'fffb, 0, 0xff00'ffff'ff00'ffff}});
  TestOpImm(0x28715093, {{0xfa00, 0, 0xff00}});
  // Bclri
  TestOpImm(0x48011093, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
  TestOpImm(0x48011093, {{0b1000'0001'0000'0001ULL, 8, 0b1000'0000'0000'0001ULL}});
  // Bexti
  TestOpImm(0x48015093, {{0b1000'0001'0000'0001ULL, 0, 0b0000'0000'0000'0001ULL}});
  TestOpImm(0x48015093, {{0b1000'0001'0000'0001ULL, 8, 0b0000'0000'0000'0001ULL}});
  TestOpImm(0x48015093, {{0b1000'0001'0000'0001ULL, 7, 0b0000'0000'0000'0000ULL}});
  // Binvi
  TestOpImm(0x68011093, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
  TestOpImm(0x68011093, {{0b1000'0001'0000'0001ULL, 1, 0b1000'0001'0000'0011ULL}});
  // Bseti
  TestOpImm(0x28011093, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0001ULL}});
  TestOpImm(0x28011093, {{0b1000'0001'0000'0001ULL, 1, 0b1000'0001'0000'0011ULL}});
}
1563 
// Checks the 32-bit ("W" / ".uw") register-immediate instructions. Judging by the data, each
// tuple is {source value, immediate, expected result} - confirm against TestOpImm.
TEST_F(TESTSUITE, OpImm32Instructions) {
  // Addiw
  TestOpImm(0x0001009b, {{19, 23, 42}, {0x8000'0000, 0, 0xffff'ffff'8000'0000}});
  // Slliw
  TestOpImm(0x0001109b, {{0b1010, 3, 0b1010'000}});
  // Srliw
  TestOpImm(0x0001509b, {{0x0000'0000'f000'0000ULL, 12, 0x0000'0000'000f'0000ULL}});
  // Sraiw
  TestOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}});
  // Roriw
  TestOpImm(0x6001509b, {{0x0000'0000'f000'000fULL, 4, 0xffff'ffff'ff00'0000}});
  TestOpImm(0x6001509b, {{0x0000'0000'f000'0000ULL, 4, 0x0000'0000'0f00'0000}});
  TestOpImm(0x6001509b, {{0x0000'0000'0f00'000fULL, 4, 0xffff'ffff'f0f0'0000}});
  // Clzw
  // A zero input is expected to yield the word width, 32.
  TestOpImm(0x6001109b, {{0, 0, 32}});
  TestOpImm(0x6001109b, {{123, 0, 25}});
  // Ctzw
  // A zero input is expected to yield the word width, 32.
  TestOpImm(0x6011109b, {{0, 0, 32}});
  TestOpImm(0x6011109b, {{0x0000'0000'0000'0010, 0, 4}});
  // Cpopw
  // Only the four set bits in the low word are expected to be counted.
  TestOpImm(0x6021109b, {{0xf000'0000'0000'000f, 0, 4}});
  // Slli.uw
  TestOpImm(0x0801109b, {{0x0000'0000'f000'000fULL, 4, 0x0000'000f'0000'00f0}});
}
1588 
1589 TEST_F(TESTSUITE, OpFpInstructions) {
1590   // FAdd.S
1591   TestOpFp(0x003100d3, {std::tuple{1.0f, 2.0f, 3.0f}});
1592   // FAdd.D
1593   TestOpFp(0x023100d3, {std::tuple{1.0, 2.0, 3.0}});
1594   // FSub.S
1595   TestOpFp(0x083100d3, {std::tuple{3.0f, 2.0f, 1.0f}});
1596   // FSub.D
1597   TestOpFp(0x0a3100d3, {std::tuple{3.0, 2.0, 1.0}});
1598   // FMul.S
1599   TestOpFp(0x103100d3, {std::tuple{3.0f, 2.0f, 6.0f}});
1600   // FMul.D
1601   TestOpFp(0x123100d3, {std::tuple{3.0, 2.0, 6.0}});
1602   // FDiv.S
1603   TestOpFp(0x183100d3, {std::tuple{6.0f, 2.0f, 3.0f}});
1604   // FDiv.D
1605   TestOpFp(0x1a3100d3, {std::tuple{6.0, 2.0, 3.0}});
1606   // FSgnj.S
1607   TestOpFp(0x203100d3,
1608            {std::tuple{1.0f, 2.0f, 1.0f},
1609             {-1.0f, 2.0f, 1.0f},
1610             {1.0f, -2.0f, -1.0f},
1611             {-1.0f, -2.0f, -1.0f}});
1612   // FSgnj.D
1613   TestOpFp(0x223100d3,
1614            {
1615                std::tuple{1.0, 2.0, 1.0},
1616                {-1.0, 2.0, 1.0},
1617                {1.0, -2.0, -1.0},
1618                {-1.0, -2.0, -1.0},
1619            });
1620   // FSgnjn.S
1621   TestOpFp(0x203110d3,
1622            {
1623                std::tuple{1.0f, 2.0f, -1.0f},
1624                {1.0f, 2.0f, -1.0f},
1625                {1.0f, -2.0f, 1.0f},
1626                {-1.0f, -2.0f, 1.0f},
1627            });
1628   // FSgnjn.D
1629   TestOpFp(0x223110d3,
1630            {
1631                std::tuple{1.0, 2.0, -1.0},
1632                {1.0, 2.0, -1.0},
1633                {1.0, -2.0, 1.0},
1634                {-1.0, -2.0, 1.0},
1635            });
1636   // FSgnjx.S
1637   TestOpFp(0x203120d3,
1638            {
1639                std::tuple{1.0f, 2.0f, 1.0f},
1640                {-1.0f, 2.0f, -1.0f},
1641                {1.0f, -2.0f, -1.0f},
1642                {-1.0f, -2.0f, 1.0f},
1643            });
1644   // FSgnjx.D
1645   TestOpFp(0x223120d3,
1646            {
1647                std::tuple{1.0, 2.0, 1.0},
1648                {-1.0, 2.0, -1.0},
1649                {1.0, -2.0, -1.0},
1650                {-1.0, -2.0, 1.0},
1651            });
1652   // FMin.S
1653   TestOpFp(0x283100d3,
1654            {std::tuple{+0.f, +0.f, +0.f},
1655             {+0.f, -0.f, -0.f},
1656             {-0.f, +0.f, -0.f},
1657             {-0.f, -0.f, -0.f},
1658             {+0.f, 1.f, +0.f},
1659             {-0.f, 1.f, -0.f}});
1660   // FMin.D
1661   TestOpFp(0x2a3100d3,
1662            {std::tuple{+0.0, +0.0, +0.0},
1663             {+0.0, -0.0, -0.0},
1664             {-0.0, +0.0, -0.0},
1665             {-0.0, -0.0, -0.0},
1666             {+0.0, 1.0, +0.0},
1667             {-0.0, 1.0, -0.0}});
1668   // FMax.S
1669   TestOpFp(0x283110d3,
1670            {std::tuple{+0.f, +0.f, +0.f},
1671             {+0.f, -0.f, +0.f},
1672             {-0.f, +0.f, +0.f},
1673             {-0.f, -0.f, -0.f},
1674             {+0.f, 1.f, 1.f},
1675             {-0.f, 1.f, 1.f}});
1676   // FMax.D
1677   TestOpFp(0x2a3110d3,
1678            {std::tuple{+0.0, +0.0, +0.0},
1679             {+0.0, -0.0, +0.0},
1680             {-0.0, +0.0, +0.0},
1681             {-0.0, -0.0, -0.0},
1682             {+0.0, 1.0, 1.0},
1683             {-0.0, 1.0, 1.0}});
1684 }
1685 
// Checks Auipc and Lui with an upper immediate whose top bit is set (0xfedcb), so the expected
// 64-bit value has its upper half filled with ones.
TEST_F(TESTSUITE, UpperImmInstructions) {
  // Auipc
  TestAuipc(0xfedcb097, 0xffff'ffff'fedc'b000);
  // Lui
  TestLui(0xfedcb0b7, 0xffff'ffff'fedc'b000);
}
1692 
// Checks the conditional branches. Judging by the data, each tuple is {first operand, second
// operand, expected pc offset}: 8 (the encoded branch offset) when the branch is taken and 4 (the
// instruction size) when it falls through - confirm against TestBranch.
TEST_F(TESTSUITE, TestBranchInstructions) {
  // Beq
  TestBranch(0x00208463,
             {
                 {42, 42, 8},
                 {41, 42, 4},
                 {42, 41, 4},
             });
  // Bne
  TestBranch(0x00209463,
             {
                 {42, 42, 4},
                 {41, 42, 8},
                 {42, 41, 8},
             });
  // Bltu
  // 0xf000'0000'0000'0000 is expected to compare as larger than 42 here, unlike for Blt below.
  TestBranch(0x0020e463,
             {
                 {41, 42, 8},
                 {42, 42, 4},
                 {42, 41, 4},
                 {0xf000'0000'0000'0000ULL, 42, 4},
                 {42, 0xf000'0000'0000'0000ULL, 8},
             });
  // Bgeu
  TestBranch(0x0020f463,
             {
                 {42, 41, 8},
                 {42, 42, 8},
                 {41, 42, 4},
                 {0xf000'0000'0000'0000ULL, 42, 8},
                 {42, 0xf000'0000'0000'0000ULL, 4},
             });
  // Blt
  // 0xf000'0000'0000'0000 is expected to compare as smaller than 42 here.
  TestBranch(0x0020c463,
             {
                 {41, 42, 8},
                 {42, 42, 4},
                 {42, 41, 4},
                 {0xf000'0000'0000'0000ULL, 42, 8},
                 {42, 0xf000'0000'0000'0000ULL, 4},
             });
  // Bge
  TestBranch(0x0020d463,
             {
                 {42, 41, 8},
                 {42, 42, 8},
                 {41, 42, 4},
                 {0xf000'0000'0000'0000ULL, 42, 4},
                 {42, 0xf000'0000'0000'0000ULL, 8},
             });
  // Beq with negative offset.
  TestBranch(0xfe208ee3,
             {
                 {42, 42, -4},
             });
}
1750 
1751 TEST_F(TESTSUITE, JumpAndLinkInstructions) {
1752   // Jal
1753   TestJumpAndLink(0x008000ef, 8);
1754   // Jal with negative offset.
1755   TestJumpAndLink(0xffdff0ef, -4);
1756 }
1757 
1758 TEST_F(TESTSUITE, JumpAndLinkWithReturnAddressRegisterAsTarget) {
1759   uint32_t insn_bytes{// jalr   ra
1760                       0x000080e7};
1761   auto code_start = ToGuestAddr(&insn_bytes);
1762   state_.cpu.insn_addr = code_start;
1763   // Translation cache requires upper bits to be zero.
1764   constexpr GuestAddr kJumpTargetAddr = 0x0000'f00d'cafe'b0baULL;
1765   SetXReg<RA>(state_.cpu, kJumpTargetAddr);
1766 
1767   EXPECT_TRUE(RunOneInstruction(&state_, kJumpTargetAddr));
1768   EXPECT_EQ(state_.cpu.insn_addr, kJumpTargetAddr);
1769   EXPECT_EQ(GetXReg<RA>(state_.cpu), code_start + 4);
1770 }
1771 
// Exercises jalr through x2, both linking into x1 ("Jalr") and with rd == x0 ("Jr").
// The two numeric arguments differ by the effective jump offset: (38, 42) for +4 and (42, 38)
// for -4.
// NOTE(review): the template argument is 4 for the encodings that link into x1 and 0 for the
// ones whose rd is x0, so it presumably describes the expected link-register update - confirm
// against TestJumpAndLinkRegister.
TEST_F(TESTSUITE, JumpAndLinkRegisterInstructions) {
  // Jalr offset=4.
  TestJumpAndLinkRegister<4>(0x004100e7, 38, 42);
  // Jalr offset=-4.
  TestJumpAndLinkRegister<4>(0xffc100e7, 42, 38);
  // Jalr offset=5 - must properly align the target to even.
  // Same arguments as the offset=4 case: the lowest bit of the computed target is dropped.
  TestJumpAndLinkRegister<4>(0x005100e7, 38, 42);
  // Jr offset=4.
  TestJumpAndLinkRegister<0>(0x00410067, 38, 42);
  // Jr offset=-4.
  TestJumpAndLinkRegister<0>(0xffc10067, 42, 38);
  // Jr offset=5 - must properly align the target to even.
  TestJumpAndLinkRegister<0>(0x00510067, 38, 42);
}
1786 
1787 TEST_F(TESTSUITE, LoadInstructions) {
1788   // Offset is always 8.
1789   // Lbu
1790   TestLoad(0x00814083, kDataToLoad & 0xffULL);
1791   // Lhu
1792   TestLoad(0x00815083, kDataToLoad & 0xffffULL);
1793   // Lwu
1794   TestLoad(0x00816083, kDataToLoad & 0xffff'ffffULL);
1795   // Ldu
1796   TestLoad(0x00813083, kDataToLoad);
1797   // Lb
1798   TestLoad(0x00810083, int64_t{int8_t(kDataToLoad)});
1799   // Lh
1800   TestLoad(0x00811083, int64_t{int16_t(kDataToLoad)});
1801   // Lw
1802   TestLoad(0x00812083, int64_t{int32_t(kDataToLoad)});
1803 }
1804 
1805 TEST_F(TESTSUITE, StoreInstructions) {
1806   // Offset is always 8.
1807   // Sb
1808   TestStore(0x00208423, kDataToStore & 0xffULL);
1809   // Sh
1810   TestStore(0x00209423, kDataToStore & 0xffffULL);
1811   // Sw
1812   TestStore(0x0020a423, kDataToStore & 0xffff'ffffULL);
1813   // Sd
1814   TestStore(0x0020b423, kDataToStore);
1815 }
1816 
// Exercises the fused multiply-add family in single and double precision.
// All encodings use f2, f3 and f4 as rs1, rs2 and rs3 and write f1. The rows are consistent
// with {rs1, rs2, rs3, expected}:
//   Fmadd:    1 * 2 + 3  =  5
//   Fmsub:    1 * 2 - 3  = -1
//   Fnmsub: -(1 * 2) + 3 =  1
//   Fnmadd: -(1 * 2) - 3 = -5
// NOTE(review): confirm the row layout against TestFma.
TEST_F(TESTSUITE, FmaInstructions) {
  // Fmadd.S
  TestFma(0x203170c3, {std::tuple{1.0f, 2.0f, 3.0f, 5.0f}});
  // Fmadd.D
  TestFma(0x223170c3, {std::tuple{1.0, 2.0, 3.0, 5.0}});
  // Fmsub.S
  TestFma(0x203170c7, {std::tuple{1.0f, 2.0f, 3.0f, -1.0f}});
  // Fmsub.D
  TestFma(0x223170c7, {std::tuple{1.0, 2.0, 3.0, -1.0}});
  // Fnmsub.S
  TestFma(0x203170cb, {std::tuple{1.0f, 2.0f, 3.0f, 1.0f}});
  // Fnmsub.D
  TestFma(0x223170cb, {std::tuple{1.0, 2.0, 3.0, 1.0}});
  // Fnmadd.S
  TestFma(0x203170cf, {std::tuple{1.0f, 2.0f, 3.0f, -5.0f}});
  // Fnmadd.D
  TestFma(0x223170cf, {std::tuple{1.0, 2.0, 3.0, -5.0}});
}
1835 
1836 #if (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
1837 
1838 TEST_F(TESTSUITE, AtomicLoadInstructions) {
1839   // Validate sign-extension of returned value.
1840   const uint64_t kNegative32BitValue = 0x0000'0000'8000'0000ULL;
1841   const uint64_t kSignExtendedNegative = 0xffff'ffff'8000'0000ULL;
1842   const uint64_t kPositive32BitValue = 0xffff'ffff'0000'0000ULL;
1843   const uint64_t kSignExtendedPositive = 0ULL;
1844   static_assert(static_cast<int32_t>(kSignExtendedPositive) >= 0);
1845   static_assert(static_cast<int32_t>(kSignExtendedNegative) < 0);
1846 
1847   // Lrw - sign extends from 32 to 64.
1848   TestAtomicLoad(0x1000a12f, &kPositive32BitValue, kSignExtendedPositive);
1849   TestAtomicLoad(0x1000a12f, &kNegative32BitValue, kSignExtendedNegative);
1850 
1851   // Lrd
1852   TestAtomicLoad(0x1000b12f, &kDataToLoad, kDataToLoad);
1853 }
1854 
1855 TEST_F(TESTSUITE, AtomicStoreInstructions) {
1856   // Scw
1857   TestAtomicStore(0x1820a1af, static_cast<uint32_t>(kDataToStore));
1858 
1859   // Scd
1860   TestAtomicStore(0x1820b1af, kDataToStore);
1861 }
1862 
1863 TEST_F(TESTSUITE, AtomicStoreInstructionNoLoadFailure) {
1864   // Scw
1865   TestAtomicStoreNoLoadFailure(0x1820a1af);
1866 
1867   // Scd
1868   TestAtomicStoreNoLoadFailure(0x1820b1af);
1869 }
1870 
1871 TEST_F(TESTSUITE, AtomicStoreInstructionDifferentLoadFailure) {
1872   // Scw
1873   TestAtomicStoreDifferentLoadFailure(0x1820a1af);
1874 
1875   // Scd
1876   TestAtomicStoreDifferentLoadFailure(0x1820b1af);
1877 }
1878 
1879 #endif  // (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
1880 
1881 TEST_F(TESTSUITE, AmoInstructions) {
1882   // Verifying that all aq and rl combinations work for Amoswap, but only test relaxed one for most
1883   // other instructions for brevity.
1884 
1885   // AmoswaoW/AmoswaoD
1886   TestAmo(0x083120af, 0x083130af, 0xaaaa'bbbb'cccc'ddddULL);
1887 
1888   // AmoswapWAq/AmoswapDAq
1889   TestAmo(0x0c3120af, 0x0c3130af, 0xaaaa'bbbb'cccc'ddddULL);
1890 
1891   // AmoswapWRl/AmoswapDRl
1892   TestAmo(0x0a3120af, 0x0a3130af, 0xaaaa'bbbb'cccc'ddddULL);
1893 
1894   // AmoswapWAqrl/AmoswapDAqrl
1895   TestAmo(0x0e3120af, 0x0e3130af, 0xaaaa'bbbb'cccc'ddddULL);
1896 
1897   // AmoaddW/AmoaddD
1898   TestAmo(0x003120af, 0x003130af, 0xaaaa'aaaa'aaaa'aaa9);
1899 
1900   // AmoxorW/AmoxorD
1901   TestAmo(0x203120af, 0x203130af, 0x5555'5555'1111'1111);
1902 
1903   // AmoandW/AmoandD
1904   TestAmo(0x603120af, 0x603130af, 0xaaaa'aaaa'cccc'cccc);
1905 
1906   // AmoorW/AmoorD
1907   TestAmo(0x403120af, 0x403130af, 0xffff'ffff'dddd'dddd);
1908 
1909   // AmominW/AmominD
1910   TestAmo(0x803120af, 0x803130af, 0xaaaa'bbbb'cccc'ddddULL);
1911 
1912   // AmomaxW/AmomaxD
1913   TestAmo(0xa03120af, 0xa03130af, 0xffff'eeee'dddd'ccccULL);
1914 
1915   // AmominuW/AmominuD
1916   TestAmo(0xc03120af, 0xc03130af, 0xaaaa'bbbb'cccc'ddddULL);
1917 
1918   // AmomaxuW/AmomaxuD
1919   TestAmo(0xe03120af, 0xe03130af, 0xffff'eeee'dddd'ccccULL);
1920 }
1921 
1922 TEST_F(TESTSUITE, OpFpSingleInputInstructions) {
1923   // FSqrt.S
1924   TestOpFpSingleInput(0x580170d3, {std::tuple{4.0f, 2.0f}});
1925   // FSqrt.D
1926   TestOpFpSingleInput(0x5a0170d3, {std::tuple{16.0, 4.0}});
1927 }
1928 
// Exercises the raw bit moves between integer and FP registers (Fmv.X.W, Fmv.W.X, Fmv.X.D,
// Fmv.D.X) and the FP register-to-register moves (Fmv.S, Fmv.D). The latter two are encoded as
// sign-injection with both source fields naming the same register.
TEST_F(TESTSUITE, Fmv) {
  // Fmv.X.W
  // The -1.0f row goes through int32_t/int64_t so that the expected integer is the float's bit
  // pattern sign-extended to 64 bits.
  TestFmvFloatToInteger(0xe00080d3,
                        {std::tuple{1.0f, static_cast<uint64_t>(bit_cast<uint32_t>(1.0f))},
                         {-1.0f, static_cast<int64_t>(bit_cast<int32_t>(-1.0f))}});
  // Fmv.W.X
  TestFmvIntegerToFloat(
      0xf00080d3, {std::tuple{bit_cast<uint32_t>(1.0f), 1.0f}, {bit_cast<uint32_t>(-1.0f), -1.0f}});
  // Fmv.X.D
  TestFmvFloatToInteger(
      0xe20080d3, {std::tuple{1.0, bit_cast<uint64_t>(1.0)}, {-1.0, bit_cast<uint64_t>(-1.0)}});
  // Fmv.D.X
  TestFmvIntegerToFloat(
      0xf20080d3, {std::tuple{bit_cast<uint64_t>(1.0), 1.0}, {bit_cast<uint64_t>(-1.0), -1.0}});
  // Fmv.S
  TestOpFpSingleInput(0x202100d3, {std::tuple{1.0f, 1.0f}, {-1.0f, -1.0f}});
  // Fmv.D
  // Inputs are given as raw 64-bit patterns here, unlike the Fmv.S rows above.
  TestOpFpSingleInput(0x222100d3,
                      {std::tuple{bit_cast<uint64_t>(1.0), 1.0}, {bit_cast<uint64_t>(-1.0), -1.0}});
}
1949 
1950 const uint32_t kPosNanFloat = kFPValueToFPReg(std::numeric_limits<float>::quiet_NaN());
1951 const uint32_t kNegNanFloat = kFPValueToFPReg(-std::numeric_limits<float>::quiet_NaN());
1952 const uint64_t kPosNanDouble = kFPValueToFPReg(std::numeric_limits<double>::quiet_NaN());
1953 const uint64_t kNegNanDouble = kFPValueToFPReg(-std::numeric_limits<double>::quiet_NaN());
1954 constexpr uint64_t kMaskFloatBits = (uint64_t{1} << 32) - 1;
1955 
1956 TEST_F(TESTSUITE, FabsSinglePrecisionNanPosToPos) {
1957   SetFReg<2>(state_.cpu, kPosNanFloat);
1958   RunInstruction(0x202120d3);  // fabs.s f1, f2
1959   EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat);
1960 }
1961 
1962 TEST_F(TESTSUITE, FabsSinglePrecisionNanNegToPos) {
1963   SetFReg<2>(state_.cpu, kNegNanFloat);
1964   RunInstruction(0x202120d3);  // fabs.s f1, f2
1965   EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat);
1966 }
1967 
1968 TEST_F(TESTSUITE, FabsDoublePrecisionNanPosToPos) {
1969   SetFReg<2>(state_.cpu, kPosNanDouble);
1970   RunInstruction(0x222120d3);  // fabs.d f1, f2
1971   EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble);
1972 }
1973 
1974 TEST_F(TESTSUITE, FabsDoublePrecisionNanNegToPos) {
1975   SetFReg<2>(state_.cpu, kNegNanDouble);
1976   RunInstruction(0x222120d3);  // fabs.d f1, f2
1977   EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble);
1978 }
1979 
1980 TEST_F(TESTSUITE, FnegSinglePrecisionNanPosToNeg) {
1981   SetFReg<2>(state_.cpu, kPosNanFloat);
1982   RunInstruction(0x202110d3);  // fneg.s f1, f2
1983   EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kNegNanFloat);
1984 }
1985 
1986 TEST_F(TESTSUITE, FnegSinglePrecisionNanNegToPos) {
1987   SetFReg<2>(state_.cpu, kNegNanFloat);
1988   RunInstruction(0x202110d3);  // fneg.s f1, f2
1989   EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat);
1990 }
1991 
1992 TEST_F(TESTSUITE, FnegDoublePrecisionNanPosToNeg) {
1993   SetFReg<2>(state_.cpu, kPosNanDouble);
1994   RunInstruction(0x222110d3);  // fneg.s f1, f2
1995   EXPECT_EQ(GetFReg<1>(state_.cpu), kNegNanDouble);
1996 }
1997 
1998 TEST_F(TESTSUITE, FnegDoublePrecisionNanNegToPos) {
1999   SetFReg<2>(state_.cpu, kNegNanDouble);
2000   RunInstruction(0x222110d3);  // fneg.s f1, f2
2001   EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble);
2002 }
2003 
// Exercises every Fcvt variant with a small value that is exactly representable in all of the
// involved types, so each conversion is expected to be exact.
// The instruction names read Fcvt.<destination>.<source>: S/D are single/double precision,
// W/L are 32/64-bit integers and a trailing U marks the unsigned form.
TEST_F(TESTSUITE, OpFpFcvt) {
  // Float to float.
  // Fcvt.S.D
  TestOpFpSingleInput(0x401170d3, {std::tuple{1.0, 1.0f}});
  // Fcvt.D.S
  TestOpFpSingleInput(0x420100d3, {std::tuple{2.0f, 2.0}});
  // Float to integer: the result lands in a general purpose register.
  // Fcvt.W.S
  TestOpFpGpRegisterTargetSingleInput(0xc00170d3, {std::tuple{3.0f, 3UL}});
  // Fcvt.WU.S
  TestOpFpGpRegisterTargetSingleInput(0xc01170d3, {std::tuple{3.0f, 3UL}});
  // Fcvt.L.S
  TestOpFpGpRegisterTargetSingleInput(0xc02170d3, {std::tuple{3.0f, 3UL}});
  // Fcvt.LU.S
  TestOpFpGpRegisterTargetSingleInput(0xc03170d3, {std::tuple{3.0f, 3UL}});
  // Fcvt.W.D
  TestOpFpGpRegisterTargetSingleInput(0xc20170d3, {std::tuple{3.0, 3UL}});
  // Fcvt.WU.D
  TestOpFpGpRegisterTargetSingleInput(0xc21170d3, {std::tuple{3.0, 3UL}});
  // Fcvt.L.D
  TestOpFpGpRegisterTargetSingleInput(0xc22170d3, {std::tuple{3.0, 3UL}});
  // Fcvt.LU.D
  TestOpFpGpRegisterTargetSingleInput(0xc23170d3, {std::tuple{3.0, 3UL}});
  // Integer to float: the source is a general purpose register.
  // Fcvt.S.W
  TestOpFpGpRegisterSourceSingleInput(0xd00170d3, {std::tuple{3UL, 3.0f}});
  // Fcvt.S.WU
  TestOpFpGpRegisterSourceSingleInput(0xd01170d3, {std::tuple{3UL, 3.0f}});
  // Fcvt.S.L
  TestOpFpGpRegisterSourceSingleInput(0xd02170d3, {std::tuple{3UL, 3.0f}});
  // Fcvt.S.LU
  TestOpFpGpRegisterSourceSingleInput(0xd03170d3, {std::tuple{3UL, 3.0f}});
  // Fcvt.D.W
  TestOpFpGpRegisterSourceSingleInput(0xd20170d3, {std::tuple{3UL, 3.0}});
  // Fcvt.D.WU
  TestOpFpGpRegisterSourceSingleInput(0xd21170d3, {std::tuple{3UL, 3.0}});
  // Fcvt.D.L
  TestOpFpGpRegisterSourceSingleInput(0xd22170d3, {std::tuple{3UL, 3.0}});
  // Fcvt.D.LU
  TestOpFpGpRegisterSourceSingleInput(0xd23170d3, {std::tuple{3UL, 3.0}});
}
2042 
// Exercises the FP comparisons, which produce 0 or 1 in a general purpose register.
// Every table covers the three orderings of the operands: less, greater and equal. The rows are
// consistent with {first operand, second operand, expected result}.
// NOTE(review): confirm the row layout against TestOpFpGpRegisterTarget.
TEST_F(TESTSUITE, OpFpGpRegisterTargetInstructions) {
  // Fle.S
  TestOpFpGpRegisterTarget(0xa03100d3,
                           {std::tuple{1.0f, 2.0f, 1UL}, {2.0f, 1.0f, 0UL}, {0.0f, 0.0f, 1UL}});
  // Fle.D
  TestOpFpGpRegisterTarget(0xa23100d3,
                           {std::tuple{1.0, 2.0, 1UL}, {2.0, 1.0, 0UL}, {0.0, 0.0, 1UL}});
  // Flt.S
  // Differs from Fle only in the equal-operands row.
  TestOpFpGpRegisterTarget(0xa03110d3,
                           {std::tuple{1.0f, 2.0f, 1UL}, {2.0f, 1.0f, 0UL}, {0.0f, 0.0f, 0UL}});
  // Flt.D
  TestOpFpGpRegisterTarget(0xa23110d3,
                           {std::tuple{1.0, 2.0, 1UL}, {2.0, 1.0, 0UL}, {0.0, 0.0, 0UL}});
  // Feq.S
  TestOpFpGpRegisterTarget(0xa03120d3,
                           {std::tuple{1.0f, 2.0f, 0UL}, {2.0f, 1.0f, 0UL}, {0.0f, 0.0f, 1UL}});
  // Feq.D
  TestOpFpGpRegisterTarget(0xa23120d3,
                           {std::tuple{1.0, 2.0, 0UL}, {2.0, 1.0, 0UL}, {0.0, 0.0, 1UL}});
}
2063 
// Exercises Fclass in both precisions. The expected value has exactly one bit set per row, and
// the rows walk through bits 0 to 9 in order: -inf, negative normal, negative subnormal, -0, +0,
// positive subnormal, positive normal, +inf, signaling NaN, quiet NaN.
TEST_F(TESTSUITE, TestOpFpGpRegisterTargetSingleInput) {
  // Fclass.S
  TestOpFpGpRegisterTargetSingleInput(
      0xe00110d3,
      {std::tuple{-std::numeric_limits<float>::infinity(), 0b00'0000'0001UL},
       {-1.0f, 0b00'0000'0010UL},
       {-std::numeric_limits<float>::denorm_min(), 0b00'0000'0100UL},
       {-0.0f, 0b00'0000'1000UL},
       {0.0f, 0b00'0001'0000UL},
       {std::numeric_limits<float>::denorm_min(), 0b00'0010'0000UL},
       {1.0f, 0b00'0100'0000UL},
       {std::numeric_limits<float>::infinity(), 0b00'1000'0000UL},
       {std::numeric_limits<float>::signaling_NaN(), 0b01'0000'0000UL},
       {std::numeric_limits<float>::quiet_NaN(), 0b10'0000'0000UL}});
  // Fclass.D
  TestOpFpGpRegisterTargetSingleInput(
      0xe20110d3,
      {std::tuple{-std::numeric_limits<double>::infinity(), 0b00'0000'0001UL},
       {-1.0, 0b00'0000'0010UL},
       {-std::numeric_limits<double>::denorm_min(), 0b00'0000'0100UL},
       {-0.0, 0b00'0000'1000UL},
       {0.0, 0b00'0001'0000UL},
       {std::numeric_limits<double>::denorm_min(), 0b00'0010'0000UL},
       {1.0, 0b00'0100'0000UL},
       {std::numeric_limits<double>::infinity(), 0b00'1000'0000UL},
       {std::numeric_limits<double>::signaling_NaN(), 0b01'0000'0000UL},
       {std::numeric_limits<double>::quiet_NaN(), 0b10'0000'0000UL}});
}
2092 
// Exercises the static rounding modes of FAdd: the encodings differ only in the rm field
// (0 = RNE, 1 = RTZ, 2 = RDN, 3 = RUP, 4 = RMM).
// The second addend is always half of the spacing between adjacent values near 1.0
// (2^-24 for float, 2^-53 for double), so every exact sum lies precisely halfway between two
// representable values and the result is decided solely by the rounding mode. The three
// magnitudes per sign cover a first addend with an odd, an even and again an odd last mantissa
// bit, which is what distinguishes RNE (ties to even) from RMM (ties away from zero).
TEST_F(TESTSUITE, RoundingModeTest) {
  // FAdd.S
  TestOpFp(0x003100d3,
           // Test RNE
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000002f},
            {1.0000002f, 0.000000059604645f, 1.0000002f},
            {1.0000004f, 0.000000059604645f, 1.0000005f},
            {-1.0000001f, -0.000000059604645f, -1.0000002f},
            {-1.0000002f, -0.000000059604645f, -1.0000002f},
            {-1.0000004f, -0.000000059604645f, -1.0000005f}});
  // FAdd.S
  TestOpFp(0x003110d3,
           // Test RTZ
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000001f},
            {1.0000002f, 0.000000059604645f, 1.0000002f},
            {1.0000004f, 0.000000059604645f, 1.0000004f},
            {-1.0000001f, -0.000000059604645f, -1.0000001f},
            {-1.0000002f, -0.000000059604645f, -1.0000002f},
            {-1.0000004f, -0.000000059604645f, -1.0000004f}});
  // FAdd.S
  TestOpFp(0x003120d3,
           // Test RDN
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000001f},
            {1.0000002f, 0.000000059604645f, 1.0000002f},
            {1.0000004f, 0.000000059604645f, 1.0000004f},
            {-1.0000001f, -0.000000059604645f, -1.0000002f},
            {-1.0000002f, -0.000000059604645f, -1.0000004f},
            {-1.0000004f, -0.000000059604645f, -1.0000005f}});
  // FAdd.S
  TestOpFp(0x003130d3,
           // Test RUP
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000002f},
            {1.0000002f, 0.000000059604645f, 1.0000004f},
            {1.0000004f, 0.000000059604645f, 1.0000005f},
            {-1.0000001f, -0.000000059604645f, -1.0000001f},
            {-1.0000002f, -0.000000059604645f, -1.0000002f},
            {-1.0000004f, -0.000000059604645f, -1.0000004f}});
  // FAdd.S
  TestOpFp(0x003140d3,
           // Test RMM
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000002f},
            {1.0000002f, 0.000000059604645f, 1.0000004f},
            {1.0000004f, 0.000000059604645f, 1.0000005f},
            {-1.0000001f, -0.000000059604645f, -1.0000002f},
            {-1.0000002f, -0.000000059604645f, -1.0000004f},
            {-1.0000004f, -0.000000059604645f, -1.0000005f}});

  // The double precision tables below mirror the single precision ones row for row.
  // FAdd.D
  TestOpFp(0x023100d3,
           // Test RNE
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000009},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000009}});
  // FAdd.D
  TestOpFp(0x023110d3,
           // Test RTZ
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000002},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000007},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000002},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000007}});
  // FAdd.D
  TestOpFp(0x023120d3,
           // Test RDN
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000002},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000007},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000007},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000009}});
  // FAdd.D
  TestOpFp(0x023130d3,
           // Test RUP
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000007},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000009},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000002},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000007}});
  // FAdd.D
  TestOpFp(0x023140d3,
           // Test RMM
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000007},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000009},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000007},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000009}});
}
2186 
2187 TEST_F(TESTSUITE, LoadFpInstructions) {
2188   // Offset is always 8.
2189   TestLoadFp(0x00812087, kDataToLoad | 0xffffffff00000000ULL);
2190   TestLoadFp(0x00813087, kDataToLoad);
2191 }
2192 
2193 TEST_F(TESTSUITE, StoreFpInstructions) {
2194   // Offset is always 8.
2195   // Fsw
2196   TestStoreFp(0x0020a427, kDataToStore & 0xffff'ffffULL);
2197   // Fsd
2198   TestStoreFp(0x0020b427, kDataToStore);
2199 }
2200 
2201 #if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
2202 
// Exercises vsetvl, vsetvli and vsetivli.
// vtype values are written in octal so that the lowest digit is vlmul (vtype[2:0]) and the next
// digit is vsew (vtype[5:3]); 0100 is the vta bit and 0200 is the vma bit.
// For an all-ones avl the expected vl equals VLMAX = VLEN / SEW * LMUL with VLEN = 128, e.g.
// 000 (SEW = 8, LMUL = 1) gives 16 and 003 (SEW = 8, LMUL = 8) gives 128.
// NOTE(review): each row reads as {initial vl, initial vtype, avl, requested vtype, expected vl,
// expected vtype} - confirm against TestVsetvl.
TEST_F(TESTSUITE, TestVsetvl) {
  // Value of vtype with only the vill bit (bit 63) set.
  constexpr uint64_t kVill =
      0b1'0000000'00000000'00000000'00000000'00000000'00000000'00000000'00000000;
  // Vsetvl, rs1 != x0
  TestVsetvl(0x803170d7,
             {
                 // Valid combinations.
                 {~0ULL, ~0ULL, ~0ULL, 005, 2, 005},
                 {~0ULL, ~0ULL, ~0ULL, 006, 4, 006},
                 {~0ULL, ~0ULL, ~0ULL, 007, 8, 007},
                 {~0ULL, ~0ULL, ~0ULL, 000, 16, 000},
                 {~0ULL, ~0ULL, ~0ULL, 001, 32, 001},
                 {~0ULL, ~0ULL, ~0ULL, 002, 64, 002},
                 {~0ULL, ~0ULL, ~0ULL, 003, 128, 003},
                 {~0ULL, ~0ULL, ~0ULL, 015, 1, 015},
                 {~0ULL, ~0ULL, ~0ULL, 016, 2, 016},
                 {~0ULL, ~0ULL, ~0ULL, 017, 4, 017},
                 {~0ULL, ~0ULL, ~0ULL, 010, 8, 010},
                 {~0ULL, ~0ULL, ~0ULL, 011, 16, 011},
                 {~0ULL, ~0ULL, ~0ULL, 012, 32, 012},
                 {~0ULL, ~0ULL, ~0ULL, 013, 64, 013},
                 {~0ULL, ~0ULL, ~0ULL, 026, 1, 026},
                 {~0ULL, ~0ULL, ~0ULL, 027, 2, 027},
                 {~0ULL, ~0ULL, ~0ULL, 020, 4, 020},
                 {~0ULL, ~0ULL, ~0ULL, 021, 8, 021},
                 {~0ULL, ~0ULL, ~0ULL, 022, 16, 022},
                 {~0ULL, ~0ULL, ~0ULL, 023, 32, 023},
                 {~0ULL, ~0ULL, ~0ULL, 037, 1, 037},
                 {~0ULL, ~0ULL, ~0ULL, 030, 2, 030},
                 {~0ULL, ~0ULL, ~0ULL, 031, 4, 031},
                 {~0ULL, ~0ULL, ~0ULL, 032, 8, 032},
                 {~0ULL, ~0ULL, ~0ULL, 033, 16, 033},
                 // Invalid combinations.
                 // These use either the vlmul value 4 or a vsew/vlmul pair for which the VLMAX
                 // formula above would drop below 1.
                 {~0ULL, ~0ULL, ~0ULL, 004, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 014, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 024, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 025, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 034, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 035, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 036, 0, kVill},
                 // Invalid sizes.
                 // Every vsew value from 4 to 7 combined with every vlmul value.
                 {~0ULL, ~0ULL, ~0ULL, 040, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 041, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 042, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 043, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 044, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 045, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 046, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 047, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 050, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 051, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 052, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 053, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 054, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 055, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 056, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 057, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 060, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 061, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 062, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 063, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 064, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 065, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 066, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 067, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 070, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 071, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 072, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 073, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 074, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 075, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 076, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 077, 0, kVill},
                 // Vma/vta bits.
                 {~0ULL, ~0ULL, ~0ULL, 0100, 16, 0100},
                 {~0ULL, ~0ULL, ~0ULL, 0200, 16, 0200},
                 {~0ULL, ~0ULL, ~0ULL, 0300, 16, 0300},
                 // Extra bits ignored as permitted by RISC-V specification.
                 // 0400 is bit 8, just above vma; it is expected to be dropped from vtype.
                 {~0ULL, ~0ULL, ~0ULL, 0400, 16, 0000},
                 {~0ULL, ~0ULL, ~0ULL, 0500, 16, 0100},
                 {~0ULL, ~0ULL, ~0ULL, 0600, 16, 0200},
                 {~0ULL, ~0ULL, ~0ULL, 0700, 16, 0300},
                 // Avl handling.
                 // All rows use vtype 003, i.e. VLMAX = 128. An avl of at most VLMAX is used
                 // as is, an avl of at least 2 * VLMAX is clamped to VLMAX and the value in
                 // between (151) is expected to give ceil(151 / 2) = 76.
                 {~0ULL, ~0ULL, 67, 003, 67, 003},
                 {~0ULL, ~0ULL, 151, 003, 76, 003},
                 {~0ULL, ~0ULL, 256, 003, 128, 003},
                 {~0ULL, ~0ULL, 257, 003, 128, 003},
             });
  // vsetvl rs1 == x0, rd != x0
  TestVsetvl(0x803070d7, {{~0ULL, ~0ULL, 42, 000, 16, 000}});
  // vsetvl rs1 == x0, rd == x0
  TestVsetvl(0x80307057,
             {// Valid change of vtype.
              {9, 000, 128, 022, 9, 022},
              // Invalid change of vtype.
              {8, 001, 128, 022, 0, kVill}});
  // For vsetvli and vsetivli the requested vtype comes from the immediate in the encoding
  // (022 in all encodings below), not from a register.
  // vsetvli rs1 != x0
  TestVsetvl(0x12170d7, {{~0ULL, ~0ULL, 128, 0, 16, 022}});
  // vsetvli rs1 == x0, rd != x0
  TestVsetvl(0x12070d7, {{~0ULL, ~0ULL, 42, 000, 16, 022}});
  // vsetvli, rs1 == x0, rd == x0
  TestVsetvl(0x1207057,
             {// Valid change of vtype.
              {9, 000, 128, ~0ULL, 9, 022},
              // Invalid change of vtype.
              {8, 001, 128, ~0ULL, 0, kVill}});
  // NOTE(review): in vsetivli bits 19:15 hold a 5-bit immediate avl rather than a register
  // index, so the "rs1" wording below is borrowed from vsetvl/vsetvli. 0xc12870d7 encodes an
  // immediate avl of 16, while 0xc12070d7 and 0xc1207057 encode an immediate avl of 0 - verify
  // the expectations of the latter two against the RISC-V V specification.
  // vsetivli rs1 != x0
  TestVsetvl(0xc12870d7, {{~0ULL, ~0ULL, 128, 0, 16, 022}});
  // vsetivli rs1 == x0, rd != x0
  TestVsetvl(0xc12070d7, {{~0ULL, ~0ULL, 42, 000, 16, 022}});
  // vsetivli, rs1 == x0, rd == x0
  TestVsetvl(0xc1207057,
             {// Valid change of vtype.
              {9, 000, 128, ~0ULL, 9, 022},
              // Invalid change of vtype.
              {8, 001, 128, ~0ULL, 0, kVill}});
}
2320 
2321 #endif  // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
2322