/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
#define ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

#include "arch/riscv64/instruction_set_features_riscv64.h"
#include "base/arena_containers.h"
#include "base/globals.h"
#include "base/macros.h"
#include "base/pointer_size.h"
#include "managed_register_riscv64.h"
#include "utils/assembler.h"
#include "utils/label.h"

namespace art HIDDEN {
namespace riscv64 {

class ScratchRegisterScope;

static constexpr size_t kRiscv64HalfwordSize = 2;
static constexpr size_t kRiscv64WordSize = 4;
static constexpr size_t kRiscv64DoublewordSize = 8;
static constexpr size_t kRiscv64FloatRegSizeInBytes = 8;

// The `Riscv64Extension` enumeration is used for restricting the instructions that the assembler
// can use. Some restrictions are checked only in debug mode (for example load and store
// instructions check `kLoadStore`), other restrictions are checked at run time and affect the
// emitted code (for example, the `SextW()` pseudo-instruction selects between an implementation
// from "Zcb", "Zbb" and a two-instruction sequence from the basic instruction set).
enum class Riscv64Extension : uint32_t {
  kLoadStore,  // Pseudo-extension encompassing all loads and stores. Used to check that
               // we do not have loads and stores in the middle of a LR/SC sequence.
  kZifencei,
  kM,
  kA,
  kZicsr,
  kF,
  kD,
  kZba,
  kZbb,
  kZbs,
  kV,
  kZca,  // "C" extension instructions except floating point loads/stores.
  kZcd,  // "C" extension double loads/stores.
         // Note: RV64 cannot implement Zcf ("C" extension float loads/stores).
  kZcb,  // Simple 16-bit operations not present in the original "C" extension.

  kLast = kZcb
};

using Riscv64ExtensionMask = uint32_t;

constexpr Riscv64ExtensionMask Riscv64ExtensionBit(Riscv64Extension ext) {
  return 1u << enum_cast<>(ext);
}

constexpr Riscv64ExtensionMask kRiscv64AllExtensionsMask =
    MaxInt<Riscv64ExtensionMask>(enum_cast<>(Riscv64Extension::kLast) + 1);

// Extensions allowed in a LR/SC sequence (between the LR and SC).
constexpr Riscv64ExtensionMask kRiscv64LrScSequenceExtensionsMask =
    Riscv64ExtensionBit(Riscv64Extension::kZca);
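
// A minimal usage sketch (illustrative, not part of the original header): extension bits
// compose with bitwise OR, so a mask permitting only base instructions plus "Zba"/"Zbb"
// could be built as follows. Such a mask is what the `Riscv64Assembler` constructor below
// accepts to restrict instruction selection.
//   constexpr Riscv64ExtensionMask kExampleMask =
//       Riscv64ExtensionBit(Riscv64Extension::kLoadStore) |
//       Riscv64ExtensionBit(Riscv64Extension::kZba) |
//       Riscv64ExtensionBit(Riscv64Extension::kZbb);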

enum class FPRoundingMode : uint32_t {
  kRNE = 0x0,  // Round to Nearest, ties to Even
  kRTZ = 0x1,  // Round towards Zero
  kRDN = 0x2,  // Round Down (towards −Infinity)
  kRUP = 0x3,  // Round Up (towards +Infinity)
  kRMM = 0x4,  // Round to Nearest, ties to Max Magnitude
  kDYN = 0x7,  // Dynamic rounding mode
  kDefault = kDYN,
  // Some instructions never need to round even though the spec includes the RM field.
  // To simplify testing, emit the RM as 0 by default for these instructions because that's what
  // `clang` does and because the `llvm-objdump` fails to disassemble the other rounding modes.
  kIgnored = 0
};

enum class AqRl : uint32_t {
  kNone    = 0x0,
  kRelease = 0x1,
  kAcquire = 0x2,
  kAqRl    = kRelease | kAcquire
};

// Bit flags for the `pred` and `succ` ordering sets of the `Fence()` instruction.
enum FenceType {
  kFenceNone = 0,
  kFenceWrite = 1,
  kFenceRead = 2,
  kFenceOutput = 4,
  kFenceInput = 8,
  kFenceDefault = 0xf,
};

// Used to test the values returned by FClassS/FClassD.
enum FPClassMaskType {
  kNegativeInfinity  = 0x001,
  kNegativeNormal    = 0x002,
  kNegativeSubnormal = 0x004,
  kNegativeZero      = 0x008,
  kPositiveZero      = 0x010,
  kPositiveSubnormal = 0x020,
  kPositiveNormal    = 0x040,
  kPositiveInfinity  = 0x080,
  kSignalingNaN      = 0x100,
  kQuietNaN          = 0x200,
};
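
// Illustrative sketch only: after `FClassD(rd, fs)` (declared below), `rd` holds exactly one
// of the `FPClassMaskType` bits, so a NaN check can test both NaN bits at once. The `__`
// shorthand for an assembler pointer is a convention assumed here, not defined in this file:
//   __ FClassD(rd, fs);
//   __ Andi(rd, rd, kSignalingNaN | kQuietNaN);  // Non-zero iff `fs` is a NaN.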

enum class CSRAddress : uint32_t {
  kVstart = 0x008,     // Vector start position, URW
  kVxsat = 0x009,      // Fixed-Point Saturate Flag, URW
  kVxrm = 0x00A,       // Fixed-Point Rounding Mode, URW
  kReserved1 = 0x00B,  // Reserved for future vector CSRs
  kReserved2 = 0x00C,
  kReserved3 = 0x00D,
  kReserved4 = 0x00E,
  kVcsr = 0x00F,   // Vector control and status register, URW
  kVl = 0xC20,     // Vector length, URO
  kVtype = 0xC21,  // Vector data type register, URO
  kVlenb = 0xC22,  // VLEN/8 (vector register length in bytes), URO
};
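
// A hedged sketch of reading one of these CSRs with the "Zicsr" instructions declared below
// (illustrative only; `Zero` is assumed to be the x0 register constant from the riscv64
// register definitions):
//   __ Csrrs(rd, static_cast<uint32_t>(CSRAddress::kVlenb), Zero);  // rd = VLEN / 8.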

class Riscv64Label : public Label {
 public:
  Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {}

  Riscv64Label(Riscv64Label&& src) noexcept
      // NOLINTNEXTLINE - src.prev_branch_id_ is valid after the move
      : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {}

 private:
  static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max();

  uint32_t prev_branch_id_;  // To get distance from preceding branch, if any.

  friend class Riscv64Assembler;
  DISALLOW_COPY_AND_ASSIGN(Riscv64Label);
};

// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
class Literal {
 public:
  static constexpr size_t kMaxSize = 8;

  Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) {
    DCHECK_LE(size, Literal::kMaxSize);
    memcpy(data_, data, size);
  }

  template <typename T>
  T GetValue() const {
    DCHECK_EQ(size_, sizeof(T));
    T value;
    memcpy(&value, data_, sizeof(T));
    return value;
  }

  uint32_t GetSize() const { return size_; }

  const uint8_t* GetData() const { return data_; }

  Riscv64Label* GetLabel() { return &label_; }

  const Riscv64Label* GetLabel() const { return &label_; }

 private:
  Riscv64Label label_;
  const uint32_t size_;
  uint8_t data_[kMaxSize];

  DISALLOW_COPY_AND_ASSIGN(Literal);
};
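
// Illustrative sketch (not part of the original header): how the constructor interprets its
// arguments when embedding a 64-bit constant. In practice literals are created through the
// assembler rather than constructed directly like this:
//   uint64_t raw = UINT64_C(0x123456789abcdef0);
//   Literal literal(sizeof(raw), reinterpret_cast<const uint8_t*>(&raw));
//   DCHECK_EQ(literal.GetValue<uint64_t>(), raw);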

// Jump table: table of labels emitted after the code and before the literals. Similar to literals.
class JumpTable {
 public:
  explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {}

  size_t GetSize() const { return labels_.size() * sizeof(int32_t); }

  const ArenaVector<Riscv64Label*>& GetData() const { return labels_; }

  Riscv64Label* GetLabel() { return &label_; }

  const Riscv64Label* GetLabel() const { return &label_; }

 private:
  Riscv64Label label_;
  ArenaVector<Riscv64Label*> labels_;

  DISALLOW_COPY_AND_ASSIGN(JumpTable);
};

class Riscv64Assembler final : public Assembler {
 public:
  explicit Riscv64Assembler(ArenaAllocator* allocator,
                            const Riscv64InstructionSetFeatures* instruction_set_features = nullptr)
      : Riscv64Assembler(allocator,
                         instruction_set_features != nullptr
                             ? ConvertExtensions(instruction_set_features)
                             : kRiscv64AllExtensionsMask) {}

  Riscv64Assembler(ArenaAllocator* allocator, Riscv64ExtensionMask enabled_extensions)
      : Assembler(allocator),
        branches_(allocator->Adapter(kArenaAllocAssembler)),
        finalized_(false),
        overwriting_(false),
        overwrite_location_(0),
        literals_(allocator->Adapter(kArenaAllocAssembler)),
        long_literals_(allocator->Adapter(kArenaAllocAssembler)),
        jump_tables_(allocator->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0),
        last_old_position_(0),
        last_branch_id_(0),
        enabled_extensions_(enabled_extensions),
        available_scratch_core_registers_((1u << TMP) | (1u << TMP2)),
        available_scratch_fp_registers_(1u << FTMP) {
    cfi().DelayEmittingAdvancePCs();
  }
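
  // Construction sketch (illustrative only; `allocator` is assumed to be a live
  // `ArenaAllocator*`): passing a restricted mask, such as the example mask built near the
  // top of this file, makes the assembler reject instructions outside those extensions.
  //   Riscv64Assembler assembler(allocator, kExampleMask);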

  virtual ~Riscv64Assembler() {
    for (auto& branch : branches_) {
      CHECK(branch.IsResolved());
    }
  }

  size_t CodeSize() const override { return Assembler::CodeSize(); }
  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }

  bool IsExtensionEnabled(Riscv64Extension ext) const {
    return (enabled_extensions_ & Riscv64ExtensionBit(ext)) != 0u;
  }
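
  // Usage sketch (illustrative): code generators can branch on the enabled extensions,
  // e.g. preferring a "Zbb" byte-reverse and otherwise emitting a manual fallback:
  //   if (assembler.IsExtensionEnabled(Riscv64Extension::kZbb)) {
  //     __ Rev8(rd, rs);
  //   } else {
  //     ...  // Shift-and-OR fallback sequence.
  //   }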

  // According to "The RISC-V Instruction Set Manual"

  // LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37
  // Note: These take a 20-bit unsigned value to align with the clang assembler for testing,
  // but the value stored in the register shall actually be sign-extended to 64 bits.
  void Lui(XRegister rd, uint32_t imm20);
  void Auipc(XRegister rd, uint32_t imm20);
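
  // A minimal sketch (illustrative, not the assembler's own helper) of materializing an
  // arbitrary `int32_t value` with `Lui()` + `Addiw()`. The +0x800 carry adjustment
  // compensates for `Addiw()` sign-extending its 12-bit immediate:
  //   uint32_t hi20 = ((static_cast<uint32_t>(value) + 0x800u) >> 12) & 0xFFFFFu;
  //   int32_t lo12 = static_cast<int32_t>(static_cast<uint32_t>(value) << 20) >> 20;
  //   __ Lui(rd, hi20);
  //   __ Addiw(rd, rd, lo12);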

  // Jump instructions (RV32I), opcode = 0x67, 0x6f
  void Jal(XRegister rd, int32_t offset);
  void Jalr(XRegister rd, XRegister rs1, int32_t offset);

  // Branch instructions (RV32I), opcode = 0x63, funct3 from 0x0 ~ 0x1 and 0x4 ~ 0x7
  void Beq(XRegister rs1, XRegister rs2, int32_t offset);
  void Bne(XRegister rs1, XRegister rs2, int32_t offset);
  void Blt(XRegister rs1, XRegister rs2, int32_t offset);
  void Bge(XRegister rs1, XRegister rs2, int32_t offset);
  void Bltu(XRegister rs1, XRegister rs2, int32_t offset);
  void Bgeu(XRegister rs1, XRegister rs2, int32_t offset);

  // Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6
  void Lb(XRegister rd, XRegister rs1, int32_t offset);
  void Lh(XRegister rd, XRegister rs1, int32_t offset);
  void Lw(XRegister rd, XRegister rs1, int32_t offset);
  void Ld(XRegister rd, XRegister rs1, int32_t offset);
  void Lbu(XRegister rd, XRegister rs1, int32_t offset);
  void Lhu(XRegister rd, XRegister rs1, int32_t offset);
  void Lwu(XRegister rd, XRegister rs1, int32_t offset);

  // Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3
  void Sb(XRegister rs2, XRegister rs1, int32_t offset);
  void Sh(XRegister rs2, XRegister rs1, int32_t offset);
  void Sw(XRegister rs2, XRegister rs1, int32_t offset);
  void Sd(XRegister rs2, XRegister rs1, int32_t offset);

  // IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7
  void Addi(XRegister rd, XRegister rs1, int32_t imm12);
  void Slti(XRegister rd, XRegister rs1, int32_t imm12);
  void Sltiu(XRegister rd, XRegister rs1, int32_t imm12);
  void Xori(XRegister rd, XRegister rs1, int32_t imm12);
  void Ori(XRegister rd, XRegister rs1, int32_t imm12);
  void Andi(XRegister rd, XRegister rs1, int32_t imm12);
  void Slli(XRegister rd, XRegister rs1, int32_t shamt);
  void Srli(XRegister rd, XRegister rs1, int32_t shamt);
  void Srai(XRegister rd, XRegister rs1, int32_t shamt);

  // ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7
  void Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sub(XRegister rd, XRegister rs1, XRegister rs2);
  void Slt(XRegister rd, XRegister rs1, XRegister rs2);
  void Sltu(XRegister rd, XRegister rs1, XRegister rs2);
  void Xor(XRegister rd, XRegister rs1, XRegister rs2);
  void Or(XRegister rd, XRegister rs1, XRegister rs2);
  void And(XRegister rd, XRegister rs1, XRegister rs2);
  void Sll(XRegister rd, XRegister rs1, XRegister rs2);
  void Srl(XRegister rd, XRegister rs1, XRegister rs2);
  void Sra(XRegister rd, XRegister rs1, XRegister rs2);

  // 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5
  void Addiw(XRegister rd, XRegister rs1, int32_t imm12);
  void Slliw(XRegister rd, XRegister rs1, int32_t shamt);
  void Srliw(XRegister rd, XRegister rs1, int32_t shamt);
  void Sraiw(XRegister rd, XRegister rs1, int32_t shamt);

  // 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7
  void Addw(XRegister rd, XRegister rs1, XRegister rs2);
  void Subw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sllw(XRegister rd, XRegister rs1, XRegister rs2);
  void Srlw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sraw(XRegister rd, XRegister rs1, XRegister rs2);

  // Environment call and breakpoint (RV32I), opcode = 0x73
  void Ecall();
  void Ebreak();

  // Fence instruction (RV32I): opcode = 0xf, funct3 = 0
  void Fence(uint32_t pred = kFenceDefault, uint32_t succ = kFenceDefault);
  void FenceTso();
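
  // Illustrative only: an acquire-style fence, ordering earlier reads before later reads
  // and writes, would pass the `FenceType` bits declared above:
  //   __ Fence(kFenceRead, kFenceRead | kFenceWrite);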

  // "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1
  void FenceI();

  // RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7
  void Mul(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulh(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulhsu(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulhu(XRegister rd, XRegister rs1, XRegister rs2);
  void Div(XRegister rd, XRegister rs1, XRegister rs2);
  void Divu(XRegister rd, XRegister rs1, XRegister rs2);
  void Rem(XRegister rd, XRegister rs1, XRegister rs2);
  void Remu(XRegister rd, XRegister rs1, XRegister rs2);

  // RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7
  void Mulw(XRegister rd, XRegister rs1, XRegister rs2);
  void Divw(XRegister rd, XRegister rs1, XRegister rs2);
  void Divuw(XRegister rd, XRegister rs1, XRegister rs2);
  void Remw(XRegister rd, XRegister rs1, XRegister rs2);
  void Remuw(XRegister rd, XRegister rs1, XRegister rs2);

  // RV32A/RV64A Standard Extension
  void LrW(XRegister rd, XRegister rs1, AqRl aqrl);
  void LrD(XRegister rd, XRegister rs1, AqRl aqrl);
  void ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
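
  // A hedged sketch of a compare-and-swap loop built from `LrW()`/`ScW()` (illustrative
  // only; `retry`/`fail` are assumed local `Riscv64Label`s, and `Bind()`, labeled `Bne()`
  // and `Bnez()` are assumed from the pseudo-instruction API of this class). Only the
  // extensions in `kRiscv64LrScSequenceExtensionsMask` may be used between LR and SC:
  //   __ Bind(&retry);
  //   __ LrW(old_value, address, AqRl::kAcquire);
  //   __ Bne(old_value, expected, &fail);
  //   __ ScW(store_result, new_value, address, AqRl::kRelease);
  //   __ Bnez(store_result, &retry);  // SC failed; try again.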

  // "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7
  void Csrrw(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrs(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrc(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5);
  void Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5);
  void Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5);

  // FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27
  void FLw(FRegister rd, XRegister rs1, int32_t offset);
  void FLd(FRegister rd, XRegister rs1, int32_t offset);
  void FSw(FRegister rs2, XRegister rs1, int32_t offset);
  void FSd(FRegister rs2, XRegister rs1, int32_t offset);

  // FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f
  void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);

  // FP FMA instruction helpers passing the default rounding mode.
  void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }

  // Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D
  void FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FSgnjS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjD(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2);
  void FMinS(FRegister rd, FRegister rs1, FRegister rs2);
  void FMinD(FRegister rd, FRegister rs1, FRegister rs2);
  void FMaxS(FRegister rd, FRegister rs1, FRegister rs2);
  void FMaxD(FRegister rd, FRegister rs1, FRegister rs2);
  void FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm);

  // Simple FP instruction helpers passing the default rounding mode.
  void FAddS(FRegister rd, FRegister rs1, FRegister rs2) {
    FAddS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FAddD(FRegister rd, FRegister rs1, FRegister rs2) {
    FAddD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSubS(FRegister rd, FRegister rs1, FRegister rs2) {
    FSubS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSubD(FRegister rd, FRegister rs1, FRegister rs2) {
    FSubD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FMulS(FRegister rd, FRegister rs1, FRegister rs2) {
    FMulS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FMulD(FRegister rd, FRegister rs1, FRegister rs2) {
    FMulD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FDivS(FRegister rd, FRegister rs1, FRegister rs2) {
    FDivS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FDivD(FRegister rd, FRegister rs1, FRegister rs2) {
    FDivD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSqrtS(FRegister rd, FRegister rs1) {
    FSqrtS(rd, rs1, FPRoundingMode::kDefault);
  }
  void FSqrtD(FRegister rd, FRegister rs1) {
    FSqrtD(rd, rs1, FPRoundingMode::kDefault);
  }
  void FCvtSD(FRegister rd, FRegister rs1) {
    FCvtSD(rd, rs1, FPRoundingMode::kDefault);
  }
  void FCvtDS(FRegister rd, FRegister rs1) {
    FCvtDS(rd, rs1, FPRoundingMode::kIgnored);
  }

  // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D
  void FEqS(XRegister rd, FRegister rs1, FRegister rs2);
  void FEqD(XRegister rd, FRegister rs1, FRegister rs2);
  void FLtS(XRegister rd, FRegister rs1, FRegister rs2);
  void FLtD(XRegister rd, FRegister rs1, FRegister rs2);
  void FLeS(XRegister rd, FRegister rs1, FRegister rs2);
  void FLeD(XRegister rd, FRegister rs1, FRegister rs2);

  // FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D
  void FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm);

  // FP conversion instruction helpers passing the default rounding mode.
  void FCvtWS(XRegister rd, FRegister rs1) { FCvtWS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWD(XRegister rd, FRegister rs1) { FCvtWD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWuS(XRegister rd, FRegister rs1) { FCvtWuS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWuD(XRegister rd, FRegister rs1) { FCvtWuD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLS(XRegister rd, FRegister rs1) { FCvtLS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLD(XRegister rd, FRegister rs1) { FCvtLD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); }
  void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); }
  void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDLu(FRegister rd, XRegister rs1) { FCvtDLu(rd, rs1, FPRoundingMode::kDefault); }

  // FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D
  void FMvXW(XRegister rd, FRegister rs1);
  void FMvXD(XRegister rd, FRegister rs1);
  void FMvWX(FRegister rd, XRegister rs1);
  void FMvDX(FRegister rd, XRegister rs1);

  // FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D
  void FClassS(XRegister rd, FRegister rs1);
  void FClassD(XRegister rd, FRegister rs1);

542   // "C" Standard Extension, Compresseed Instructions
  void CLwsp(XRegister rd, int32_t offset);
  void CLdsp(XRegister rd, int32_t offset);
  void CFLdsp(FRegister rd, int32_t offset);
  void CSwsp(XRegister rs2, int32_t offset);
  void CSdsp(XRegister rs2, int32_t offset);
  void CFSdsp(FRegister rs2, int32_t offset);

  void CLw(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLd(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CFLd(FRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSw(XRegister rs2_s, XRegister rs1_s, int32_t offset);
  void CSd(XRegister rs2_s, XRegister rs1_s, int32_t offset);
  void CFSd(FRegister rs2_s, XRegister rs1_s, int32_t offset);

  void CLi(XRegister rd, int32_t imm);
  void CLui(XRegister rd, uint32_t nzimm6);
  void CAddi(XRegister rd, int32_t nzimm);
  void CAddiw(XRegister rd, int32_t imm);
  void CAddi16Sp(int32_t nzimm);
  void CAddi4Spn(XRegister rd_s, uint32_t nzuimm);
  void CSlli(XRegister rd, int32_t shamt);
  void CSrli(XRegister rd_s, int32_t shamt);
  void CSrai(XRegister rd_s, int32_t shamt);
  void CAndi(XRegister rd_s, int32_t imm);
  void CMv(XRegister rd, XRegister rs2);
  void CAdd(XRegister rd, XRegister rs2);
  void CAnd(XRegister rd_s, XRegister rs2_s);
  void COr(XRegister rd_s, XRegister rs2_s);
  void CXor(XRegister rd_s, XRegister rs2_s);
  void CSub(XRegister rd_s, XRegister rs2_s);
  void CAddw(XRegister rd_s, XRegister rs2_s);
  void CSubw(XRegister rd_s, XRegister rs2_s);

  // "Zcb" Standard Extension, part of "C", opcode = 0b00, 0b01, funct3 = 0b100.
  void CLbu(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLhu(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLh(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSb(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSh(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CZextB(XRegister rd_rs1_s);
  void CSextB(XRegister rd_rs1_s);
  void CZextH(XRegister rd_rs1_s);
  void CSextH(XRegister rd_rs1_s);
  void CZextW(XRegister rd_rs1_s);
  void CNot(XRegister rd_rs1_s);
  void CMul(XRegister rd_s, XRegister rs2_s);
  // "Zcb" Standard Extension End; resume "C" Standard Extension.
  // TODO(riscv64): Reorder "Zcb" after remaining "C" instructions.

  void CJ(int32_t offset);
  void CJr(XRegister rs1);
  void CJalr(XRegister rs1);
  void CBeqz(XRegister rs1_s, int32_t offset);
  void CBnez(XRegister rs1_s, int32_t offset);

  void CEbreak();
  void CNop();
  void CUnimp();

  // "Zba" Standard Extension, opcode = 0x1b, 0x33 or 0x3b, funct3 and funct7 varies.
  void AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh1Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh2Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh3Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void SlliUw(XRegister rd, XRegister rs1, int32_t shamt);

  // "Zbb" Standard Extension, opcode = 0x13, 0x1b, 0x33 or 0x3b, funct3 and funct7 varies.
  // Note: 32-bit sext.b, sext.h and zext.h from the Zbb extension are explicitly
  // prefixed with "Zbb" to differentiate them from the utility macros.
  void Andn(XRegister rd, XRegister rs1, XRegister rs2);
  void Orn(XRegister rd, XRegister rs1, XRegister rs2);
  void Xnor(XRegister rd, XRegister rs1, XRegister rs2);
  void Clz(XRegister rd, XRegister rs1);
  void Clzw(XRegister rd, XRegister rs1);
  void Ctz(XRegister rd, XRegister rs1);
  void Ctzw(XRegister rd, XRegister rs1);
  void Cpop(XRegister rd, XRegister rs1);
  void Cpopw(XRegister rd, XRegister rs1);
  void Min(XRegister rd, XRegister rs1, XRegister rs2);
  void Minu(XRegister rd, XRegister rs1, XRegister rs2);
  void Max(XRegister rd, XRegister rs1, XRegister rs2);
  void Maxu(XRegister rd, XRegister rs1, XRegister rs2);
  void Rol(XRegister rd, XRegister rs1, XRegister rs2);
  void Rolw(XRegister rd, XRegister rs1, XRegister rs2);
  void Ror(XRegister rd, XRegister rs1, XRegister rs2);
  void Rorw(XRegister rd, XRegister rs1, XRegister rs2);
  void Rori(XRegister rd, XRegister rs1, int32_t shamt);
  void Roriw(XRegister rd, XRegister rs1, int32_t shamt);
  void OrcB(XRegister rd, XRegister rs1);
  void Rev8(XRegister rd, XRegister rs1);
  void ZbbSextB(XRegister rd, XRegister rs1);
  void ZbbSextH(XRegister rd, XRegister rs1);
  void ZbbZextH(XRegister rd, XRegister rs1);

  // "Zbs" Standard Extension, opcode = 0x13, or 0x33, funct3 and funct7 varies.
  void Bclr(XRegister rd, XRegister rs1, XRegister rs2);
  void Bclri(XRegister rd, XRegister rs1, int32_t shamt);
  void Bext(XRegister rd, XRegister rs1, XRegister rs2);
  void Bexti(XRegister rd, XRegister rs1, int32_t shamt);
  void Binv(XRegister rd, XRegister rs1, XRegister rs2);
  void Binvi(XRegister rd, XRegister rs1, int32_t shamt);
  void Bset(XRegister rd, XRegister rs1, XRegister rs2);
  void Bseti(XRegister rd, XRegister rs1, int32_t shamt);

  ////////////////////////////// RISC-V Vector Instructions  START ///////////////////////////////
  enum class LengthMultiplier : uint32_t {
    kM1Over8 = 0b101,
    kM1Over4 = 0b110,
    kM1Over2 = 0b111,
    kM1 = 0b000,
    kM2 = 0b001,
    kM4 = 0b010,
    kM8 = 0b011,

    kReserved1 = 0b100,
  };

  enum class SelectedElementWidth : uint32_t {
    kE8 = 0b000,
    kE16 = 0b001,
    kE32 = 0b010,
    kE64 = 0b011,

    kReserved1 = 0b100,
    kReserved2 = 0b101,
    kReserved3 = 0b110,
    kReserved4 = 0b111,
  };

  enum class VectorMaskAgnostic : uint32_t {
    kUndisturbed = 0,
    kAgnostic = 1,
  };

  enum class VectorTailAgnostic : uint32_t {
    kUndisturbed = 0,
    kAgnostic = 1,
  };

  enum class VM : uint32_t {  // Vector mask
    kV0_t = 0b0,
    kUnmasked = 0b1
  };

  // Vector Configuration-Setting Instructions, opcode = 0x57, funct3 = 0x7
  void VSetvli(XRegister rd, XRegister rs1, uint32_t vtypei);
  void VSetivli(XRegister rd, uint32_t uimm, uint32_t vtypei);
  void VSetvl(XRegister rd, XRegister rs1, XRegister rs2);

  static uint32_t VTypeiValue(VectorMaskAgnostic vma,
                              VectorTailAgnostic vta,
                              SelectedElementWidth sew,
                              LengthMultiplier lmul) {
    return static_cast<uint32_t>(vma) << 7 | static_cast<uint32_t>(vta) << 6 |
           static_cast<uint32_t>(sew) << 3 | static_cast<uint32_t>(lmul);
  }
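
  // Illustrative sketch: configuring the vector unit for 32-bit elements at LMUL=1 with
  // tail- and mask-agnostic policy, with `avl` assumed to hold the requested element count:
  //   uint32_t vtypei = VTypeiValue(VectorMaskAgnostic::kAgnostic,
  //                                 VectorTailAgnostic::kAgnostic,
  //                                 SelectedElementWidth::kE32,
  //                                 LengthMultiplier::kM1);
  //   __ VSetvli(rd, avl, vtypei);  // rd receives the granted vector length.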

  // Vector Unit-Stride Load/Store Instructions
  void VLe8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLe16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLe32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLe64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLm(VRegister vd, XRegister rs1);

  void VSe8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSe16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSe32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSe64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSm(VRegister vs3, XRegister rs1);

  // Vector unit-stride fault-only-first Instructions
  void VLe8ff(VRegister vd, XRegister rs1);
  void VLe16ff(VRegister vd, XRegister rs1);
  void VLe32ff(VRegister vd, XRegister rs1);
  void VLe64ff(VRegister vd, XRegister rs1);

  // Vector Strided Load/Store Instructions
  void VLse8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLse16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLse32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLse64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);

  void VSse8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSse16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSse32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSse64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);

  // Vector Indexed Load/Store Instructions
  void VLoxei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  void VLuxei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  void VSoxei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  void VSuxei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector Segment Load/Store

  // Vector Unit-Stride Segment Loads/Stores

  void VLseg2e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg2e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg2e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg2e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg3e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg3e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg3e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg3e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg4e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg4e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg4e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg4e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg5e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg5e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg5e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg5e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg6e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg6e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg6e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg6e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg7e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg7e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg7e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg7e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg8e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg8e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg8e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg8e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);

  void VSseg2e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg2e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg2e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg2e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg3e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg3e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg3e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg3e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg4e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg4e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg4e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg4e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg5e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg5e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg5e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg5e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg6e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg6e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg6e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg6e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg7e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg7e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg7e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg7e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg8e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg8e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg8e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSseg8e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector Unit-Stride Fault-only-First Segment Loads

  void VLseg2e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg2e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg2e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg2e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg3e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg3e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg3e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg3e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg4e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg4e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg4e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg4e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg5e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg5e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg5e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg5e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg6e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg6e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg6e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg6e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg7e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg7e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg7e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg7e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg8e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg8e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg8e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLseg8e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector Strided Segment Loads/Stores

  void VLsseg2e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg2e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg2e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg2e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg3e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg3e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg3e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg3e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg4e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg4e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg4e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg4e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg5e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg5e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg5e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg5e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg6e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg6e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg6e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg6e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg7e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg7e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg7e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg7e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg8e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg8e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg8e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VLsseg8e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);

  void VSsseg2e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg2e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg2e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg2e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg3e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg3e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg3e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg3e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg4e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg4e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg4e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg4e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg5e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg5e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg5e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg5e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg6e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg6e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg6e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg6e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg7e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg7e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg7e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg7e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg8e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg8e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg8e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
  void VSsseg8e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);

  // Vector Indexed-unordered Segment Loads/Stores

  void VLuxseg2ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg2ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg2ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg2ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg3ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg3ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg3ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg3ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg4ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg4ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg4ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg4ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg5ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg5ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg5ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg5ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg6ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg6ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg6ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg6ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg7ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg7ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg7ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg7ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg8ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg8ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg8ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLuxseg8ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  void VSuxseg2ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg2ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg2ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg2ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg3ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg3ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg3ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg3ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg4ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg4ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg4ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg4ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg5ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg5ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg5ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg5ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg6ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg6ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg6ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg6ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg7ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg7ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg7ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg7ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg8ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg8ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg8ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSuxseg8ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector Indexed-ordered Segment Loads/Stores

  void VLoxseg2ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg2ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg2ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg2ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg3ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg3ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg3ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg3ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg4ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg4ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg4ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg4ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg5ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg5ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg5ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg5ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg6ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg6ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg6ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg6ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg7ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg7ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg7ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg7ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg8ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg8ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg8ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg8ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  void VSoxseg2ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg2ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg2ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg2ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg3ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg3ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg3ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg3ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg4ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg4ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg4ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg4ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg5ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg5ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg5ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg5ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg6ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg6ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg6ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg6ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg7ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg7ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg7ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg7ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg8ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg8ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg8ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg8ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector Whole Register Load/Store Instructions
1028 
1029   void VL1re8(VRegister vd, XRegister rs1);
1030   void VL1re16(VRegister vd, XRegister rs1);
1031   void VL1re32(VRegister vd, XRegister rs1);
1032   void VL1re64(VRegister vd, XRegister rs1);
1033 
1034   void VL2re8(VRegister vd, XRegister rs1);
1035   void VL2re16(VRegister vd, XRegister rs1);
1036   void VL2re32(VRegister vd, XRegister rs1);
1037   void VL2re64(VRegister vd, XRegister rs1);
1038 
1039   void VL4re8(VRegister vd, XRegister rs1);
1040   void VL4re16(VRegister vd, XRegister rs1);
1041   void VL4re32(VRegister vd, XRegister rs1);
1042   void VL4re64(VRegister vd, XRegister rs1);
1043 
1044   void VL8re8(VRegister vd, XRegister rs1);
1045   void VL8re16(VRegister vd, XRegister rs1);
1046   void VL8re32(VRegister vd, XRegister rs1);
1047   void VL8re64(VRegister vd, XRegister rs1);
1048 
1049   void VL1r(VRegister vd, XRegister rs1);  // Pseudo-instruction equal to VL1re8
1050   void VL2r(VRegister vd, XRegister rs1);  // Pseudo-instruction equal to VL2re8
1051   void VL4r(VRegister vd, XRegister rs1);  // Pseudo-instruction equal to VL4re8
1052   void VL8r(VRegister vd, XRegister rs1);  // Pseudo-instruction equal to VL8re8
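       // Illustrative (assuming a `Riscv64Assembler assm` in scope): the pseudo-form
       // and its expansion emit identical encodings, e.g.
       //   assm.VL1r(V8, A0);  // Same encoding as assm.VL1re8(V8, A0);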
1053 
1054   void VS1r(VRegister vs3, XRegister rs1);  // Store {vs3} to address in rs1
1055   void VS2r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 1} to address in rs1
1056   void VS4r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 3} to address in rs1
1057   void VS8r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 7} to address in rs1
1058 
1059   // Vector Arithmetic Instructions
1060 
1061   // Vector vadd instructions, funct6 = 0b000000
1062   void VAdd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1063   void VAdd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1064   void VAdd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1065 
1066   // Vector vsub instructions, funct6 = 0b000010
1067   void VSub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1068   void VSub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1069 
1070   // Vector vrsub instructions, funct6 = 0b000011
1071   void VRsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1072   void VRsub_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1073 
1074   // Pseudo-instruction over VRsub_vi
1075   void VNeg_v(VRegister vd, VRegister vs2);
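       // Per the RVV spec, vneg.v is a reversed subtract from zero, so
       // VNeg_v(vd, vs2) emits the same encoding as VRsub_vi(vd, vs2, 0).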
1076 
1077   // Vector vminu instructions, funct6 = 0b000100
1078   void VMinu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1079   void VMinu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1080 
1081   // Vector vmin instructions, funct6 = 0b000101
1082   void VMin_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1083   void VMin_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1084 
1085   // Vector vmaxu instructions, funct6 = 0b000110
1086   void VMaxu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1087   void VMaxu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1088 
1089   // Vector vmax instructions, funct6 = 0b000111
1090   void VMax_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1091   void VMax_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1092 
1093   // Vector vand instructions, funct6 = 0b001001
1094   void VAnd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1095   void VAnd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1096   void VAnd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1097 
1098   // Vector vor instructions, funct6 = 0b001010
1099   void VOr_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1100   void VOr_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1101   void VOr_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1102 
1103   // Vector vxor instructions, funct6 = 0b001011
1104   void VXor_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1105   void VXor_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1106   void VXor_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1107 
1108   // Pseudo-instruction over VXor_vi
1109   void VNot_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
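       // Per the RVV spec, vnot.v is an XOR with all-ones, so VNot_v(vd, vs2, vm)
       // emits the same encoding as VXor_vi(vd, vs2, -1, vm).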
1110 
1111   // Vector vrgather instructions, funct6 = 0b001100
1112   void VRgather_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1113   void VRgather_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1114   void VRgather_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1115 
1116   // Vector vslideup instructions, funct6 = 0b001110
1117   void VSlideup_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1118   void VSlideup_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1119 
1120   // Vector vrgatherei16 instructions, funct6 = 0b001110
1121   void VRgatherei16_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1122 
1123   // Vector vslidedown instructions, funct6 = 0b001111
1124   void VSlidedown_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1125   void VSlidedown_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1126 
1127   // Vector vadc instructions, funct6 = 0b010000
1128   void VAdc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1129   void VAdc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1130   void VAdc_vim(VRegister vd, VRegister vs2, int32_t imm5);
1131 
1132   // Vector vmadc instructions, funct6 = 0b010001, vm = 0 (with carry-in)
1133   void VMadc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1134   void VMadc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1135   void VMadc_vim(VRegister vd, VRegister vs2, int32_t imm5);
1136 
1137   // Vector vmadc instructions, funct6 = 0b010001, vm = 1 (no carry-in)
1138   void VMadc_vv(VRegister vd, VRegister vs2, VRegister vs1);
1139   void VMadc_vx(VRegister vd, VRegister vs2, XRegister rs1);
1140   void VMadc_vi(VRegister vd, VRegister vs2, int32_t imm5);
1141 
1142   // Vector vsbc instructions, funct6 = 0b010010
1143   void VSbc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1144   void VSbc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1145 
1146   // Vector vmsbc instructions, funct6 = 0b010011
1147   void VMsbc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1148   void VMsbc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1149   void VMsbc_vv(VRegister vd, VRegister vs2, VRegister vs1);
1150   void VMsbc_vx(VRegister vd, VRegister vs2, XRegister rs1);
1151 
1152   // Vector vmerge instructions, funct6 = 0b010111, vm = 0
1153   void VMerge_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1154   void VMerge_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1155   void VMerge_vim(VRegister vd, VRegister vs2, int32_t imm5);
1156 
1157   // Vector vmv instructions, funct6 = 0b010111, vm = 1, vs2 = v0
1158   void VMv_vv(VRegister vd, VRegister vs1);
1159   void VMv_vx(VRegister vd, XRegister rs1);
1160   void VMv_vi(VRegister vd, int32_t imm5);
1161 
1162   // Vector vmseq instructions, funct6 = 0b011000
1163   void VMseq_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1164   void VMseq_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1165   void VMseq_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1166 
1167   // Vector vmsne instructions, funct6 = 0b011001
1168   void VMsne_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1169   void VMsne_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1170   void VMsne_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1171 
1172   // Vector vmsltu instructions, funct6 = 0b011010
1173   void VMsltu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1174   void VMsltu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1175 
1176   // Pseudo-instruction over VMsltu_vv
1177   void VMsgtu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1178 
1179   // Vector vmslt instructions, funct6 = 0b011011
1180   void VMslt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1181   void VMslt_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1182 
1183   // Pseudo-instruction over VMslt_vv
1184   void VMsgt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1185 
1186   // Vector vmsleu instructions, funct6 = 0b011100
1187   void VMsleu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1188   void VMsleu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1189   void VMsleu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1190 
1191   // Pseudo-instructions over VMsleu_*
1192   void VMsgeu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1193   void VMsltu_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
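       // (Per the RVV spec, the *_vv comparison pseudo-ops swap their operands,
       // e.g. vmsgeu.vv vd,va,vb is vmsleu.vv vd,vb,va; the *_vi forms take an
       // adjusted immediate `aimm5`, e.g. vmsltu.vi vd,va,i is vmsleu.vi vd,va,i-1.)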
1194 
1195   // Vector vmsle instructions, funct6 = 0b011101
1196   void VMsle_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1197   void VMsle_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1198   void VMsle_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1199 
1200   // Pseudo-instructions over VMsle_*
1201   void VMsge_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1202   void VMslt_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
1203 
1204   // Vector vmsgtu instructions, funct6 = 0b011110
1205   void VMsgtu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1206   void VMsgtu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1207 
1208   // Pseudo-instruction over VMsgtu_vi
1209   void VMsgeu_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
1210 
1211   // Vector vmsgt instructions, funct6 = 0b011111
1212   void VMsgt_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1213   void VMsgt_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1214 
1215   // Pseudo-instruction over VMsgt_vi
1216   void VMsge_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
1217 
1218   // Vector vsaddu instructions, funct6 = 0b100000
1219   void VSaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1220   void VSaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1221   void VSaddu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1222 
1223   // Vector vsadd instructions, funct6 = 0b100001
1224   void VSadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1225   void VSadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1226   void VSadd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1227 
1228   // Vector vssubu instructions, funct6 = 0b100010
1229   void VSsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1230   void VSsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1231 
1232   // Vector vssub instructions, funct6 = 0b100011
1233   void VSsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1234   void VSsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1235 
1236   // Vector vsll instructions, funct6 = 0b100101
1237   void VSll_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1238   void VSll_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1239   void VSll_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1240 
1241   // Vector vsmul instructions, funct6 = 0b100111
1242   void VSmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1243   void VSmul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1244 
1245   // Vector vmv<nr>r.v instructions, funct6 = 0b100111
1246   void Vmv1r_v(VRegister vd, VRegister vs2);
1247   void Vmv2r_v(VRegister vd, VRegister vs2);
1248   void Vmv4r_v(VRegister vd, VRegister vs2);
1249   void Vmv8r_v(VRegister vd, VRegister vs2);
1250 
1251   // Vector vsrl instructions, funct6 = 0b101000
1252   void VSrl_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1253   void VSrl_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1254   void VSrl_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1255 
1256   // Vector vsra instructions, funct6 = 0b101001
1257   void VSra_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1258   void VSra_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1259   void VSra_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1260 
1261   // Vector vssrl instructions, funct6 = 0b101010
1262   void VSsrl_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1263   void VSsrl_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1264   void VSsrl_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1265 
1266   // Vector vssra instructions, funct6 = 0b101011
1267   void VSsra_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1268   void VSsra_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1269   void VSsra_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1270 
1271   // Vector vnsrl instructions, funct6 = 0b101100
1272   void VNsrl_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1273   void VNsrl_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1274   void VNsrl_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1275 
1276   // Pseudo-instruction over VNsrl_wx
1277   void VNcvt_x_x_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
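       // Per the RVV spec, vncvt.x.x.w is a narrowing shift by zero, so
       // VNcvt_x_x_w(vd, vs2, vm) emits the same encoding as
       // VNsrl_wx(vd, vs2, Zero, vm), where `Zero` is x0.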
1278 
1279   // Vector vnsra instructions, funct6 = 0b101101
1280   void VNsra_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1281   void VNsra_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1282   void VNsra_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1283 
1284   // Vector vnclipu instructions, funct6 = 0b101110
1285   void VNclipu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1286   void VNclipu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1287   void VNclipu_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1288 
1289   // Vector vnclip instructions, funct6 = 0b101111
1290   void VNclip_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1291   void VNclip_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1292   void VNclip_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1293 
1294   // Vector vwredsumu instructions, funct6 = 0b110000
1295   void VWredsumu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1296 
1297   // Vector vwredsum instructions, funct6 = 0b110001
1298   void VWredsum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1299 
1300   // Vector vredsum instructions, funct6 = 0b000000
1301   void VRedsum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1302 
1303   // Vector vredand instructions, funct6 = 0b000001
1304   void VRedand_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1305 
1306   // Vector vredor instructions, funct6 = 0b000010
1307   void VRedor_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1308 
1309   // Vector vredxor instructions, funct6 = 0b000011
1310   void VRedxor_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1311 
1312   // Vector vredminu instructions, funct6 = 0b000100
1313   void VRedminu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1314 
1315   // Vector vredmin instructions, funct6 = 0b000101
1316   void VRedmin_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1317 
1318   // Vector vredmaxu instructions, funct6 = 0b000110
1319   void VRedmaxu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1320 
1321   // Vector vredmax instructions, funct6 = 0b000111
1322   void VRedmax_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1323 
1324   // Vector vaaddu instructions, funct6 = 0b001000
1325   void VAaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1326   void VAaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1327 
1328   // Vector vaadd instructions, funct6 = 0b001001
1329   void VAadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1330   void VAadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1331 
1332   // Vector vasubu instructions, funct6 = 0b001010
1333   void VAsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1334   void VAsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1335 
1336   // Vector vasub instructions, funct6 = 0b001011
1337   void VAsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1338   void VAsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1339 
1340   // Vector vslide1up instructions, funct6 = 0b001110
1341   void VSlide1up_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1342 
1343   // Vector vslide1down instructions, funct6 = 0b001111
1344   void VSlide1down_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1345 
1346   // Vector vcompress instructions, funct6 = 0b010111
1347   void VCompress_vm(VRegister vd, VRegister vs2, VRegister vs1);
1348 
1349   // Vector vmandn instructions, funct6 = 0b011000
1350   void VMandn_mm(VRegister vd, VRegister vs2, VRegister vs1);
1351 
1352   // Vector vmand instructions, funct6 = 0b011001
1353   void VMand_mm(VRegister vd, VRegister vs2, VRegister vs1);
1354 
1355   // Pseudo-instruction over VMand_mm
1356   void VMmv_m(VRegister vd, VRegister vs2);
1357 
1358   // Vector vmor instructions, funct6 = 0b011010
1359   void VMor_mm(VRegister vd, VRegister vs2, VRegister vs1);
1360 
1361   // Vector vmxor instructions, funct6 = 0b011011
1362   void VMxor_mm(VRegister vd, VRegister vs2, VRegister vs1);
1363 
1364   // Pseudo-instruction over VMxor_mm
1365   void VMclr_m(VRegister vd);
1366 
1367   // Vector vmorn instructions, funct6 = 0b011100
1368   void VMorn_mm(VRegister vd, VRegister vs2, VRegister vs1);
1369 
1370   // Vector vmnand instructions, funct6 = 0b011101
1371   void VMnand_mm(VRegister vd, VRegister vs2, VRegister vs1);
1372 
1373   // Pseudo-instruction over VMnand_mm
1374   void VMnot_m(VRegister vd, VRegister vs2);
1375 
1376   // Vector vmnor instructions, funct6 = 0b011110
1377   void VMnor_mm(VRegister vd, VRegister vs2, VRegister vs1);
1378 
1379   // Vector vmxnor instructions, funct6 = 0b011111
1380   void VMxnor_mm(VRegister vd, VRegister vs2, VRegister vs1);
1381 
1382   // Pseudo-instruction over VMxnor_mm
1383   void VMset_m(VRegister vd);
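       // Per the RVV spec, these mask pseudo-ops reuse the mask-logical encodings:
       // vmmv.m vd,vs = vmand.mm vd,vs,vs;   vmclr.m vd = vmxor.mm vd,vd,vd;
       // vmnot.m vd,vs = vmnand.mm vd,vs,vs; vmset.m vd = vmxnor.mm vd,vd,vd.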
1384 
1385   // Vector vdivu instructions, funct6 = 0b100000
1386   void VDivu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1387   void VDivu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1388 
1389   // Vector vdiv instructions, funct6 = 0b100001
1390   void VDiv_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1391   void VDiv_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1392 
1393   // Vector vremu instructions, funct6 = 0b100010
1394   void VRemu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1395   void VRemu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1396 
1397   // Vector vrem instructions, funct6 = 0b100011
1398   void VRem_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1399   void VRem_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1400 
1401   // Vector vmulhu instructions, funct6 = 0b100100
1402   void VMulhu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1403   void VMulhu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1404 
1405   // Vector vmul instructions, funct6 = 0b100101
1406   void VMul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1407   void VMul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1408 
1409   // Vector vmulhsu instructions, funct6 = 0b100110
1410   void VMulhsu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1411   void VMulhsu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1412 
1413   // Vector vmulh instructions, funct6 = 0b100111
1414   void VMulh_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1415   void VMulh_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1416 
1417   // Vector vmadd instructions, funct6 = 0b101001
1418   void VMadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1419   void VMadd_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1420 
1421   // Vector vnmsub instructions, funct6 = 0b101011
1422   void VNmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1423   void VNmsub_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1424 
1425   // Vector vmacc instructions, funct6 = 0b101101
1426   void VMacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1427   void VMacc_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1428 
1429   // Vector vnmsac instructions, funct6 = 0b101111
1430   void VNmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1431   void VNmsac_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1432 
1433   // Vector vwaddu instructions, funct6 = 0b110000
1434   void VWaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1435   void VWaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1436 
1437   // Pseudo-instruction over VWaddu_vx
1438   void VWcvtu_x_x_v(VRegister vd, VRegister vs, VM vm = VM::kUnmasked);
1439 
1440   // Vector vwadd instructions, funct6 = 0b110001
1441   void VWadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1442   void VWadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1443 
1444   // Pseudo-instruction over VWadd_vx
1445   void VWcvt_x_x_v(VRegister vd, VRegister vs, VM vm = VM::kUnmasked);
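       // Per the RVV spec, both widening converts are widening adds of zero:
       // VWcvtu_x_x_v(vd, vs, vm) matches VWaddu_vx(vd, vs, Zero, vm) and
       // VWcvt_x_x_v(vd, vs, vm) matches VWadd_vx(vd, vs, Zero, vm).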
1446 
1447   // Vector vwsubu instructions, funct6 = 0b110010
1448   void VWsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1449   void VWsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1450 
1451   // Vector vwsub instructions, funct6 = 0b110011
1452   void VWsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1453   void VWsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1454 
1455   // Vector vwaddu.w instructions, funct6 = 0b110100
1456   void VWaddu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1457   void VWaddu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1458 
1459   // Vector vwadd.w instructions, funct6 = 0b110101
1460   void VWadd_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1461   void VWadd_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1462 
1463   // Vector vwsubu.w instructions, funct6 = 0b110110
1464   void VWsubu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1465   void VWsubu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1466 
1467   // Vector vwsub.w instructions, funct6 = 0b110111
1468   void VWsub_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1469   void VWsub_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1470 
1471   // Vector vwmulu instructions, funct6 = 0b111000
1472   void VWmulu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1473   void VWmulu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1474 
1475   // Vector vwmulsu instructions, funct6 = 0b111010
1476   void VWmulsu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1477   void VWmulsu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1478 
1479   // Vector vwmul instructions, funct6 = 0b111011
1480   void VWmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1481   void VWmul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1482 
1483   // Vector vwmaccu instructions, funct6 = 0b111100
1484   void VWmaccu_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1485   void VWmaccu_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1486 
1487   // Vector vwmacc instructions, funct6 = 0b111101
1488   void VWmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1489   void VWmacc_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1490 
1491   // Vector vwmaccus instructions, funct6 = 0b111110
1492   void VWmaccus_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1493 
1494   // Vector vwmaccsu instructions, funct6 = 0b111111
1495   void VWmaccsu_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1496   void VWmaccsu_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1497 
1498   // Vector vfadd instructions, funct6 = 0b000000
1499   void VFadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1500   void VFadd_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1501 
1502   // Vector vfredusum instructions, funct6 = 0b000001
1503   void VFredusum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1504 
1505   // Vector vfsub instructions, funct6 = 0b000010
1506   void VFsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1507   void VFsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1508 
1509   // Vector vfredosum instructions, funct6 = 0b000011
1510   void VFredosum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1511 
1512   // Vector vfmin instructions, funct6 = 0b000100
1513   void VFmin_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1514   void VFmin_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1515 
1516   // Vector vfredmin instructions, funct6 = 0b000101
1517   void VFredmin_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1518 
1519   // Vector vfmax instructions, funct6 = 0b000110
1520   void VFmax_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1521   void VFmax_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1522 
1523   // Vector vfredmax instructions, funct6 = 0b000111
1524   void VFredmax_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1525 
1526   // Vector vfsgnj instructions, funct6 = 0b001000
1527   void VFsgnj_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1528   void VFsgnj_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1529 
1530   // Vector vfsgnjn instructions, funct6 = 0b001001
1531   void VFsgnjn_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1532   void VFsgnjn_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1533 
1534   // Pseudo-instruction over VFsgnjn_vv
1535   void VFneg_v(VRegister vd, VRegister vs);
1536 
1537   // Vector vfsgnjx instructions, funct6 = 0b001010
1538   void VFsgnjx_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1539   void VFsgnjx_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1540 
1541   // Pseudo-instruction over VFsgnjx_vv
1542   void VFabs_v(VRegister vd, VRegister vs);
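       // Per the RVV spec, the sign-injection source is simply duplicated:
       // vfneg.v vd,vs = vfsgnjn.vv vd,vs,vs and vfabs.v vd,vs = vfsgnjx.vv vd,vs,vs.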
1543 
1544   // Vector vfslide1up instructions, funct6 = 0b001110
1545   void VFslide1up_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1546 
1547   // Vector vfslide1down instructions, funct6 = 0b001111
1548   void VFslide1down_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1549 
1550   // Vector vfmerge/vfmv instructions, funct6 = 0b010111
1551   void VFmerge_vfm(VRegister vd, VRegister vs2, FRegister fs1);
1552   void VFmv_v_f(VRegister vd, FRegister fs1);
1553 
1554   // Vector vmfeq instructions, funct6 = 0b011000
1555   void VMfeq_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1556   void VMfeq_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1557 
1558   // Vector vmfle instructions, funct6 = 0b011001
1559   void VMfle_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1560   void VMfle_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1561 
1562   // Pseudo-instruction over VMfle_vv
1563   void VMfge_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1564 
1565   // Vector vmflt instructions, funct6 = 0b011011
1566   void VMflt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1567   void VMflt_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1568 
1569   // Pseudo-instruction over VMflt_vv
1570   void VMfgt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1571 
1572   // Vector vmfne instructions, funct6 = 0b011100
1573   void VMfne_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1574   void VMfne_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1575 
1576   // Vector vmfgt instructions, funct6 = 0b011101
1577   void VMfgt_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1578 
1579   // Vector vmfge instructions, funct6 = 0b011111
1580   void VMfge_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1581 
1582   // Vector vfdiv instructions, funct6 = 0b100000
1583   void VFdiv_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1584   void VFdiv_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1585 
1586   // Vector vfrdiv instructions, funct6 = 0b100001
1587   void VFrdiv_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1588 
1589   // Vector vfmul instructions, funct6 = 0b100100
1590   void VFmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1591   void VFmul_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1592 
1593   // Vector vfrsub instructions, funct6 = 0b100111
1594   void VFrsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1595 
1596   // Vector vfmadd instructions, funct6 = 0b101000
1597   void VFmadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1598   void VFmadd_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1599 
1600   // Vector vfnmadd instructions, funct6 = 0b101001
1601   void VFnmadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1602   void VFnmadd_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1603 
1604   // Vector vfmsub instructions, funct6 = 0b101010
1605   void VFmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1606   void VFmsub_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1607 
1608   // Vector vfnmsub instructions, funct6 = 0b101011
1609   void VFnmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1610   void VFnmsub_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1611 
1612   // Vector vfmacc instructions, funct6 = 0b101100
1613   void VFmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1614   void VFmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1615 
1616   // Vector vfnmacc instructions, funct6 = 0b101101
1617   void VFnmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1618   void VFnmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1619 
1620   // Vector vfmsac instructions, funct6 = 0b101110
1621   void VFmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1622   void VFmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1623 
1624   // Vector vfnmsac instructions, funct6 = 0b101111
1625   void VFnmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1626   void VFnmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1627 
1628   // Vector vfwadd instructions, funct6 = 0b110000
1629   void VFwadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1630   void VFwadd_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1631 
1632   // Vector vfwredusum instructions, funct6 = 0b110001
1633   void VFwredusum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1634 
1635   // Vector vfwsub instructions, funct6 = 0b110010
1636   void VFwsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1637   void VFwsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1638 
1639   // Vector vfwredosum instructions, funct6 = 0b110011
1640   void VFwredosum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1641 
1642   // Vector vfwadd.w instructions, funct6 = 0b110100
1643   void VFwadd_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1644   void VFwadd_wf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1645 
1646   // Vector vfwsub.w instructions, funct6 = 0b110110
1647   void VFwsub_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1648   void VFwsub_wf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1649 
1650   // Vector vfwmul instructions, funct6 = 0b111000
1651   void VFwmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1652   void VFwmul_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1653 
1654   // Vector vfwmacc instructions, funct6 = 0b111100
1655   void VFwmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1656   void VFwmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1657 
1658   // Vector vfwnmacc instructions, funct6 = 0b111101
1659   void VFwnmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1660   void VFwnmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1661 
1662   // Vector vfwmsac instructions, funct6 = 0b111110
1663   void VFwmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1664   void VFwmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1665 
1666   // Vector vfwnmsac instructions, funct6 = 0b111111
1667   void VFwnmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1668   void VFwnmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1669 
1670   // Vector VRXUNARY0 kind instructions, funct6 = 0b010000
1671   void VMv_s_x(VRegister vd, XRegister rs1);
1672 
1673   // Vector VWXUNARY0 kind instructions, funct6 = 0b010000
1674   void VMv_x_s(XRegister rd, VRegister vs2);
1675   void VCpop_m(XRegister rd, VRegister vs2, VM vm = VM::kUnmasked);
1676   void VFirst_m(XRegister rd, VRegister vs2, VM vm = VM::kUnmasked);
1677 
1678   // Vector VXUNARY0 kind instructions, funct6 = 0b010010
1679   void VZext_vf8(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1680   void VSext_vf8(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1681   void VZext_vf4(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1682   void VSext_vf4(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1683   void VZext_vf2(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1684   void VSext_vf2(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1685 
1686   // Vector VRFUNARY0 kind instructions, funct6 = 0b010000
1687   void VFmv_s_f(VRegister vd, FRegister fs1);
1688 
1689   // Vector VWFUNARY0 kind instructions, funct6 = 0b010000
1690   void VFmv_f_s(FRegister fd, VRegister vs2);
1691 
1692   // Vector VFUNARY0 kind instructions, funct6 = 0b010010
1693   void VFcvt_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1694   void VFcvt_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1695   void VFcvt_f_xu_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1696   void VFcvt_f_x_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1697   void VFcvt_rtz_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1698   void VFcvt_rtz_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1699   void VFwcvt_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1700   void VFwcvt_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1701   void VFwcvt_f_xu_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1702   void VFwcvt_f_x_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1703   void VFwcvt_f_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1704   void VFwcvt_rtz_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1705   void VFwcvt_rtz_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1706   void VFncvt_xu_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1707   void VFncvt_x_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1708   void VFncvt_f_xu_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1709   void VFncvt_f_x_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1710   void VFncvt_f_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1711   void VFncvt_rod_f_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1712   void VFncvt_rtz_xu_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1713   void VFncvt_rtz_x_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1714 
1715   // Vector VFUNARY1 kind instructions, funct6 = 0b010011
1716   void VFsqrt_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1717   void VFrsqrt7_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1718   void VFrec7_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1719   void VFclass_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1720 
1721   // Vector VMUNARY0 kind instructions, funct6 = 0b010100
1722   void VMsbf_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1723   void VMsof_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1724   void VMsif_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1725   void VIota_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1726   void VId_v(VRegister vd, VM vm = VM::kUnmasked);
1727 
1728   ////////////////////////////// RISC-V Vector Instructions END //////////////////////////////
1729 
1730   ////////////////////////////// RV64 MACRO Instructions START ///////////////////////////////
1731   // These pseudo instructions are from "RISC-V Assembly Programmer's Manual".
1732 
1733   void Nop();
1734   void Li(XRegister rd, int64_t imm);
1735   void Mv(XRegister rd, XRegister rs);
1736   void Not(XRegister rd, XRegister rs);
1737   void Neg(XRegister rd, XRegister rs);
1738   void NegW(XRegister rd, XRegister rs);
1739   void SextB(XRegister rd, XRegister rs);
1740   void SextH(XRegister rd, XRegister rs);
1741   void SextW(XRegister rd, XRegister rs);
1742   void ZextB(XRegister rd, XRegister rs);
1743   void ZextH(XRegister rd, XRegister rs);
1744   void ZextW(XRegister rd, XRegister rs);
1745   void Seqz(XRegister rd, XRegister rs);
1746   void Snez(XRegister rd, XRegister rs);
1747   void Sltz(XRegister rd, XRegister rs);
1748   void Sgtz(XRegister rd, XRegister rs);
1749   void FMvS(FRegister rd, FRegister rs);
1750   void FAbsS(FRegister rd, FRegister rs);
1751   void FNegS(FRegister rd, FRegister rs);
1752   void FMvD(FRegister rd, FRegister rs);
1753   void FAbsD(FRegister rd, FRegister rs);
1754   void FNegD(FRegister rd, FRegister rs);
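       // Illustrative `Li` expansions (a sketch; the assembler chooses the actual
       // sequence):
       //   assm.Li(T0, 1);        // Fits in 12 bits: a single ADDI from Zero.
       //   assm.Li(T0, 0x12345);  // E.g. LUI T0, 0x12 then ADDIW T0, T0, 0x345.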
1755 
1756   // Branch pseudo instructions
1757   void Beqz(XRegister rs, int32_t offset);
1758   void Bnez(XRegister rs, int32_t offset);
1759   void Blez(XRegister rs, int32_t offset);
1760   void Bgez(XRegister rs, int32_t offset);
1761   void Bltz(XRegister rs, int32_t offset);
1762   void Bgtz(XRegister rs, int32_t offset);
1763   void Bgt(XRegister rs, XRegister rt, int32_t offset);
1764   void Ble(XRegister rs, XRegister rt, int32_t offset);
1765   void Bgtu(XRegister rs, XRegister rt, int32_t offset);
1766   void Bleu(XRegister rs, XRegister rt, int32_t offset);
1767 
1768   // Jump pseudo instructions
1769   void J(int32_t offset);
1770   void Jal(int32_t offset);
1771   void Jr(XRegister rs);
1772   void Jalr(XRegister rs);
1773   void Jalr(XRegister rd, XRegister rs);
1774   void Ret();
1775 
1776   // Pseudo instructions for accessing control and status registers
1777   void RdCycle(XRegister rd);
1778   void RdTime(XRegister rd);
1779   void RdInstret(XRegister rd);
1780   void Csrr(XRegister rd, uint32_t csr);
1781   void Csrw(uint32_t csr, XRegister rs);
1782   void Csrs(uint32_t csr, XRegister rs);
1783   void Csrc(uint32_t csr, XRegister rs);
1784   void Csrwi(uint32_t csr, uint32_t uimm5);
1785   void Csrsi(uint32_t csr, uint32_t uimm5);
1786   void Csrci(uint32_t csr, uint32_t uimm5);
1787 
1788   // Load/store macros for arbitrary 32-bit offsets.
1789   void Loadb(XRegister rd, XRegister rs1, int32_t offset);
1790   void Loadh(XRegister rd, XRegister rs1, int32_t offset);
1791   void Loadw(XRegister rd, XRegister rs1, int32_t offset);
1792   void Loadd(XRegister rd, XRegister rs1, int32_t offset);
1793   void Loadbu(XRegister rd, XRegister rs1, int32_t offset);
1794   void Loadhu(XRegister rd, XRegister rs1, int32_t offset);
1795   void Loadwu(XRegister rd, XRegister rs1, int32_t offset);
1796   void Storeb(XRegister rs2, XRegister rs1, int32_t offset);
1797   void Storeh(XRegister rs2, XRegister rs1, int32_t offset);
1798   void Storew(XRegister rs2, XRegister rs1, int32_t offset);
1799   void Stored(XRegister rs2, XRegister rs1, int32_t offset);
1800   void FLoadw(FRegister rd, XRegister rs1, int32_t offset);
1801   void FLoadd(FRegister rd, XRegister rs1, int32_t offset);
1802   void FStorew(FRegister rs2, XRegister rs1, int32_t offset);
1803   void FStored(FRegister rs2, XRegister rs1, int32_t offset);
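       // Sketch of the intent (illustrative; the actual expansion and the scratch
       // register are implementation details): an offset that does not fit the
       // 12-bit immediate of LW is split, so Loadw(A0, A1, 0x12345) may become
       //   LUI  scratch, 0x12         // scratch = 0x12000
       //   ADD  scratch, A1, scratch
       //   LW   A0, 0x345(scratch)    // 0x12000 + 0x345 = 0x12345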
1804 
1805   // Macros for loading constants.
1806   void LoadConst32(XRegister rd, int32_t value);
1807   void LoadConst64(XRegister rd, int64_t value);
1808 
1809   // Macros for adding constants.
1810   void AddConst32(XRegister rd, XRegister rs1, int32_t value);
1811   void AddConst64(XRegister rd, XRegister rs1, int64_t value);
1812 
1813   // Jumps and branches to a label.
1814   void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1815   void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1816   void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1817   void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1818   void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1819   void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1820   void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1821   void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1822   void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1823   void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1824   void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1825   void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1826   void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1827   void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1828   void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1829   void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1830   void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false);
1831   void J(Riscv64Label* label, bool is_bare = false);
1832   void Jal(Riscv64Label* label, bool is_bare = false);
1833 
1834   // Literal load.
1835   void Loadw(XRegister rd, Literal* literal);
1836   void Loadwu(XRegister rd, Literal* literal);
1837   void Loadd(XRegister rd, Literal* literal);
1838   void FLoadw(FRegister rd, Literal* literal);
1839   void FLoadd(FRegister rd, Literal* literal);
1840 
1841   // Illegal instruction that triggers SIGILL.
1842   void Unimp();
1843 
1844   /////////////////////////////// RV64 MACRO Instructions END ///////////////////////////////
1845 
1846   void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); }
1847 
1848   void Jump([[maybe_unused]] Label* label) override {
1849     UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64";
1850   }
1851 
1852   void Jump(Riscv64Label* label) {
1853     J(label);
1854   }
1855 
1856   void Bind(Riscv64Label* label);
1857 
1858   // Load label address using PC-relative loads.
1859   void LoadLabelAddress(XRegister rd, Riscv64Label* label);
1860 
1861   // Create a new literal with a given value.
1862   // NOTE: Use `Identity<>` to force the template parameter to be explicitly specified.
1863   template <typename T>
1864   Literal* NewLiteral(typename Identity<T>::type value) {
1865     static_assert(std::is_integral<T>::value, "T must be an integral type.");
1866     return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
1867   }
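       // Usage sketch (illustrative): the template argument must be spelled out,
       // e.g.
       //   Literal* lit = assm.NewLiteral<int64_t>(INT64_C(0x1234567890abcdef));
       //   assm.Loadd(A0, lit);  // PC-relative load of the 64-bit literal into A0.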
1868 
1869   // Create a new literal with the given data.
1870   Literal* NewLiteral(size_t size, const uint8_t* data);
1871 
1872   // Create a jump table for the given labels that will be emitted when finalizing.
1873   // When the table is emitted, offsets will be relative to the location of the table.
1874   // The table location is determined by the location of its label (the label precedes
1875   // the table data), and the table address should be loaded using LoadLabelAddress().
1876   JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels);
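       // Usage sketch (illustrative; assumes the jump table exposes its label via
       // `GetLabel()`, as in other ART assemblers):
       //   JumpTable* table = assm.CreateJumpTable(std::move(case_labels));
       //   assm.LoadLabelAddress(T0, table->GetLabel());  // Table base address.
       //   // ... load the selected 32-bit offset from the table and add it to T0.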
1877 
1878  public:
1879   // Emit slow paths queued during assembly, promote short branches to long if needed,
1880   // and emit branches.
1881   void FinalizeCode() override;
1882 
1883   template <typename Reg>
1884   static inline bool IsShortReg(Reg reg) {
1885     static_assert(std::is_same_v<Reg, XRegister> || std::is_same_v<Reg, FRegister>);
1886     uint32_t uv = enum_cast<uint32_t>(reg) - 8u;
1887     return IsUint<3>(uv);
1888   }
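       // The "short" set is x8..x15 (and f8..f15), the registers most compressed
       // instructions can encode in 3 bits; e.g. IsShortReg(A0) is true (A0 is
       // x10) while IsShortReg(T6) is false (T6 is x31).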
1889 
1890   // Returns the current location of a label.
1891   //
1892   // This function must be used instead of `Riscv64Label::GetPosition()`
1893   // which returns the assembler's internal data instead of an actual location.
1894   //
1895   // The location can change during branch fixup in `FinalizeCode()`. Before that,
1896   // the location is not final and therefore not very useful to external users,
1897   // so they should preferably retrieve the location only after `FinalizeCode()`.
1898   uint32_t GetLabelLocation(const Riscv64Label* label) const;
1899 
1900   // Get the final position of a label after local fixup based on the old position
1901   // recorded before FinalizeCode().
1902   uint32_t GetAdjustedPosition(uint32_t old_position);
1903 
1904  private:
1905   static uint32_t ConvertExtensions(
1906       const Riscv64InstructionSetFeatures* instruction_set_features) {
1907     // The `Riscv64InstructionSetFeatures` currently does not support "Zcb",
1908     // only the original "C" extension. For riscv64 that means "Zca" and "Zcd".
1909     constexpr Riscv64ExtensionMask kCompressedExtensionsMask =
1910         Riscv64ExtensionBit(Riscv64Extension::kZca) | Riscv64ExtensionBit(Riscv64Extension::kZcd);
1911     return
1912         (Riscv64ExtensionBit(Riscv64Extension::kLoadStore)) |
1913         (Riscv64ExtensionBit(Riscv64Extension::kZifencei)) |
1914         (Riscv64ExtensionBit(Riscv64Extension::kM)) |
1915         (Riscv64ExtensionBit(Riscv64Extension::kA)) |
1916         (Riscv64ExtensionBit(Riscv64Extension::kZicsr)) |
1917         (Riscv64ExtensionBit(Riscv64Extension::kF)) |
1918         (Riscv64ExtensionBit(Riscv64Extension::kD)) |
1919         (instruction_set_features->HasZba() ? Riscv64ExtensionBit(Riscv64Extension::kZba) : 0u) |
1920         (instruction_set_features->HasZbb() ? Riscv64ExtensionBit(Riscv64Extension::kZbb) : 0u) |
1921         (instruction_set_features->HasZbs() ? Riscv64ExtensionBit(Riscv64Extension::kZbs) : 0u) |
1922         (instruction_set_features->HasVector() ? Riscv64ExtensionBit(Riscv64Extension::kV) : 0u) |
1923         (instruction_set_features->HasCompressed() ? kCompressedExtensionsMask : 0u);
1924   }
1925 
1926   void AssertExtensionsEnabled(Riscv64Extension ext) {
1927     DCHECK(IsExtensionEnabled(ext))
1928         << "ext=" << enum_cast<>(ext) << " enabled=0x" << std::hex << enabled_extensions_;
1929   }
1930 
1931   template <typename... OtherExt>
1932   void AssertExtensionsEnabled(Riscv64Extension ext, OtherExt... other_ext) {
1933     AssertExtensionsEnabled(ext);
1934     AssertExtensionsEnabled(other_ext...);
1935   }
1936 
1937   enum BranchCondition : uint8_t {
1938     kCondEQ,
1939     kCondNE,
1940     kCondLT,
1941     kCondGE,
1942     kCondLE,
1943     kCondGT,
1944     kCondLTU,
1945     kCondGEU,
1946     kCondLEU,
1947     kCondGTU,
1948     kUncond,
1949   };
1950 
1951   // Note that PC-relative literal loads are handled as pseudo branches because they need
1952   // to be emitted after branch relocation to use correct offsets.
1953   class Branch {
1954    public:
1955     enum Type : uint8_t {
1956       // Compressed branches (can be promoted to longer)
1957       kCondCBranch,
1958       kUncondCBranch,
1959       // Compressed branches (can't be promoted to longer)
1960       kBareCondCBranch,
1961       kBareUncondCBranch,
1962 
1963       // Short branches (can be promoted to longer).
1964       kCondBranch,
1965       kUncondBranch,
1966       kCall,
1967       // Short branches (can't be promoted to longer).
1968       kBareCondBranch,
1969       kBareUncondBranch,
1970       kBareCall,
1971 
1972       // Medium branches (can be promoted to long).
1973       // Compressed version
1974       kCondCBranch21,
1975       kCondBranch21,
1976 
1977       // Long branches.
1978       kLongCondCBranch,
1979       kLongCondBranch,
1980       kLongUncondBranch,
1981       kLongCall,
1982 
1983       // Label.
1984       kLabel,
1985 
1986       // Literals.
1987       kLiteral,
1988       kLiteralUnsigned,
1989       kLiteralLong,
1990       kLiteralFloat,
1991       kLiteralDouble,
1992     };
1993 
1994     // Bit sizes of offsets defined as enums to minimize chance of typos.
1995     enum OffsetBits {
1996       kOffset9 = 9,
1997       kOffset12 = 12,
1998       kOffset13 = 13,
1999       kOffset21 = 21,
2000       kOffset32 = 32,
2001     };
2002 
2003     static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
2004     static constexpr uint32_t kMaxBranchLength = 12;  // In bytes.
2005 
2006     struct BranchInfo {
2007       // Branch length in bytes.
2008       uint32_t length;
2009       // The offset in bytes of the PC used in the (only) PC-relative instruction from
2010       // the start of the branch sequence. RISC-V always uses the address of the PC-relative
2011       // instruction as the PC, so this is essentially the offset of that instruction.
2012       uint32_t pc_offset;
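           // For instance (illustrative): in a long call emitted as AUIPC+JALR,
           // the AUIPC is the PC-relative instruction and comes first, so its
           // `pc_offset` would be 0; a long conditional branch that prefixes that
           // pair with a short opposite-condition branch would have `pc_offset` 4.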
2013       // How large (in bits) a PC-relative offset can be for a given type of branch.
2014       OffsetBits offset_size;
2015     };
2016     static const BranchInfo branch_info_[/* Type */];
2017 
2018     // Unconditional branch or call.
2019     Branch(
2020         uint32_t location, uint32_t target, XRegister rd, bool is_bare, bool compression_allowed);
2021     // Conditional branch.
2022     Branch(uint32_t location,
2023            uint32_t target,
2024            BranchCondition condition,
2025            XRegister lhs_reg,
2026            XRegister rhs_reg,
2027            bool is_bare,
2028            bool compression_allowed);
2029     // Label address or literal.
2030     Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type);
2031     Branch(uint32_t location, uint32_t target, FRegister rd, Type literal_type);
2032 
2033     // Some conditional branches with lhs = rhs are effectively NOPs, while some
2034     // others are effectively unconditional.
2035     static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs);
2036     static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs);
2037     static bool IsCompressed(Type type);
2038 
2039     static BranchCondition OppositeCondition(BranchCondition cond);
2040 
2041     Type GetType() const;
2042     Type GetOldType() const;
2043     BranchCondition GetCondition() const;
2044     XRegister GetLeftRegister() const;
2045     XRegister GetRightRegister() const;
2046     XRegister GetNonZeroRegister() const;
2047     FRegister GetFRegister() const;
2048     uint32_t GetTarget() const;
2049     uint32_t GetLocation() const;
2050     uint32_t GetOldLocation() const;
2051     uint32_t GetLength() const;
2052     uint32_t GetOldLength() const;
2053     uint32_t GetEndLocation() const;
2054     uint32_t GetOldEndLocation() const;
2055     bool IsBare() const;
2056     bool IsResolved() const;
2057 
2058     uint32_t NextBranchId() const;
2059 
2060     // Checks if condition meets compression requirements
2061     bool IsCompressableCondition() const;
2062 
2063     // Returns the bit size of the signed offset that the branch instruction can handle.
2064     OffsetBits GetOffsetSize() const;
2065 
2066     // Calculates the distance between two byte locations in the assembler buffer and
2067     // returns the number of bits needed to represent the distance as a signed integer.
2068     static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
2069 
2070     // Resolve a branch when the target is known.
2071     void Resolve(uint32_t target);
2072 
2073     // Relocate a branch by a given delta if needed due to expansion of this or another
2074     // branch at a given location by this delta (just changes location_ and target_).
2075     void Relocate(uint32_t expand_location, uint32_t delta);
2076 
2077     // If necessary, updates the type by promoting a short branch to a longer branch
2078     // based on the branch location and target. Returns the amount (in bytes) by
2079     // which the branch size has increased.
2080     uint32_t PromoteIfNeeded();
2081 
2082     // Returns the offset into assembler buffer that shall be used as the base PC for
2083     // offset calculation. RISC-V always uses the address of the PC-relative instruction
2084     // as the PC, so this is essentially the location of that instruction.
2085     uint32_t GetOffsetLocation() const;
2086 
2087     // Calculates and returns the offset ready for encoding in the branch instruction(s).
2088     int32_t GetOffset() const;
2089 
2090     // Link with the next branch
2091     void LinkToList(uint32_t next_branch_id);
2092 
2093    private:
2094     // Completes branch construction by determining and recording its type.
2095     void InitializeType(Type initial_type);
2096     // Helper for the above.
2097     void InitShortOrLong(OffsetBits ofs_size, std::initializer_list<Type> types);
2098 
2099     uint32_t old_location_;  // Offset into assembler buffer in bytes.
2100     uint32_t location_;      // Offset into assembler buffer in bytes.
2101     uint32_t target_;        // Offset into assembler buffer in bytes.
2102 
2103     XRegister lhs_reg_;          // Left-hand side register in conditional branches or
2104                                  // destination register in calls or literals.
2105     XRegister rhs_reg_;          // Right-hand side register in conditional branches.
2106     FRegister freg_;             // Destination register in FP literals.
2107     BranchCondition condition_;  // Condition for conditional branches.
2108 
2109     Type type_;      // Current type of the branch.
2110     Type old_type_;  // Initial type of the branch.
2111 
2112     bool compression_allowed_;
2113 
2114     // Id of the next branch bound to the same label in singly-linked zero-terminated list
2115     // NOTE: encoded the same way as a position in a linked Label (id + sizeof(void*))
2116     // Label itself is used to hold the 'head' of this list
2117     uint32_t next_branch_id_;
2118   };
2119 
2120   // Branch and literal fixup.
2121 
2122   void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset);
2123   void EmitBranch(Branch* branch);
2124   void EmitBranches();
2125   void EmitJumpTables();
2126   void EmitLiterals();
2127 
2128   void FinalizeLabeledBranch(Riscv64Label* label);
2129   void Bcond(Riscv64Label* label,
2130              bool is_bare,
2131              BranchCondition condition,
2132              XRegister lhs,
2133              XRegister rhs);
2134   void Buncond(Riscv64Label* label, XRegister rd, bool is_bare);
2135   template <typename XRegisterOrFRegister>
2136   void LoadLiteral(Literal* literal, XRegisterOrFRegister rd, Branch::Type literal_type);
2137 
2138   Branch* GetBranch(uint32_t branch_id);
2139   const Branch* GetBranch(uint32_t branch_id) const;
2140 
2141   void ReserveJumpTableSpace();
2142   void PromoteBranches();
2143   void PatchCFI();
2144 
2145   // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
2146   template <typename T>
Emit(T value)2147   void Emit(T value) {
2148     static_assert(std::is_same_v<T, uint32_t> || std::is_same_v<T, uint16_t>,
2149                   "Only Integer types are allowed");
2150     if (overwriting_) {
2151       // Branches to labels are emitted into their placeholders here.
2152       buffer_.Store<T>(overwrite_location_, value);
2153       overwrite_location_ += sizeof(T);
2154     } else {
2155       // Other instructions are simply appended at the end here.
2156       AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2157       buffer_.Emit<T>(value);
2158     }
2159   }
2160 
Emit16(uint32_t value)2161   void Emit16(uint32_t value) { Emit(dchecked_integral_cast<uint16_t>(value)); }
Emit32(uint32_t value)2162   void Emit32(uint32_t value) { Emit(value); }
2163 
2164   // Adjust base register and offset if needed for load/store with a large offset.
2165   void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs);
2166 
2167   // Helper templates for loads/stores with 32-bit offsets.
2168   template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
2169   void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset);
2170   template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
2171   void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset);
2172   template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
2173   void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset);
2174   template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
2175   void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset);
2176 
2177   // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`.
2178   void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp);
2179 
2180   // RVV constants and helpers
2181 
2182   enum class Nf : uint32_t {
2183     k1 = 0b000,
2184     k2 = 0b001,
2185     k3 = 0b010,
2186     k4 = 0b011,
2187     k5 = 0b100,
2188     k6 = 0b101,
2189     k7 = 0b110,
2190     k8 = 0b111,
2191   };
2192 
2193   enum class VAIEncoding : uint32_t {
2194                      // ----Operands---- | Type of Scalar                | Instruction type
2195     kOPIVV = 0b000,  // vector-vector    | --                            | R-type
2196     kOPFVV = 0b001,  // vector-vector    | --                            | R-type
2197     kOPMVV = 0b010,  // vector-vector    | --                            | R-type
2198     kOPIVI = 0b011,  // vector-immediate | imm[4:0]                      | R-type
2199     kOPIVX = 0b100,  // vector-scalar    | GPR x register rs1            | R-type
2200     kOPFVF = 0b101,  // vector-scalar    | FP f register rs1             | R-type
2201     kOPMVX = 0b110,  // vector-scalar    | GPR x register rs1            | R-type
2202     kOPCFG = 0b111,  // scalars-imms     | GPR x register rs1 & rs2/imm  | R/I-type
2203   };
2204 
2205   enum class MemAddressMode : uint32_t {
2206     kUnitStride = 0b00,
2207     kIndexedUnordered = 0b01,
2208     kStrided = 0b10,
2209     kIndexedOrdered = 0b11,
2210   };
2211 
2212   enum class VectorWidth : uint32_t {
2213     k8 = 0b000,
2214     k16 = 0b101,
2215     k32 = 0b110,
2216     k64 = 0b111,
2217 
2218     kMask = 0b000,
2219     kWholeR = 0b000,
2220   };
2221 
EncodeRVVMemF7(const Nf nf,const uint32_t mew,const MemAddressMode mop,const VM vm)2222   static constexpr uint32_t EncodeRVVMemF7(const Nf nf,
2223                                            const uint32_t mew,
2224                                            const MemAddressMode mop,
2225                                            const VM vm) {
2226     DCHECK(IsUint<3>(enum_cast<uint32_t>(nf)));
2227     DCHECK(IsUint<1>(mew));
2228     DCHECK(IsUint<2>(enum_cast<uint32_t>(mop)));
2229     DCHECK(IsUint<1>(enum_cast<uint32_t>(vm)));
2230 
2231     return enum_cast<uint32_t>(nf) << 4 | mew << 3 | enum_cast<uint32_t>(mop) << 1 |
2232            enum_cast<uint32_t>(vm);
2233   }
2234 
EncodeRVVF7(const uint32_t funct6,const VM vm)2235   static constexpr uint32_t EncodeRVVF7(const uint32_t funct6, const VM vm) {
2236     DCHECK(IsUint<6>(funct6));
2237     return funct6 << 1 | enum_cast<uint32_t>(vm);
2238   }
2239 
2240   template <unsigned kWidth>
EncodeIntWidth(const int32_t imm)2241   static constexpr uint32_t EncodeIntWidth(const int32_t imm) {
2242     DCHECK(IsInt<kWidth>(imm));
2243     return static_cast<uint32_t>(imm) & MaskLeastSignificant<uint32_t>(kWidth);
2244   }
2245 
EncodeInt5(const int32_t imm)2246   static constexpr uint32_t EncodeInt5(const int32_t imm) { return EncodeIntWidth<5>(imm); }
EncodeInt6(const int32_t imm)2247   static constexpr uint32_t EncodeInt6(const int32_t imm) { return EncodeIntWidth<6>(imm); }
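
  // Worked example (illustrative): EncodeInt6(-1) masks the sign-extended value down to the
  // low 6 bits, yielding 0b111111 (0x3f), while EncodeInt5(7) is simply 0b00111.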

  template <typename Reg>
  static constexpr uint32_t EncodeShortReg(const Reg reg) {
    DCHECK(IsShortReg(reg));
    return enum_cast<uint32_t>(reg) - 8u;
  }
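
  // Worked example (illustrative): compressed encodings address only the "short" registers
  // x8-x15 in a 3-bit field, so EncodeShortReg(A0) == 10 - 8 == 2.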

  // Rearrange given offset in the way {offset[0] | offset[1]}.
  static constexpr uint32_t EncodeOffset0_1(int32_t offset) {
    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<2>(u_offset));

    return u_offset >> 1 | (u_offset & 1u) << 1;
  }
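
  // Worked example (illustrative): the two bits simply swap places, so
  // EncodeOffset0_1(0b10) == 0b01 and EncodeOffset0_1(0b01) == 0b10.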

  // Rearrange given offset, scaled by 4, in the way {offset[5:2] | offset[7:6]}.
  static constexpr uint32_t ExtractOffset52_76(int32_t offset) {
    DCHECK(IsAligned<4>(offset)) << "Offset should be scalable by 4";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<6 + 2>(u_offset));

    uint32_t imm_52 = BitFieldExtract(u_offset, 2, 4);
    uint32_t imm_76 = BitFieldExtract(u_offset, 6, 2);

    return BitFieldInsert(imm_76, imm_52, 2, 4);
  }
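
  // Worked example (illustrative) for this family of extractors: for offset == 0xc4
  // (0b11000100), offset[5:2] == 0b0001 and offset[7:6] == 0b11, so the returned field is
  // {0b0001 | 0b11} == 0b000111.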

  // Rearrange given offset, scaled by 8, in the way {offset[5:3] | offset[8:6]}.
  static constexpr uint32_t ExtractOffset53_86(int32_t offset) {
    DCHECK(IsAligned<8>(offset)) << "Offset should be scalable by 8";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<6 + 3>(u_offset));

    uint32_t imm_53 = BitFieldExtract(u_offset, 3, 3);
    uint32_t imm_86 = BitFieldExtract(u_offset, 6, 3);

    return BitFieldInsert(imm_86, imm_53, 3, 3);
  }

  // Rearrange given offset, scaled by 4, in the way {offset[5:2] | offset[6]}.
  static constexpr uint32_t ExtractOffset52_6(int32_t offset) {
    DCHECK(IsAligned<4>(offset)) << "Offset should be scalable by 4";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<5 + 2>(u_offset));

    uint32_t imm_52 = BitFieldExtract(u_offset, 2, 4);
    uint32_t imm_6  = BitFieldExtract(u_offset, 6, 1);

    return BitFieldInsert(imm_6, imm_52, 1, 4);
  }

  // Rearrange given offset, scaled by 8, in the way {offset[5:3] | offset[7:6]}.
  static constexpr uint32_t ExtractOffset53_76(int32_t offset) {
    DCHECK(IsAligned<8>(offset)) << "Offset should be scalable by 8";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<5 + 3>(u_offset));

    uint32_t imm_53 = BitFieldExtract(u_offset, 3, 3);
    uint32_t imm_76 = BitFieldExtract(u_offset, 6, 2);

    return BitFieldInsert(imm_76, imm_53, 2, 3);
  }

  static constexpr bool IsImmCLuiEncodable(uint32_t uimm) {
    // The immediate of c.lui is a bit tricky: it is not a full 32-bit value but its
    // bits [31:12] (where bit 17 is the sign bit), shifted down so that bits [19:0]
    // are the meaningful ones. We therefore want a signed non-zero 6-bit immediate,
    // which keeps values in the range [0x1, 0x1f], and in the range [0xfffe0, 0xfffff]
    // for negative values, since the sign bit was bit 17 (now bit 5, replicated in the
    // higher bits too). The encoding with immediate == 0 is reserved.
    // For more details, see chapter 16.5 of the specification.

    return uimm != 0u && (IsUint<5>(uimm) || IsUint<5>(uimm - 0xfffe0u));
  }
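
  // Worked examples (illustrative): uimm == 0x1 and uimm == 0xfffff (0xfffe0 + 0x1f) are
  // encodable, while uimm == 0 (reserved) and uimm == 0x20 (bit 5 not replicated upwards)
  // are not.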

  // Emit helpers.

  // I-type instruction:
  //
  //    31                   20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . . . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [        imm11:0            rs1   funct3     rd        opcode   ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2>
  void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) {
    DCHECK(IsInt<12>(imm12)) << imm12;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = static_cast<uint32_t>(imm12) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        funct3 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }
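
  // Worked example (illustrative): `addi a0, a1, -1` corresponds to
  // EmitI(-1, A1, /*funct3=*/ 0x0, A0, /*opcode=*/ 0x13), which packs
  // 0xfff << 20 | 11 << 15 | 0x0 << 12 | 10 << 7 | 0x13 == 0xfff58513.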

  // R-type instruction:
  //
  //    31         25 24     20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [   funct7        rs2       rs1   funct3     rd        opcode   ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2, typename Reg3>
  void EmitR(uint32_t funct7, Reg1 rs2, Reg2 rs1, uint32_t funct3, Reg3 rd, uint32_t opcode) {
    DCHECK(IsUint<7>(funct7));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = funct7 << 25 | static_cast<uint32_t>(rs2) << 20 |
                        static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // R-type instruction variant for floating-point fused multiply-add/sub (F[N]MADD/F[N]MSUB):
  //
  //    31     27  25 24     20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . | . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [  rs3     fmt    rs2       rs1   funct3     rd        opcode   ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2, typename Reg3, typename Reg4>
  void EmitR4(
      Reg1 rs3, uint32_t fmt, Reg2 rs2, Reg3 rs1, uint32_t funct3, Reg4 rd, uint32_t opcode) {
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs3)));
    DCHECK(IsUint<2>(fmt));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = static_cast<uint32_t>(rs3) << 27 | static_cast<uint32_t>(fmt) << 25 |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 | static_cast<uint32_t>(rd) << 7 |
                        opcode;
    Emit32(encoding);
  }

  // S-type instruction:
  //
  //    31         25 24     20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [   imm11:5       rs2       rs1   funct3   imm4:0      opcode   ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2>
  void EmitS(int32_t imm12, Reg1 rs2, Reg2 rs1, uint32_t funct3, uint32_t opcode) {
    DCHECK(IsInt<12>(imm12)) << imm12;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = (static_cast<uint32_t>(imm12) & 0xFE0) << 20 |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 |
                        (static_cast<uint32_t>(imm12) & 0x1F) << 7 | opcode;
    Emit32(encoding);
  }
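
  // Worked example (illustrative): `sw a0, 4(a1)` corresponds to
  // EmitS(4, A0, A1, /*funct3=*/ 0x2, /*opcode=*/ 0x23); imm[11:5] == 0 lands in
  // bits 31:25 and imm[4:0] == 4 in bits 11:7, giving 0x00a5a223.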

  // I-type instruction variant for shifts (SLLI / SRLI / SRAI):
  //
  //    31       26 25       20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . | . . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [  imm11:6  imm5:0(shamt)   rs1   funct3     rd        opcode   ]
  //   -----------------------------------------------------------------
  void EmitI6(uint32_t funct6,
              uint32_t imm6,
              XRegister rs1,
              uint32_t funct3,
              XRegister rd,
              uint32_t opcode) {
    DCHECK(IsUint<6>(funct6));
    DCHECK(IsUint<6>(imm6)) << imm6;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = funct6 << 26 | static_cast<uint32_t>(imm6) << 20 |
                        static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // B-type instruction:
  //
  //   31 30       25 24     20 19     15 14 12 11    8 7 6           0
  //   -----------------------------------------------------------------
  //   [ | . . . . . | . . . . | . . . . | . . | . . . | | . . . . . . ]
  //   [imm12 imm11:5    rs2       rs1   funct3 imm4:1 imm11  opcode   ]
  //   -----------------------------------------------------------------
  void EmitB(int32_t offset, XRegister rs2, XRegister rs1, uint32_t funct3, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    DCHECK(IsInt<13>(offset)) << offset;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<7>(opcode));
    uint32_t imm12 = (static_cast<uint32_t>(offset) >> 1) & 0xfffu;
    uint32_t encoding = (imm12 & 0x800u) << (31 - 11) | (imm12 & 0x03f0u) << (25 - 4) |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 |
                        (imm12 & 0xfu) << 8 | (imm12 & 0x400u) >> (10 - 7) | opcode;
    Emit32(encoding);
  }
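
  // Worked example (illustrative): `beq t0, t1, .+8` corresponds to
  // EmitB(8, T1, T0, /*funct3=*/ 0x0, /*opcode=*/ 0x63); offset 8 has only offset[3] set,
  // which falls in the imm[4:1] field, giving 0x00628463.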

  // U-type instruction:
  //
  //    31                                   12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . . . . . . . . . . . . . . | . . . . | . . . . . . ]
  //   [                imm31:12                    rd        opcode   ]
  //   -----------------------------------------------------------------
  void EmitU(uint32_t imm20, XRegister rd, uint32_t opcode) {
    CHECK(IsUint<20>(imm20)) << imm20;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = imm20 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // J-type instruction:
  //
  //   31 30               21   19           12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ | . . . . . . . . . | | . . . . . . . | . . . . | . . . . . . ]
  //   [imm20    imm10:1    imm11   imm19:12        rd        opcode   ]
  //   -----------------------------------------------------------------
  void EmitJ(int32_t offset, XRegister rd, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    CHECK(IsInt<21>(offset)) << offset;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t imm20 = (static_cast<uint32_t>(offset) >> 1) & 0xfffffu;
    uint32_t encoding = (imm20 & 0x80000u) << (31 - 19) | (imm20 & 0x03ffu) << 21 |
                        (imm20 & 0x400u) << (20 - 10) | (imm20 & 0x7f800u) << (12 - 11) |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }
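
  // Worked example (illustrative): `jal ra, .+4` corresponds to
  // EmitJ(4, RA, /*opcode=*/ 0x6f); only offset[2] is set, landing in the imm[10:1]
  // field (bits 30:21), so the encoding is 0x004000ef.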

  // Compressed Instruction Encodings

  // CR-type instruction:
  //
  //   15    12 11      7 6       2 1 0
  //   ---------------------------------
  //   [ . . . | . . . . | . . . . | . ]
  //   [ func4   rd/rs1      rs2    op ]
  //   ---------------------------------
  //
  void EmitCR(uint32_t funct4, XRegister rd_rs1, XRegister rs2, uint32_t opcode) {
    DCHECK(IsUint<4>(funct4));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd_rs1)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct4 << 12 | static_cast<uint32_t>(rd_rs1) << 7 |
                        static_cast<uint32_t>(rs2) << 2 | opcode;
    Emit16(encoding);
  }

  // CI-type instruction:
  //
  //   15  13   11      7 6       2 1 0
  //   ---------------------------------
  //   [ . . | | . . . . | . . . . | . ]
  //   [func3 imm rd/rs1     imm    op ]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCI(uint32_t funct3, Reg rd_rs1, uint32_t imm6, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd_rs1)));
    DCHECK(IsUint<6>(imm6));
    DCHECK(IsUint<2>(opcode));

    uint32_t immH1 = BitFieldExtract(imm6, 5, 1);
    uint32_t immL5 = BitFieldExtract(imm6, 0, 5);

    uint32_t encoding =
        funct3 << 13 | immH1 << 12 | static_cast<uint32_t>(rd_rs1) << 7 | immL5 << 2 | opcode;
    Emit16(encoding);
  }
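
  // Worked example (illustrative): `c.li a0, 1` corresponds to
  // EmitCI(/*funct3=*/ 0b010, A0, /*imm6=*/ 1, /*opcode=*/ 0b01); imm[5] == 0 goes to
  // bit 12 and imm[4:0] == 1 to bits 6:2, giving 0x4505.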

  // CSS-type instruction:
  //
  //   15  13 12        7 6       2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . | . . . . | . ]
  //   [func3     imm6      rs2     op ]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCSS(uint32_t funct3, uint32_t offset6, Reg rs2, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<6>(offset6));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct3 << 13 | offset6 << 7 | static_cast<uint32_t>(rs2) << 2 | opcode;
    Emit16(encoding);
  }

  // CIW-type instruction:
  //
  //   15  13 12            5 4   2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . . . | . . | . ]
  //   [func3     imm8         rd'  op ]
  //   ---------------------------------
  //
  void EmitCIW(uint32_t funct3, uint32_t imm8, XRegister rd_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<8>(imm8));
    DCHECK(IsShortReg(rd_s)) << rd_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct3 << 13 | imm8 << 5 | EncodeShortReg(rd_s) << 2 | opcode;
    Emit16(encoding);
  }

  // CL/S-type instruction:
  //
  //   15  13 12  10 9  7 6 5 4   2 1 0
  //   ---------------------------------
  //   [ . . | . . | . . | . | . . | . ]
  //   [func3  imm  rs1' imm rd/rs2' op]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCM(uint32_t funct3, uint32_t imm5, XRegister rs1_s, Reg rd_rs2_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(imm5));
    DCHECK(IsShortReg(rs1_s)) << rs1_s;
    DCHECK(IsShortReg(rd_rs2_s)) << rd_rs2_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t immH3 = BitFieldExtract(imm5, 2, 3);
    uint32_t immL2 = BitFieldExtract(imm5, 0, 2);

    uint32_t encoding = funct3 << 13 | immH3 << 10 | EncodeShortReg(rs1_s) << 7 | immL2 << 5 |
                        EncodeShortReg(rd_rs2_s) << 2 | opcode;
    Emit16(encoding);
  }

  // CA-type instruction:
  //
  //   15         10 9  7 6 5 4   2 1 0
  //   ---------------------------------
  //   [ . . . . . | . . | . | . . | . ]
  //   [   funct6 rd/rs1' funct2 rs2' op]
  //   ---------------------------------
  //
  void EmitCA(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, uint32_t rs2_v, uint32_t opcode) {
    DCHECK(IsUint<6>(funct6));
    DCHECK(IsShortReg(rd_rs1_s)) << rd_rs1_s;
    DCHECK(IsUint<2>(funct2));
    DCHECK(IsUint<3>(rs2_v));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding =
        funct6 << 10 | EncodeShortReg(rd_rs1_s) << 7 | funct2 << 5 | rs2_v << 2 | opcode;
    Emit16(encoding);
  }

  void EmitCAReg(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, XRegister rs2_s, uint32_t opcode) {
    DCHECK(IsShortReg(rs2_s)) << rs2_s;
    EmitCA(funct6, rd_rs1_s, funct2, EncodeShortReg(rs2_s), opcode);
  }

  void EmitCAImm(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, uint32_t funct3, uint32_t opcode) {
    EmitCA(funct6, rd_rs1_s, funct2, funct3, opcode);
  }

  // CB-type instruction:
  //
  //   15  13 12  10 9  7 6       2 1 0
  //   ---------------------------------
  //   [ . . | . . | . . | . . . . | . ]
  //   [func3 offset rs1'   offset  op ]
  //   ---------------------------------
  //
  void EmitCB(uint32_t funct3, int32_t offset8, XRegister rd_rs1_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<8>(offset8));
    DCHECK(IsShortReg(rd_rs1_s)) << rd_rs1_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t offsetH3 = BitFieldExtract<uint32_t>(offset8, 5, 3);
    uint32_t offsetL5 = BitFieldExtract<uint32_t>(offset8, 0, 5);

    uint32_t encoding =
        funct3 << 13 | offsetH3 << 10 | EncodeShortReg(rd_rs1_s) << 7 | offsetL5 << 2 | opcode;
    Emit16(encoding);
  }

  // Wrappers for EmitCB with different immediate bit permutations.
  void EmitCBBranch(uint32_t funct3, int32_t offset, XRegister rs1_s, uint32_t opcode) {
    DCHECK(IsInt<9>(offset));
    DCHECK_ALIGNED(offset, 2);

    uint32_t u_offset = static_cast<uint32_t>(offset);

    // offset[8|4:3]
    uint32_t offsetH3 = (BitFieldExtract(u_offset, 8, 1) << 2) |
                         BitFieldExtract(u_offset, 3, 2);
    // offset[7:6|2:1|5]
    uint32_t offsetL5 = (BitFieldExtract(u_offset, 6, 2) << 3) |
                        (BitFieldExtract(u_offset, 1, 2) << 1) |
                         BitFieldExtract(u_offset, 5, 1);

    EmitCB(funct3, BitFieldInsert(offsetL5, offsetH3, 5, 3), rs1_s, opcode);
  }

  void EmitCBArithmetic(
      uint32_t funct3, uint32_t funct2, uint32_t imm, XRegister rd_s, uint32_t opcode) {
    uint32_t imm_5 = BitFieldExtract(imm, 5, 1);
    uint32_t immH3 = BitFieldInsert(funct2, imm_5, 2, 1);
    uint32_t immL5 = BitFieldExtract(imm, 0, 5);

    EmitCB(funct3, BitFieldInsert(immL5, immH3, 5, 3), rd_s, opcode);
  }

  // CJ-type instruction:
  //
  //   15  13 12                  2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . . . . . . | . ]
  //   [func3    jump target 11     op ]
  //   ---------------------------------
  //
  void EmitCJ(uint32_t funct3, int32_t offset, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    DCHECK(IsInt<12>(offset)) << offset;
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<2>(opcode));

    uint32_t uoffset = static_cast<uint32_t>(offset);
    // offset[11|4|9:8|10|6|7|3:1|5]
    uint32_t jumpt = (BitFieldExtract(uoffset, 11, 1) << 10) |
                     (BitFieldExtract(uoffset, 4, 1) << 9)   |
                     (BitFieldExtract(uoffset, 8, 2) << 7)   |
                     (BitFieldExtract(uoffset, 10, 1) << 6)  |
                     (BitFieldExtract(uoffset, 6, 1) << 5)   |
                     (BitFieldExtract(uoffset, 7, 1) << 4)   |
                     (BitFieldExtract(uoffset, 1, 3) << 1)   |
                      BitFieldExtract(uoffset, 5, 1);

    DCHECK(IsUint<11>(jumpt));

    uint32_t encoding = funct3 << 13 | jumpt << 2 | opcode;
    Emit16(encoding);
  }
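
  // Worked example (illustrative): `c.j .+4` corresponds to
  // EmitCJ(/*funct3=*/ 0b101, 4, /*opcode=*/ 0b01); only offset[3:1] == 0b010 is non-zero
  // in the permuted field, giving 0xa011.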

  ArenaVector<Branch> branches_;

  // For checking that we finalize the code only once.
  bool finalized_;

  // Whether we are appending instructions at the end of the buffer or overwriting existing ones.
  bool overwriting_;
  // The current overwrite location.
  uint32_t overwrite_location_;

  // Use `std::deque<>` for literal labels to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;
  ArenaDeque<Literal> long_literals_;  // 64-bit literals separated for alignment reasons.

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for `GetAdjustedPosition()`, see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  uint32_t last_branch_id_;

  Riscv64ExtensionMask enabled_extensions_;
  uint32_t available_scratch_core_registers_;
  uint32_t available_scratch_fp_registers_;

  static constexpr uint32_t kXlen = 64;

  friend class ScopedExtensionsOverride;
  friend class ScratchRegisterScope;

  DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler);
};

class ScopedExtensionsOverride {
 public:
  ScopedExtensionsOverride(Riscv64Assembler* assembler, Riscv64ExtensionMask enabled_extensions)
      : assembler_(assembler),
        old_enabled_extensions_(assembler->enabled_extensions_) {
    assembler->enabled_extensions_ = enabled_extensions;
  }

  ~ScopedExtensionsOverride() {
    assembler_->enabled_extensions_ = old_enabled_extensions_;
  }

 protected:
  static Riscv64ExtensionMask GetEnabledExtensions(Riscv64Assembler* assembler) {
    return assembler->enabled_extensions_;
  }

 private:
  Riscv64Assembler* const assembler_;
  const Riscv64ExtensionMask old_enabled_extensions_;
};

template <Riscv64ExtensionMask kMask>
class ScopedExtensionsRestriction : public ScopedExtensionsOverride {
 public:
  explicit ScopedExtensionsRestriction(Riscv64Assembler* assembler)
      : ScopedExtensionsOverride(assembler, GetEnabledExtensions(assembler) & kMask) {}
};

template <Riscv64ExtensionMask kMask>
class ScopedExtensionsInclusion : public ScopedExtensionsOverride {
 public:
  explicit ScopedExtensionsInclusion(Riscv64Assembler* assembler)
      : ScopedExtensionsOverride(assembler, GetEnabledExtensions(assembler) | kMask) {}
};

template <Riscv64ExtensionMask kMask>
using ScopedExtensionsExclusion = ScopedExtensionsRestriction<~kMask>;

using ScopedLrScExtensionsRestriction =
    ScopedExtensionsRestriction<kRiscv64LrScSequenceExtensionsMask>;
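
// A minimal usage sketch (illustrative): code emitting an LR/SC sequence can declare
//   ScopedLrScExtensionsRestriction restriction(assembler);
// so that, for the rest of the scope, only the extensions allowed between the LR and the SC
// stay enabled; with the `kLoadStore` bit cleared, debug builds then reject stray loads and
// stores in the middle of the sequence.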

class ScratchRegisterScope {
 public:
  explicit ScratchRegisterScope(Riscv64Assembler* assembler)
      : assembler_(assembler),
        old_available_scratch_core_registers_(assembler->available_scratch_core_registers_),
        old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {}

  ~ScratchRegisterScope() {
    assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_;
    assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_;
  }

  // Allocate a scratch `XRegister`. There must be an available register to allocate.
  XRegister AllocateXRegister() {
    CHECK_NE(assembler_->available_scratch_core_registers_, 0u);
    // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)).
    uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) -
                       CLZ(assembler_->available_scratch_core_registers_);
    assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    return enum_cast<XRegister>(reg_num);
  }

  // Free a previously unavailable core register for use as a scratch register.
  // This can be an arbitrary register, not necessarily the usual `TMP` or `TMP2`.
  void FreeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u);
    assembler_->available_scratch_core_registers_ |= 1u << reg_num;
  }

  // The number of available scratch core registers.
  size_t AvailableXRegisters() {
    return POPCOUNT(assembler_->available_scratch_core_registers_);
  }

  // Make sure a core register is available for use as a scratch register.
  void IncludeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    assembler_->available_scratch_core_registers_ |= 1u << reg_num;
  }

  // Make sure a core register is not available for use as a scratch register.
  void ExcludeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
  }

  // Allocate a scratch `FRegister`. There must be an available register to allocate.
  FRegister AllocateFRegister() {
    CHECK_NE(assembler_->available_scratch_fp_registers_, 0u);
    // Allocate the highest available scratch register (same as for core registers).
    uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) -
                       CLZ(assembler_->available_scratch_fp_registers_);
    assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    return enum_cast<FRegister>(reg_num);
  }

  // Free a previously unavailable FP register for use as a scratch register.
  // This can be an arbitrary register, not necessarily the usual `FTMP`.
  void FreeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u);
    assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
  }

  // The number of available scratch FP registers.
  size_t AvailableFRegisters() {
    return POPCOUNT(assembler_->available_scratch_fp_registers_);
  }

  // Make sure an FP register is available for use as a scratch register.
  void IncludeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
  }

  // Make sure an FP register is not available for use as a scratch register.
  void ExcludeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
  }

 private:
  Riscv64Assembler* const assembler_;
  const uint32_t old_available_scratch_core_registers_;
  const uint32_t old_available_scratch_fp_registers_;

  DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope);
};
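
// A minimal usage sketch (illustrative): scratch register availability is restored when the
// scope is destroyed.
//
//   {
//     ScratchRegisterScope srs(assembler);
//     XRegister tmp = srs.AllocateXRegister();  // Highest available, typically TMP (T6).
//     // ... emit code that clobbers `tmp` ...
//   }  // `tmp` becomes available to other scopes again here.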

constexpr Riscv64ExtensionMask kRiscv64CompressedExtensionsMask =
    Riscv64ExtensionBit(Riscv64Extension::kZca) |
    Riscv64ExtensionBit(Riscv64Extension::kZcd) |
    Riscv64ExtensionBit(Riscv64Extension::kZcb);

using ScopedNoCInstructions = ScopedExtensionsExclusion<kRiscv64CompressedExtensionsMask>;
using ScopedUseCInstructions = ScopedExtensionsInclusion<kRiscv64CompressedExtensionsMask>;

}  // namespace riscv64
}  // namespace art

#endif  // ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_