/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
#define ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_

#include <cstdint>
#include <cstring>
#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "arch/riscv64/instruction_set_features_riscv64.h"
#include "base/arena_containers.h"
#include "base/globals.h"
#include "base/macros.h"
#include "base/pointer_size.h"
#include "managed_register_riscv64.h"
#include "utils/assembler.h"
#include "utils/label.h"

namespace art HIDDEN {
namespace riscv64 {

class ScratchRegisterScope;

static constexpr size_t kRiscv64HalfwordSize = 2;
static constexpr size_t kRiscv64WordSize = 4;
static constexpr size_t kRiscv64DoublewordSize = 8;
static constexpr size_t kRiscv64FloatRegSizeInBytes = 8;

// The `Riscv64Extension` enumeration is used for restricting the instructions that the assembler
// can use. Some restrictions are checked only in debug mode (for example, load and store
// instructions check `kLoadStore`); other restrictions are checked at run time and affect the
// emitted code (for example, the `SextW()` pseudo-instruction selects between an implementation
// from "Zcb", one from "Zbb", and a two-instruction sequence from the basic instruction set).
enum class Riscv64Extension : uint32_t {
  kLoadStore,  // Pseudo-extension encompassing all loads and stores. Used to check that
               // we do not have loads and stores in the middle of an LR/SC sequence.
  kZifencei,
  kM,
  kA,
  kZicsr,
  kF,
  kD,
  kZba,
  kZbb,
  kZbs,
  kV,
  kZca,  // "C" extension instructions except floating point loads/stores.
  kZcd,  // "C" extension double loads/stores.
         // Note: RV64 cannot implement Zcf ("C" extension float loads/stores).
  kZcb,  // Simple 16-bit operations not present in the original "C" extension.

  kLast = kZcb
};

using Riscv64ExtensionMask = uint32_t;

constexpr Riscv64ExtensionMask Riscv64ExtensionBit(Riscv64Extension ext) {
  return 1u << enum_cast<>(ext);
}

constexpr Riscv64ExtensionMask kRiscv64AllExtensionsMask =
    MaxInt<Riscv64ExtensionMask>(enum_cast<>(Riscv64Extension::kLast) + 1);

// Extensions allowed in an LR/SC sequence (between the LR and SC).
constexpr Riscv64ExtensionMask kRiscv64LrScSequenceExtensionsMask =
    Riscv64ExtensionBit(Riscv64Extension::kZca);
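
// Sanity-check sketch (assuming `MaxInt(bits)` yields a mask of `bits` low set bits):
// the "all extensions" mask should be a contiguous run of set bits covering every
// enumerator up to `kLast`.
static_assert(kRiscv64AllExtensionsMask ==
                  (Riscv64ExtensionBit(Riscv64Extension::kLast) << 1) - 1u,
              "The all-extensions mask must cover exactly the defined extensions");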

enum class FPRoundingMode : uint32_t {
  kRNE = 0x0,  // Round to Nearest, ties to Even
  kRTZ = 0x1,  // Round towards Zero
  kRDN = 0x2,  // Round Down (towards −Infinity)
  kRUP = 0x3,  // Round Up (towards +Infinity)
  kRMM = 0x4,  // Round to Nearest, ties to Max Magnitude
  kDYN = 0x7,  // Dynamic rounding mode
  kDefault = kDYN,
  // Some instructions never need to round even though the spec includes the RM field.
  // To simplify testing, emit the RM as 0 by default for these instructions because that's what
  // `clang` does and because `llvm-objdump` fails to disassemble the other rounding modes.
  kIgnored = 0
};

enum class AqRl : uint32_t {
  kNone = 0x0,
  kRelease = 0x1,
  kAcquire = 0x2,
  kAqRl = kRelease | kAcquire
};

// Operand type for the `Fence()` instruction below: a bitmask of the device input/output
// and memory read/write bits used for the predecessor and successor ordering sets.
enum FenceType {
  kFenceNone = 0,
  kFenceWrite = 1,
  kFenceRead = 2,
  kFenceOutput = 4,
  kFenceInput = 8,
  kFenceDefault = 0xf,
};

// Used to test the values returned by FClassS/FClassD.
enum FPClassMaskType {
  kNegativeInfinity = 0x001,
  kNegativeNormal = 0x002,
  kNegativeSubnormal = 0x004,
  kNegativeZero = 0x008,
  kPositiveZero = 0x010,
  kPositiveSubnormal = 0x020,
  kPositiveNormal = 0x040,
  kPositiveInfinity = 0x080,
  kSignalingNaN = 0x100,
  kQuietNaN = 0x200,
};
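
// A usage sketch (hypothetical, for illustration): `FClassD()`, declared in the assembler
// below, sets exactly one of the mask bits above in its destination register, so a NaN
// test can be written as:
//   FClassD(rd, frs);
//   Andi(rd, rd, kSignalingNaN | kQuietNaN);  // `rd` is non-zero iff `frs` was a NaN.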

enum class CSRAddress : uint32_t {
  kVstart = 0x008,     // Vector start position, URW
  kVxsat = 0x009,      // Fixed-Point Saturate Flag, URW
  kVxrm = 0x00A,       // Fixed-Point Rounding Mode, URW
  kReserved1 = 0x00B,  // Reserved for future vector CSRs
  kReserved2 = 0x00C,
  kReserved3 = 0x00D,
  kReserved4 = 0x00E,
  kVcsr = 0x00F,       // Vector control and status register, URW
  kVl = 0xC20,         // Vector length, URO
  kVtype = 0xC21,      // Vector data type register, URO
  kVlenb = 0xC22,      // VLEN/8 (vector register length in bytes), URO
};
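
// A usage sketch (hypothetical): reading `vlenb` through the "Zicsr" instructions
// declared in the assembler below. `Zero` is assumed here to name the hard-wired zero
// register, which makes CSRRS a plain CSR read with no side effects on the CSR:
//   Csrrs(rd, enum_cast<uint32_t>(CSRAddress::kVlenb), Zero);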

class Riscv64Label : public Label {
 public:
  Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {}

  Riscv64Label(Riscv64Label&& src) noexcept
      // NOLINTNEXTLINE - src.prev_branch_id_ is valid after the move
      : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {}

 private:
  static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max();

  uint32_t prev_branch_id_;  // To get distance from preceding branch, if any.

  friend class Riscv64Assembler;
  DISALLOW_COPY_AND_ASSIGN(Riscv64Label);
};

// An assembler literal is a value embedded in the code, retrieved using a PC-relative load.
class Literal {
 public:
  static constexpr size_t kMaxSize = 8;

  Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) {
    DCHECK_LE(size, Literal::kMaxSize);
    memcpy(data_, data, size);
  }

  template <typename T>
  T GetValue() const {
    DCHECK_EQ(size_, sizeof(T));
    T value;
    memcpy(&value, data_, sizeof(T));
    return value;
  }

  uint32_t GetSize() const { return size_; }

  const uint8_t* GetData() const { return data_; }

  Riscv64Label* GetLabel() { return &label_; }

  const Riscv64Label* GetLabel() const { return &label_; }

 private:
  Riscv64Label label_;
  const uint32_t size_;
  uint8_t data_[kMaxSize];

  DISALLOW_COPY_AND_ASSIGN(Literal);
};
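
// A minimal sketch (for illustration only; callers normally obtain literals from the
// assembler rather than constructing them directly): embedding a 64-bit constant.
//   uint64_t imm = UINT64_C(0x1234567890abcdef);
//   Literal lit(sizeof(imm), reinterpret_cast<const uint8_t*>(&imm));
//   DCHECK_EQ(lit.GetValue<uint64_t>(), imm);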

// Jump table: a table of 32-bit offsets to labels, emitted after the code and before the
// literals. Handled similarly to literals.
class JumpTable {
 public:
  explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {}

  size_t GetSize() const { return labels_.size() * sizeof(int32_t); }

  const ArenaVector<Riscv64Label*>& GetData() const { return labels_; }

  Riscv64Label* GetLabel() { return &label_; }

  const Riscv64Label* GetLabel() const { return &label_; }

 private:
  Riscv64Label label_;
  ArenaVector<Riscv64Label*> labels_;

  DISALLOW_COPY_AND_ASSIGN(JumpTable);
};

class Riscv64Assembler final : public Assembler {
 public:
  explicit Riscv64Assembler(ArenaAllocator* allocator,
                            const Riscv64InstructionSetFeatures* instruction_set_features = nullptr)
      : Riscv64Assembler(allocator,
                         instruction_set_features != nullptr
                             ? ConvertExtensions(instruction_set_features)
                             : kRiscv64AllExtensionsMask) {}

  Riscv64Assembler(ArenaAllocator* allocator, Riscv64ExtensionMask enabled_extensions)
      : Assembler(allocator),
        branches_(allocator->Adapter(kArenaAllocAssembler)),
        finalized_(false),
        overwriting_(false),
        overwrite_location_(0),
        literals_(allocator->Adapter(kArenaAllocAssembler)),
        long_literals_(allocator->Adapter(kArenaAllocAssembler)),
        jump_tables_(allocator->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0),
        last_old_position_(0),
        last_branch_id_(0),
        enabled_extensions_(enabled_extensions),
        available_scratch_core_registers_((1u << TMP) | (1u << TMP2)),
        available_scratch_fp_registers_(1u << FTMP) {
    cfi().DelayEmittingAdvancePCs();
  }

  virtual ~Riscv64Assembler() {
    for (auto& branch : branches_) {
      CHECK(branch.IsResolved());
    }
  }
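
  // A construction sketch (hypothetical usage): restricting the assembler to the base
  // ISA plus "M" and "A"; emitting instructions from other extensions would then trip
  // the extension checks described above.
  //   Riscv64Assembler assembler(allocator,
  //                              Riscv64ExtensionBit(Riscv64Extension::kLoadStore) |
  //                              Riscv64ExtensionBit(Riscv64Extension::kM) |
  //                              Riscv64ExtensionBit(Riscv64Extension::kA));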

  size_t CodeSize() const override { return Assembler::CodeSize(); }
  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }

  bool IsExtensionEnabled(Riscv64Extension ext) const {
    return (enabled_extensions_ & Riscv64ExtensionBit(ext)) != 0u;
  }

  // According to "The RISC-V Instruction Set Manual"

  // LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37
  // Note: These take a 20-bit unsigned value to align with the clang assembler for testing,
  // but the value stored in the register shall actually be sign-extended to 64 bits.
  void Lui(XRegister rd, uint32_t imm20);
  void Auipc(XRegister rd, uint32_t imm20);
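
  // For example (illustration): `Lui(rd, 0x80000)` places 0x80000000 in the low 32 bits
  // and sign-extends, so `rd` ends up holding 0xffffffff80000000 on RV64.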

  // Jump instructions (RV32I), opcode = 0x67, 0x6f
  void Jal(XRegister rd, int32_t offset);
  void Jalr(XRegister rd, XRegister rs1, int32_t offset);

  // Branch instructions (RV32I), opcode = 0x63, funct3 from 0x0 ~ 0x1 and 0x4 ~ 0x7
  void Beq(XRegister rs1, XRegister rs2, int32_t offset);
  void Bne(XRegister rs1, XRegister rs2, int32_t offset);
  void Blt(XRegister rs1, XRegister rs2, int32_t offset);
  void Bge(XRegister rs1, XRegister rs2, int32_t offset);
  void Bltu(XRegister rs1, XRegister rs2, int32_t offset);
  void Bgeu(XRegister rs1, XRegister rs2, int32_t offset);

  // Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6
  void Lb(XRegister rd, XRegister rs1, int32_t offset);
  void Lh(XRegister rd, XRegister rs1, int32_t offset);
  void Lw(XRegister rd, XRegister rs1, int32_t offset);
  void Ld(XRegister rd, XRegister rs1, int32_t offset);
  void Lbu(XRegister rd, XRegister rs1, int32_t offset);
  void Lhu(XRegister rd, XRegister rs1, int32_t offset);
  void Lwu(XRegister rd, XRegister rs1, int32_t offset);

  // Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3
  void Sb(XRegister rs2, XRegister rs1, int32_t offset);
  void Sh(XRegister rs2, XRegister rs1, int32_t offset);
  void Sw(XRegister rs2, XRegister rs1, int32_t offset);
  void Sd(XRegister rs2, XRegister rs1, int32_t offset);

  // IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7
  void Addi(XRegister rd, XRegister rs1, int32_t imm12);
  void Slti(XRegister rd, XRegister rs1, int32_t imm12);
  void Sltiu(XRegister rd, XRegister rs1, int32_t imm12);
  void Xori(XRegister rd, XRegister rs1, int32_t imm12);
  void Ori(XRegister rd, XRegister rs1, int32_t imm12);
  void Andi(XRegister rd, XRegister rs1, int32_t imm12);
  void Slli(XRegister rd, XRegister rs1, int32_t shamt);
  void Srli(XRegister rd, XRegister rs1, int32_t shamt);
  void Srai(XRegister rd, XRegister rs1, int32_t shamt);

  // ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7
  void Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sub(XRegister rd, XRegister rs1, XRegister rs2);
  void Slt(XRegister rd, XRegister rs1, XRegister rs2);
  void Sltu(XRegister rd, XRegister rs1, XRegister rs2);
  void Xor(XRegister rd, XRegister rs1, XRegister rs2);
  void Or(XRegister rd, XRegister rs1, XRegister rs2);
  void And(XRegister rd, XRegister rs1, XRegister rs2);
  void Sll(XRegister rd, XRegister rs1, XRegister rs2);
  void Srl(XRegister rd, XRegister rs1, XRegister rs2);
  void Sra(XRegister rd, XRegister rs1, XRegister rs2);

  // 32-bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5
  void Addiw(XRegister rd, XRegister rs1, int32_t imm12);
  void Slliw(XRegister rd, XRegister rs1, int32_t shamt);
  void Srliw(XRegister rd, XRegister rs1, int32_t shamt);
  void Sraiw(XRegister rd, XRegister rs1, int32_t shamt);

  // 32-bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7
  void Addw(XRegister rd, XRegister rs1, XRegister rs2);
  void Subw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sllw(XRegister rd, XRegister rs1, XRegister rs2);
  void Srlw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sraw(XRegister rd, XRegister rs1, XRegister rs2);

  // Environment call and breakpoint (RV32I), opcode = 0x73
  void Ecall();
  void Ebreak();

  // Fence instruction (RV32I): opcode = 0xf, funct3 = 0
  void Fence(uint32_t pred = kFenceDefault, uint32_t succ = kFenceDefault);
  void FenceTso();
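
  // A usage sketch (hypothetical): an acquire-style fence ordering earlier reads before
  // later reads and writes, i.e. `fence r, rw`:
  //   Fence(kFenceRead, kFenceRead | kFenceWrite);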

  // "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1
  void FenceI();

  // RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7
  void Mul(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulh(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulhsu(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulhu(XRegister rd, XRegister rs1, XRegister rs2);
  void Div(XRegister rd, XRegister rs1, XRegister rs2);
  void Divu(XRegister rd, XRegister rs1, XRegister rs2);
  void Rem(XRegister rd, XRegister rs1, XRegister rs2);
  void Remu(XRegister rd, XRegister rs1, XRegister rs2);

  // RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7
  void Mulw(XRegister rd, XRegister rs1, XRegister rs2);
  void Divw(XRegister rd, XRegister rs1, XRegister rs2);
  void Divuw(XRegister rd, XRegister rs1, XRegister rs2);
  void Remw(XRegister rd, XRegister rs1, XRegister rs2);
  void Remuw(XRegister rd, XRegister rs1, XRegister rs2);

  // RV32A/RV64A Standard Extension
  void LrW(XRegister rd, XRegister rs1, AqRl aqrl);
  void LrD(XRegister rd, XRegister rs1, AqRl aqrl);
  void ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
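
  // A compare-and-swap sketch built from these primitives (an illustration; register
  // names and label handling are hypothetical). Note that no other loads or stores may
  // be emitted between the LR and the SC; see `kRiscv64LrScSequenceExtensionsMask` above.
  //   retry:
  //     LrW(old_value, addr, AqRl::kAcquire);
  //     Bne(old_value, expected, <offset to done>);
  //     ScW(status, desired, addr, AqRl::kRelease);
  //     Bne(status, Zero, <offset to retry>);  // `status` is 0 on success.
  //   done: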

  // "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7
  void Csrrw(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrs(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrc(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5);
  void Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5);
  void Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5);

  // FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27
  void FLw(FRegister rd, XRegister rs1, int32_t offset);
  void FLd(FRegister rd, XRegister rs1, int32_t offset);
  void FSw(FRegister rs2, XRegister rs1, int32_t offset);
  void FSd(FRegister rs2, XRegister rs1, int32_t offset);

  // FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f
  void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);

  // FP FMA instruction helpers passing the default rounding mode.
  void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }

  // Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D
  void FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FSgnjS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjD(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2);
  void FMinS(FRegister rd, FRegister rs1, FRegister rs2);
  void FMinD(FRegister rd, FRegister rs1, FRegister rs2);
  void FMaxS(FRegister rd, FRegister rs1, FRegister rs2);
  void FMaxD(FRegister rd, FRegister rs1, FRegister rs2);
  void FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm);

  // Simple FP instruction helpers passing the default rounding mode.
  void FAddS(FRegister rd, FRegister rs1, FRegister rs2) {
    FAddS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FAddD(FRegister rd, FRegister rs1, FRegister rs2) {
    FAddD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSubS(FRegister rd, FRegister rs1, FRegister rs2) {
    FSubS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSubD(FRegister rd, FRegister rs1, FRegister rs2) {
    FSubD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FMulS(FRegister rd, FRegister rs1, FRegister rs2) {
    FMulS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FMulD(FRegister rd, FRegister rs1, FRegister rs2) {
    FMulD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FDivS(FRegister rd, FRegister rs1, FRegister rs2) {
    FDivS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FDivD(FRegister rd, FRegister rs1, FRegister rs2) {
    FDivD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSqrtS(FRegister rd, FRegister rs1) {
    FSqrtS(rd, rs1, FPRoundingMode::kDefault);
  }
  void FSqrtD(FRegister rd, FRegister rs1) {
    FSqrtD(rd, rs1, FPRoundingMode::kDefault);
  }
  void FCvtSD(FRegister rd, FRegister rs1) {
    FCvtSD(rd, rs1, FPRoundingMode::kDefault);
  }
  void FCvtDS(FRegister rd, FRegister rs1) {
    FCvtDS(rd, rs1, FPRoundingMode::kIgnored);
  }

  // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D
  void FEqS(XRegister rd, FRegister rs1, FRegister rs2);
  void FEqD(XRegister rd, FRegister rs1, FRegister rs2);
  void FLtS(XRegister rd, FRegister rs1, FRegister rs2);
  void FLtD(XRegister rd, FRegister rs1, FRegister rs2);
  void FLeS(XRegister rd, FRegister rs1, FRegister rs2);
  void FLeD(XRegister rd, FRegister rs1, FRegister rs2);

  // FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D
  void FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm);

  // FP conversion instruction helpers passing the default rounding mode.
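  // Note: `FCvtDW()` and `FCvtDWu()` below pass `kIgnored` because every 32-bit integer
  // is exactly representable as a double, so the conversion can never round. The same
  // reasoning applies to the widening `FCvtDS()` helper above.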
  void FCvtWS(XRegister rd, FRegister rs1) { FCvtWS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWD(XRegister rd, FRegister rs1) { FCvtWD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWuS(XRegister rd, FRegister rs1) { FCvtWuS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWuD(XRegister rd, FRegister rs1) { FCvtWuD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLS(XRegister rd, FRegister rs1) { FCvtLS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLD(XRegister rd, FRegister rs1) { FCvtLD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); }
  void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); }
  void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDLu(FRegister rd, XRegister rs1) { FCvtDLu(rd, rs1, FPRoundingMode::kDefault); }

  // FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D
  void FMvXW(XRegister rd, FRegister rs1);
  void FMvXD(XRegister rd, FRegister rs1);
  void FMvWX(FRegister rd, XRegister rs1);
  void FMvDX(FRegister rd, XRegister rs1);

  // FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D
  void FClassS(XRegister rd, FRegister rs1);
  void FClassD(XRegister rd, FRegister rs1);

542 // "C" Standard Extension, Compresseed Instructions
  void CLwsp(XRegister rd, int32_t offset);
  void CLdsp(XRegister rd, int32_t offset);
  void CFLdsp(FRegister rd, int32_t offset);
  void CSwsp(XRegister rs2, int32_t offset);
  void CSdsp(XRegister rs2, int32_t offset);
  void CFSdsp(FRegister rs2, int32_t offset);

  void CLw(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLd(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CFLd(FRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSw(XRegister rs2_s, XRegister rs1_s, int32_t offset);
  void CSd(XRegister rs2_s, XRegister rs1_s, int32_t offset);
  void CFSd(FRegister rs2_s, XRegister rs1_s, int32_t offset);

  void CLi(XRegister rd, int32_t imm);
  void CLui(XRegister rd, uint32_t nzimm6);
  void CAddi(XRegister rd, int32_t nzimm);
  void CAddiw(XRegister rd, int32_t imm);
  void CAddi16Sp(int32_t nzimm);
  void CAddi4Spn(XRegister rd_s, uint32_t nzuimm);
  void CSlli(XRegister rd, int32_t shamt);
  void CSrli(XRegister rd_s, int32_t shamt);
  void CSrai(XRegister rd_s, int32_t shamt);
  void CAndi(XRegister rd_s, int32_t imm);
  void CMv(XRegister rd, XRegister rs2);
  void CAdd(XRegister rd, XRegister rs2);
  void CAnd(XRegister rd_s, XRegister rs2_s);
  void COr(XRegister rd_s, XRegister rs2_s);
  void CXor(XRegister rd_s, XRegister rs2_s);
  void CSub(XRegister rd_s, XRegister rs2_s);
  void CAddw(XRegister rd_s, XRegister rs2_s);
  void CSubw(XRegister rd_s, XRegister rs2_s);

  // "Zcb" Standard Extension, part of "C", opcode = 0b00, 0b01, funct3 = 0b100.
  void CLbu(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLhu(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLh(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSb(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSh(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CZextB(XRegister rd_rs1_s);
  void CSextB(XRegister rd_rs1_s);
  void CZextH(XRegister rd_rs1_s);
  void CSextH(XRegister rd_rs1_s);
  void CZextW(XRegister rd_rs1_s);
  void CNot(XRegister rd_rs1_s);
  void CMul(XRegister rd_s, XRegister rs2_s);
  // "Zcb" Standard Extension End; resume "C" Standard Extension.
  // TODO(riscv64): Reorder "Zcb" after remaining "C" instructions.

  void CJ(int32_t offset);
  void CJr(XRegister rs1);
  void CJalr(XRegister rs1);
  void CBeqz(XRegister rs1_s, int32_t offset);
  void CBnez(XRegister rs1_s, int32_t offset);

  void CEbreak();
  void CNop();
  void CUnimp();

  // "Zba" Standard Extension, opcode = 0x1b, 0x33 or 0x3b, funct3 and funct7 vary.
  void AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh1Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh2Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh3Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void SlliUw(XRegister rd, XRegister rs1, int32_t shamt);

  // "Zbb" Standard Extension, opcode = 0x13, 0x1b, 0x33 or 0x3b, funct3 and funct7 vary.
  // Note: 32-bit sext.b, sext.h and zext.h from the Zbb extension are explicitly
  // prefixed with "Zbb" to differentiate them from the utility macros.
  void Andn(XRegister rd, XRegister rs1, XRegister rs2);
  void Orn(XRegister rd, XRegister rs1, XRegister rs2);
  void Xnor(XRegister rd, XRegister rs1, XRegister rs2);
  void Clz(XRegister rd, XRegister rs1);
  void Clzw(XRegister rd, XRegister rs1);
  void Ctz(XRegister rd, XRegister rs1);
  void Ctzw(XRegister rd, XRegister rs1);
  void Cpop(XRegister rd, XRegister rs1);
  void Cpopw(XRegister rd, XRegister rs1);
  void Min(XRegister rd, XRegister rs1, XRegister rs2);
  void Minu(XRegister rd, XRegister rs1, XRegister rs2);
  void Max(XRegister rd, XRegister rs1, XRegister rs2);
  void Maxu(XRegister rd, XRegister rs1, XRegister rs2);
  void Rol(XRegister rd, XRegister rs1, XRegister rs2);
  void Rolw(XRegister rd, XRegister rs1, XRegister rs2);
  void Ror(XRegister rd, XRegister rs1, XRegister rs2);
  void Rorw(XRegister rd, XRegister rs1, XRegister rs2);
  void Rori(XRegister rd, XRegister rs1, int32_t shamt);
  void Roriw(XRegister rd, XRegister rs1, int32_t shamt);
  void OrcB(XRegister rd, XRegister rs1);
  void Rev8(XRegister rd, XRegister rs1);
  void ZbbSextB(XRegister rd, XRegister rs1);
  void ZbbSextH(XRegister rd, XRegister rs1);
  void ZbbZextH(XRegister rd, XRegister rs1);

  // "Zbs" Standard Extension, opcode = 0x13 or 0x33, funct3 and funct7 vary.
  void Bclr(XRegister rd, XRegister rs1, XRegister rs2);
  void Bclri(XRegister rd, XRegister rs1, int32_t shamt);
  void Bext(XRegister rd, XRegister rs1, XRegister rs2);
  void Bexti(XRegister rd, XRegister rs1, int32_t shamt);
  void Binv(XRegister rd, XRegister rs1, XRegister rs2);
  void Binvi(XRegister rd, XRegister rs1, int32_t shamt);
  void Bset(XRegister rd, XRegister rs1, XRegister rs2);
  void Bseti(XRegister rd, XRegister rs1, int32_t shamt);

  ////////////////////////////// RISC-V Vector Instructions START ///////////////////////////////
  enum class LengthMultiplier : uint32_t {
    kM1Over8 = 0b101,
    kM1Over4 = 0b110,
    kM1Over2 = 0b111,
    kM1 = 0b000,
    kM2 = 0b001,
    kM4 = 0b010,
    kM8 = 0b011,

    kReserved1 = 0b100,
  };

  enum class SelectedElementWidth : uint32_t {
    kE8 = 0b000,
    kE16 = 0b001,
    kE32 = 0b010,
    kE64 = 0b011,

    kReserved1 = 0b100,
    kReserved2 = 0b101,
    kReserved3 = 0b110,
    kReserved4 = 0b111,
  };

  enum class VectorMaskAgnostic : uint32_t {
    kUndisturbed = 0,
    kAgnostic = 1,
  };

  enum class VectorTailAgnostic : uint32_t {
    kUndisturbed = 0,
    kAgnostic = 1,
  };

  enum class VM : uint32_t {  // Vector mask
    kV0_t = 0b0,
    kUnmasked = 0b1
  };

  // Vector Configuration-Setting Instructions, opcode = 0x57, funct3 = 0x3
  void VSetvli(XRegister rd, XRegister rs1, uint32_t vtypei);
  void VSetivli(XRegister rd, uint32_t uimm, uint32_t vtypei);
  void VSetvl(XRegister rd, XRegister rs1, XRegister rs2);

  static uint32_t VTypeiValue(VectorMaskAgnostic vma,
                              VectorTailAgnostic vta,
                              SelectedElementWidth sew,
                              LengthMultiplier lmul) {
    return static_cast<uint32_t>(vma) << 7 | static_cast<uint32_t>(vta) << 6 |
           static_cast<uint32_t>(sew) << 3 | static_cast<uint32_t>(lmul);
  }
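
  // A configuration sketch (hypothetical usage): selecting 32-bit elements, LMUL=1, and
  // tail/mask-agnostic behavior, then applying it with `VSetvli()` above:
  //   uint32_t vtypei = VTypeiValue(VectorMaskAgnostic::kAgnostic,
  //                                 VectorTailAgnostic::kAgnostic,
  //                                 SelectedElementWidth::kE32,
  //                                 LengthMultiplier::kM1);
  //   VSetvli(rd, rs1, vtypei);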
702
703 // Vector Unit-Stride Load/Store Instructions
704 void VLe8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
705 void VLe16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
706 void VLe32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
707 void VLe64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
708 void VLm(VRegister vd, XRegister rs1);
709
710 void VSe8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
711 void VSe16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
712 void VSe32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
713 void VSe64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
714 void VSm(VRegister vs3, XRegister rs1);
715
716 // Vector unit-stride fault-only-first Instructions
717 void VLe8ff(VRegister vd, XRegister rs1);
718 void VLe16ff(VRegister vd, XRegister rs1);
719 void VLe32ff(VRegister vd, XRegister rs1);
720 void VLe64ff(VRegister vd, XRegister rs1);
721
722 // Vector Strided Load/Store Instructions
723 void VLse8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
724 void VLse16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
725 void VLse32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
726 void VLse64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
727
728 void VSse8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
729 void VSse16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
730 void VSse32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
731 void VSse64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
732
733 // Vector Indexed Load/Store Instructions
734 void VLoxei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
735 void VLoxei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
736 void VLoxei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
737 void VLoxei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
738
739 void VLuxei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
740 void VLuxei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
741 void VLuxei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
742 void VLuxei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
743
744 void VSoxei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
745 void VSoxei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
746 void VSoxei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
747 void VSoxei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
748
749 void VSuxei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
750 void VSuxei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
751 void VSuxei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
752 void VSuxei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
753
754 // Vector Segment Load/Store
755
756 // Vector Unit-Stride Segment Loads/Stores
757
758 void VLseg2e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
759 void VLseg2e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
760 void VLseg2e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
761 void VLseg2e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
762 void VLseg3e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
763 void VLseg3e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
764 void VLseg3e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
765 void VLseg3e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
766 void VLseg4e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
767 void VLseg4e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
768 void VLseg4e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
769 void VLseg4e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
770 void VLseg5e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
771 void VLseg5e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
772 void VLseg5e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
773 void VLseg5e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
774 void VLseg6e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
775 void VLseg6e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
776 void VLseg6e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
777 void VLseg6e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
778 void VLseg7e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
779 void VLseg7e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
780 void VLseg7e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
781 void VLseg7e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
782 void VLseg8e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
783 void VLseg8e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
784 void VLseg8e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
785 void VLseg8e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
786
787 void VSseg2e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
788 void VSseg2e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
789 void VSseg2e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
790 void VSseg2e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
791 void VSseg3e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
792 void VSseg3e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
793 void VSseg3e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
794 void VSseg3e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
795 void VSseg4e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
796 void VSseg4e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
797 void VSseg4e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
798 void VSseg4e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
799 void VSseg5e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
800 void VSseg5e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
801 void VSseg5e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
802 void VSseg5e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
803 void VSseg6e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
804 void VSseg6e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
805 void VSseg6e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
806 void VSseg6e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
807 void VSseg7e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
808 void VSseg7e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
809 void VSseg7e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
810 void VSseg7e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
811 void VSseg8e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
812 void VSseg8e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
813 void VSseg8e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
814 void VSseg8e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
815
816 // Vector Unit-Stride Fault-only-First Segment Loads
817
818 void VLseg2e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
819 void VLseg2e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
820 void VLseg2e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
821 void VLseg2e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
822 void VLseg3e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
823 void VLseg3e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
824 void VLseg3e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
825 void VLseg3e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
826 void VLseg4e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
827 void VLseg4e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
828 void VLseg4e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
829 void VLseg4e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
830 void VLseg5e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
831 void VLseg5e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
832 void VLseg5e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
833 void VLseg5e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
834 void VLseg6e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
835 void VLseg6e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
836 void VLseg6e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
837 void VLseg6e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
838 void VLseg7e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
839 void VLseg7e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
840 void VLseg7e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
841 void VLseg7e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
842 void VLseg8e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
843 void VLseg8e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
844 void VLseg8e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
845 void VLseg8e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
846
847 // Vector Strided Segment Loads/Stores
848
849 void VLsseg2e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
850 void VLsseg2e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
851 void VLsseg2e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
852 void VLsseg2e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
853 void VLsseg3e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
854 void VLsseg3e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
855 void VLsseg3e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
856 void VLsseg3e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
857 void VLsseg4e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
858 void VLsseg4e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
859 void VLsseg4e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
860 void VLsseg4e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
861 void VLsseg5e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
862 void VLsseg5e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
863 void VLsseg5e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
864 void VLsseg5e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
865 void VLsseg6e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
866 void VLsseg6e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
867 void VLsseg6e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
868 void VLsseg6e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
869 void VLsseg7e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
870 void VLsseg7e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
871 void VLsseg7e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
872 void VLsseg7e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
873 void VLsseg8e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
874 void VLsseg8e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
875 void VLsseg8e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
876 void VLsseg8e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
877
878 void VSsseg2e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
879 void VSsseg2e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
880 void VSsseg2e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
881 void VSsseg2e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
882 void VSsseg3e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
883 void VSsseg3e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
884 void VSsseg3e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
885 void VSsseg3e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
886 void VSsseg4e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
887 void VSsseg4e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
888 void VSsseg4e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
889 void VSsseg4e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
890 void VSsseg5e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
891 void VSsseg5e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
892 void VSsseg5e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
893 void VSsseg5e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
894 void VSsseg6e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
895 void VSsseg6e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
896 void VSsseg6e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
897 void VSsseg6e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
898 void VSsseg7e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
899 void VSsseg7e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
900 void VSsseg7e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
901 void VSsseg7e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
902 void VSsseg8e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
903 void VSsseg8e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
904 void VSsseg8e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
905 void VSsseg8e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
906
907 // Vector Indexed-unordered Segment Loads/Stores
908
909 void VLuxseg2ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
910 void VLuxseg2ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
911 void VLuxseg2ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
912 void VLuxseg2ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
913 void VLuxseg3ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
914 void VLuxseg3ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
915 void VLuxseg3ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
916 void VLuxseg3ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
917 void VLuxseg4ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
918 void VLuxseg4ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
919 void VLuxseg4ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
920 void VLuxseg4ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
921 void VLuxseg5ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
922 void VLuxseg5ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
923 void VLuxseg5ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
924 void VLuxseg5ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
925 void VLuxseg6ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
926 void VLuxseg6ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
927 void VLuxseg6ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
928 void VLuxseg6ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
929 void VLuxseg7ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
930 void VLuxseg7ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
931 void VLuxseg7ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
932 void VLuxseg7ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
933 void VLuxseg8ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
934 void VLuxseg8ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
935 void VLuxseg8ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
936 void VLuxseg8ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
937
938 void VSuxseg2ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
939 void VSuxseg2ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
940 void VSuxseg2ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
941 void VSuxseg2ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
942 void VSuxseg3ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
943 void VSuxseg3ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
944 void VSuxseg3ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
945 void VSuxseg3ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
946 void VSuxseg4ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
947 void VSuxseg4ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
948 void VSuxseg4ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
949 void VSuxseg4ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
950 void VSuxseg5ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
951 void VSuxseg5ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
952 void VSuxseg5ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
953 void VSuxseg5ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
954 void VSuxseg6ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
955 void VSuxseg6ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
956 void VSuxseg6ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
957 void VSuxseg6ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
958 void VSuxseg7ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
959 void VSuxseg7ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
960 void VSuxseg7ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
961 void VSuxseg7ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
962 void VSuxseg8ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
963 void VSuxseg8ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
964 void VSuxseg8ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
965 void VSuxseg8ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
966
967 // Vector Indexed-ordered Segment Loads/Stores
968
969 void VLoxseg2ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
970 void VLoxseg2ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
971 void VLoxseg2ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg2ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg3ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg3ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg3ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg3ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg4ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg4ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg4ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg4ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg5ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg5ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg5ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg5ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg6ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg6ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg6ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg6ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg7ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg7ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg7ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg7ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg8ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg8ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg8ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VLoxseg8ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  void VSoxseg2ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg2ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg2ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg2ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg3ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg3ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg3ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg3ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg4ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg4ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg4ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg4ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg5ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg5ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg5ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg5ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg6ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg6ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg6ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg6ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg7ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg7ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg7ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg7ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg8ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg8ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg8ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VSoxseg8ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector Whole Register Load/Store Instructions

  void VL1re8(VRegister vd, XRegister rs1);
  void VL1re16(VRegister vd, XRegister rs1);
  void VL1re32(VRegister vd, XRegister rs1);
  void VL1re64(VRegister vd, XRegister rs1);

  void VL2re8(VRegister vd, XRegister rs1);
  void VL2re16(VRegister vd, XRegister rs1);
  void VL2re32(VRegister vd, XRegister rs1);
  void VL2re64(VRegister vd, XRegister rs1);

  void VL4re8(VRegister vd, XRegister rs1);
  void VL4re16(VRegister vd, XRegister rs1);
  void VL4re32(VRegister vd, XRegister rs1);
  void VL4re64(VRegister vd, XRegister rs1);

  void VL8re8(VRegister vd, XRegister rs1);
  void VL8re16(VRegister vd, XRegister rs1);
  void VL8re32(VRegister vd, XRegister rs1);
  void VL8re64(VRegister vd, XRegister rs1);

  void VL1r(VRegister vd, XRegister rs1);  // Pseudo-instruction equal to VL1re8
  void VL2r(VRegister vd, XRegister rs1);  // Pseudo-instruction equal to VL2re8
  void VL4r(VRegister vd, XRegister rs1);  // Pseudo-instruction equal to VL4re8
  void VL8r(VRegister vd, XRegister rs1);  // Pseudo-instruction equal to VL8re8

  void VS1r(VRegister vs3, XRegister rs1);  // Store {vs3} to the address in rs1
  void VS2r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 1} to the address in rs1
  void VS4r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 3} to the address in rs1
  void VS8r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 7} to the address in rs1
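
  // A minimal usage sketch (the register names and the common `__ assembler->` shorthand
  // are assumptions for the example, not part of this interface):
  //   __ VL2r(V8, A0);  // Load the register group {v8, v9} from the address in a0.
  //   __ VS2r(V8, A1);  // Store the same group to the address in a1.
  // Note that the RVV spec requires the vector register operand of VL<nr>r*/VS<nr>r to be
  // aligned to <nr> registers (e.g. v0, v2, v4, ... for <nr> = 2).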

  // Vector Arithmetic Instructions

  // Vector vadd instructions, funct6 = 0b000000
  void VAdd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAdd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VAdd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vsub instructions, funct6 = 0b000010
  void VSub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vrsub instructions, funct6 = 0b000011
  void VRsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VRsub_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VRsub_vi
  void VNeg_v(VRegister vd, VRegister vs2);
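
  // Per the RVV spec, `vneg.v vd, vs` is defined as `vrsub.vi vd, vs, 0`, so `VNeg_v()`
  // amounts to a `VRsub_vi()` with a zero immediate.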

  // Vector vminu instructions, funct6 = 0b000100
  void VMinu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMinu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmin instructions, funct6 = 0b000101
  void VMin_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMin_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmaxu instructions, funct6 = 0b000110
  void VMaxu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMaxu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmax instructions, funct6 = 0b000111
  void VMax_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMax_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vand instructions, funct6 = 0b001001
  void VAnd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAnd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VAnd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vor instructions, funct6 = 0b001010
  void VOr_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VOr_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VOr_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vxor instructions, funct6 = 0b001011
  void VXor_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VXor_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VXor_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VXor_vi
  void VNot_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
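
  // Per the RVV spec, `vnot.v vd, vs` is defined as `vxor.vi vd, vs, -1` (XOR with all ones).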

  // Vector vrgather instructions, funct6 = 0b001100
  void VRgather_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VRgather_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VRgather_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vslideup instructions, funct6 = 0b001110
  void VSlideup_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSlideup_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vrgatherei16 instructions, funct6 = 0b001110
  void VRgatherei16_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vslidedown instructions, funct6 = 0b001111
  void VSlidedown_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSlidedown_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vadc instructions, funct6 = 0b010000
  void VAdc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VAdc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
  void VAdc_vim(VRegister vd, VRegister vs2, int32_t imm5);
  // Vector vmadc instructions (with carry-in, vm = 0), funct6 = 0b010001
  void VMadc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VMadc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
  void VMadc_vim(VRegister vd, VRegister vs2, int32_t imm5);

  // Vector vmadc instructions (no carry-in, vm = 1), funct6 = 0b010001
  void VMadc_vv(VRegister vd, VRegister vs2, VRegister vs1);
  void VMadc_vx(VRegister vd, VRegister vs2, XRegister rs1);
  void VMadc_vi(VRegister vd, VRegister vs2, int32_t imm5);

  // Vector vsbc instructions, funct6 = 0b010010
  void VSbc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VSbc_vxm(VRegister vd, VRegister vs2, XRegister rs1);

  // Vector vmsbc instructions, funct6 = 0b010011
  void VMsbc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VMsbc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
  void VMsbc_vv(VRegister vd, VRegister vs2, VRegister vs1);
  void VMsbc_vx(VRegister vd, VRegister vs2, XRegister rs1);

  // Vector vmerge instructions, funct6 = 0b010111, vm = 0
  void VMerge_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VMerge_vxm(VRegister vd, VRegister vs2, XRegister rs1);
  void VMerge_vim(VRegister vd, VRegister vs2, int32_t imm5);

  // Vector vmv instructions, funct6 = 0b010111, vm = 1, vs2 = v0
  void VMv_vv(VRegister vd, VRegister vs1);
  void VMv_vx(VRegister vd, XRegister rs1);
  void VMv_vi(VRegister vd, int32_t imm5);

  // Vector vmseq instructions, funct6 = 0b011000
  void VMseq_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMseq_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMseq_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vmsne instructions, funct6 = 0b011001
  void VMsne_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsne_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsne_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vmsltu instructions, funct6 = 0b011010
  void VMsltu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsltu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMsltu_vv
  void VMsgtu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vmslt instructions, funct6 = 0b011011
  void VMslt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMslt_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMslt_vv
  void VMsgt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vmsleu instructions, funct6 = 0b011100
  void VMsleu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsleu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsleu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instructions over VMsleu_*
  void VMsgeu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsltu_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);

  // Vector vmsle instructions, funct6 = 0b011101
  void VMsle_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsle_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsle_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instructions over VMsle_*
  void VMsge_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMslt_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);

  // Vector vmsgtu instructions, funct6 = 0b011110
  void VMsgtu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsgtu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMsgtu_vi
  void VMsgeu_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);

  // Vector vmsgt instructions, funct6 = 0b011111
  void VMsgt_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsgt_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMsgt_vi
  void VMsge_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
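
  // The comparison pseudo-instructions above follow the standard RVV expansions: the `_vv`
  // forms emit the opposite comparison with the two vector operands swapped (e.g.
  // `vmsge.vv vd, va, vb` is `vmsle.vv vd, vb, va`), and the `_vi` forms are defined in
  // terms of the adjacent comparison with the immediate adjusted by one (e.g.
  // `vmsge.vi vd, va, imm` is `vmsgt.vi vd, va, imm - 1`), which is presumably why the
  // parameter is named `aimm5` rather than `imm5`.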

  // Vector vsaddu instructions, funct6 = 0b100000
  void VSaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSaddu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vsadd instructions, funct6 = 0b100001
  void VSadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSadd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vssubu instructions, funct6 = 0b100010
  void VSsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vssub instructions, funct6 = 0b100011
  void VSsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vsll instructions, funct6 = 0b100101
  void VSll_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSll_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSll_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vsmul instructions, funct6 = 0b100111
  void VSmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSmul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmv<nr>r.v instructions, funct6 = 0b100111
  void Vmv1r_v(VRegister vd, VRegister vs2);
  void Vmv2r_v(VRegister vd, VRegister vs2);
  void Vmv4r_v(VRegister vd, VRegister vs2);
  void Vmv8r_v(VRegister vd, VRegister vs2);

  // Vector vsrl instructions, funct6 = 0b101000
  void VSrl_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSrl_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSrl_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vsra instructions, funct6 = 0b101001
  void VSra_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSra_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSra_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vssrl instructions, funct6 = 0b101010
  void VSsrl_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSsrl_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSsrl_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vssra instructions, funct6 = 0b101011
  void VSsra_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSsra_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSsra_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vnsrl instructions, funct6 = 0b101100
  void VNsrl_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VNsrl_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VNsrl_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VNsrl_wx
  void VNcvt_x_x_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
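
  // Per the RVV spec, `vncvt.x.x.w vd, vs` is defined as `vnsrl.wx vd, vs, x0`,
  // i.e. a narrowing shift by zero.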

  // Vector vnsra instructions, funct6 = 0b101101
  void VNsra_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VNsra_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VNsra_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vnclipu instructions, funct6 = 0b101110
  void VNclipu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VNclipu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VNclipu_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vnclip instructions, funct6 = 0b101111
  void VNclip_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VNclip_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VNclip_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vwredsumu instructions, funct6 = 0b110000
  void VWredsumu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vwredsum instructions, funct6 = 0b110001
  void VWredsum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredsum instructions, funct6 = 0b000000
  void VRedsum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredand instructions, funct6 = 0b000001
  void VRedand_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredor instructions, funct6 = 0b000010
  void VRedor_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredxor instructions, funct6 = 0b000011
  void VRedxor_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredminu instructions, funct6 = 0b000100
  void VRedminu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredmin instructions, funct6 = 0b000101
  void VRedmin_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredmaxu instructions, funct6 = 0b000110
  void VRedmaxu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredmax instructions, funct6 = 0b000111
  void VRedmax_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vaaddu instructions, funct6 = 0b001000
  void VAaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vaadd instructions, funct6 = 0b001001
  void VAadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vasubu instructions, funct6 = 0b001010
  void VAsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vasub instructions, funct6 = 0b001011
  void VAsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vslide1up instructions, funct6 = 0b001110
  void VSlide1up_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vslide1down instructions, funct6 = 0b001111
  void VSlide1down_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vcompress instructions, funct6 = 0b010111
  void VCompress_vm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmandn instructions, funct6 = 0b011000
  void VMandn_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmand instructions, funct6 = 0b011001
  void VMand_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Pseudo-instruction over VMand_mm
  void VMmv_m(VRegister vd, VRegister vs2);

  // Vector vmor instructions, funct6 = 0b011010
  void VMor_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmxor instructions, funct6 = 0b011011
  void VMxor_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Pseudo-instruction over VMxor_mm
  void VMclr_m(VRegister vd);

  // Vector vmorn instructions, funct6 = 0b011100
  void VMorn_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmnand instructions, funct6 = 0b011101
  void VMnand_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Pseudo-instruction over VMnand_mm
  void VMnot_m(VRegister vd, VRegister vs2);

  // Vector vmnor instructions, funct6 = 0b011110
  void VMnor_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmxnor instructions, funct6 = 0b011111
  void VMxnor_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Pseudo-instruction over VMxnor_mm
  void VMset_m(VRegister vd);
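
  // The mask pseudo-instructions above follow the standard RVV expansions:
  //   vmmv.m vd, vs  -> vmand.mm vd, vs, vs
  //   vmclr.m vd     -> vmxor.mm vd, vd, vd   (clear all mask bits)
  //   vmnot.m vd, vs -> vmnand.mm vd, vs, vs
  //   vmset.m vd     -> vmxnor.mm vd, vd, vd  (set all mask bits)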

  // Vector vdivu instructions, funct6 = 0b100000
  void VDivu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VDivu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vdiv instructions, funct6 = 0b100001
  void VDiv_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VDiv_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vremu instructions, funct6 = 0b100010
  void VRemu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VRemu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vrem instructions, funct6 = 0b100011
  void VRem_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VRem_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmulhu instructions, funct6 = 0b100100
  void VMulhu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMulhu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmul instructions, funct6 = 0b100101
  void VMul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmulhsu instructions, funct6 = 0b100110
  void VMulhsu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMulhsu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmulh instructions, funct6 = 0b100111
  void VMulh_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMulh_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmadd instructions, funct6 = 0b101001
  void VMadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VMadd_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vnmsub instructions, funct6 = 0b101011
  void VNmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VNmsub_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vmacc instructions, funct6 = 0b101101
  void VMacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VMacc_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vnmsac instructions, funct6 = 0b101111
  void VNmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VNmsac_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vwaddu instructions, funct6 = 0b110000
  void VWaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VWaddu_vx
  void VWcvtu_x_x_v(VRegister vd, VRegister vs, VM vm = VM::kUnmasked);

  // Vector vwadd instructions, funct6 = 0b110001
  void VWadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VWadd_vx
  void VWcvt_x_x_v(VRegister vd, VRegister vs, VM vm = VM::kUnmasked);
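
  // Per the RVV spec, the widening conversions are zero- and sign-extending moves:
  // `vwcvtu.x.x.v vd, vs` is `vwaddu.vx vd, vs, x0` and `vwcvt.x.x.v vd, vs` is
  // `vwadd.vx vd, vs, x0`.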

  // Vector vwsubu instructions, funct6 = 0b110010
  void VWsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwsub instructions, funct6 = 0b110011
  void VWsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwaddu.w instructions, funct6 = 0b110100
  void VWaddu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWaddu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwadd.w instructions, funct6 = 0b110101
  void VWadd_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWadd_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwsubu.w instructions, funct6 = 0b110110
  void VWsubu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWsubu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwsub.w instructions, funct6 = 0b110111
  void VWsub_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWsub_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwmulu instructions, funct6 = 0b111000
  void VWmulu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWmulu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwmulsu instructions, funct6 = 0b111010
  void VWmulsu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWmulsu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwmul instructions, funct6 = 0b111011
  void VWmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWmul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwmaccu instructions, funct6 = 0b111100
  void VWmaccu_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VWmaccu_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vwmacc instructions, funct6 = 0b111101
  void VWmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VWmacc_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vwmaccus instructions, funct6 = 0b111110
  void VWmaccus_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vwmaccsu instructions, funct6 = 0b111111
  void VWmaccsu_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VWmaccsu_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfadd instructions, funct6 = 0b000000
  void VFadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFadd_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfredusum instructions, funct6 = 0b000001
  void VFredusum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfsub instructions, funct6 = 0b000010
  void VFsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfredosum instructions, funct6 = 0b000011
  void VFredosum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfmin instructions, funct6 = 0b000100
  void VFmin_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFmin_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfredmin instructions, funct6 = 0b000101
  void VFredmin_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfmax instructions, funct6 = 0b000110
  void VFmax_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFmax_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfredmax instructions, funct6 = 0b000111
  void VFredmax_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfsgnj instructions, funct6 = 0b001000
  void VFsgnj_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFsgnj_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfsgnjn instructions, funct6 = 0b001001
  void VFsgnjn_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFsgnjn_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VFsgnjn_vv
  void VFneg_v(VRegister vd, VRegister vs);

  // Vector vfsgnjx instructions, funct6 = 0b001010
  void VFsgnjx_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFsgnjx_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VFsgnjx_vv
  void VFabs_v(VRegister vd, VRegister vs);
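
  // Per the RVV spec, `vfneg.v vd, vs` is `vfsgnjn.vv vd, vs, vs` and `vfabs.v vd, vs`
  // is `vfsgnjx.vv vd, vs, vs`.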

  // Vector vfslide1up instructions, funct6 = 0b001110
  void VFslide1up_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfslide1down instructions, funct6 = 0b001111
  void VFslide1down_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfmerge/vfmv instructions, funct6 = 0b010111
  void VFmerge_vfm(VRegister vd, VRegister vs2, FRegister fs1);
  void VFmv_v_f(VRegister vd, FRegister fs1);

  // Vector vmfeq instructions, funct6 = 0b011000
  void VMfeq_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMfeq_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vmfle instructions, funct6 = 0b011001
  void VMfle_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMfle_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMfle_vv
  void VMfge_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vmflt instructions, funct6 = 0b011011
  void VMflt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMflt_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMflt_vv
  void VMfgt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
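
  // As with the integer comparisons, the RVV spec defines `vmfge.vv vd, va, vb` as
  // `vmfle.vv vd, vb, va` and `vmfgt.vv vd, va, vb` as `vmflt.vv vd, vb, va`, i.e. the
  // opposite comparison with the vector operands swapped.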

  // Vector vmfne instructions, funct6 = 0b011100
  void VMfne_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMfne_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vmfgt instructions, funct6 = 0b011101
  void VMfgt_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vmfge instructions, funct6 = 0b011111
  void VMfge_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfdiv instructions, funct6 = 0b100000
  void VFdiv_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFdiv_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfrdiv instructions, funct6 = 0b100001
  void VFrdiv_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfmul instructions, funct6 = 0b100100
  void VFmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFmul_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfrsub instructions, funct6 = 0b100111
  void VFrsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfmadd instructions, funct6 = 0b101000
  void VFmadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFmadd_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfnmadd instructions, funct6 = 0b101001
  void VFnmadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFnmadd_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfmsub instructions, funct6 = 0b101010
  void VFmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFmsub_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfnmsub instructions, funct6 = 0b101011
  void VFnmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFnmsub_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfmacc instructions, funct6 = 0b101100
  void VFmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfnmacc instructions, funct6 = 0b101101
  void VFnmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFnmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfmsac instructions, funct6 = 0b101110
  void VFmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfnmsac instructions, funct6 = 0b101111
  void VFnmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFnmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfwadd instructions, funct6 = 0b110000
  void VFwadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwadd_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwredusum instructions, funct6 = 0b110001
  void VFwredusum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfwsub instructions, funct6 = 0b110010
  void VFwsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwredosum instructions, funct6 = 0b110011
  void VFwredosum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfwadd.w instructions, funct6 = 0b110100
  void VFwadd_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwadd_wf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwsub.w instructions, funct6 = 0b110110
  void VFwsub_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwsub_wf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwmul instructions, funct6 = 0b111000
  void VFwmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwmul_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwmacc instructions, funct6 = 0b111100
  void VFwmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfwnmacc instructions, funct6 = 0b111101
  void VFwnmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwnmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfwmsac instructions, funct6 = 0b111110
  void VFwmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfwnmsac instructions, funct6 = 0b111111
  void VFwnmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwnmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector VRXUNARY0 kind instructions, funct6 = 0b010000
  void VMv_s_x(VRegister vd, XRegister rs1);

  // Vector VWXUNARY0 kind instructions, funct6 = 0b010000
  void VMv_x_s(XRegister rd, VRegister vs2);
  void VCpop_m(XRegister rd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFirst_m(XRegister rd, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector VXUNARY0 kind instructions, funct6 = 0b010010
  void VZext_vf8(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VSext_vf8(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VZext_vf4(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VSext_vf4(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VZext_vf2(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VSext_vf2(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector VRFUNARY0 kind instructions, funct6 = 0b010000
  void VFmv_s_f(VRegister vd, FRegister fs1);

  // Vector VWFUNARY0 kind instructions, funct6 = 0b010000
  void VFmv_f_s(FRegister fd, VRegister vs2);

  // Vector VFUNARY0 kind instructions, funct6 = 0b010010
  void VFcvt_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFcvt_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFcvt_f_xu_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFcvt_f_x_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFcvt_rtz_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFcvt_rtz_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwcvt_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwcvt_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwcvt_f_xu_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwcvt_f_x_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwcvt_f_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwcvt_rtz_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwcvt_rtz_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFncvt_xu_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFncvt_x_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFncvt_f_xu_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFncvt_f_x_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFncvt_f_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFncvt_rod_f_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFncvt_rtz_xu_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFncvt_rtz_x_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector VFUNARY1 kind instructions, funct6 = 0b010011
  void VFsqrt_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFrsqrt7_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFrec7_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFclass_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector VMUNARY0 kind instructions, funct6 = 0b010100
  void VMsbf_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VMsof_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VMsif_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VIota_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
  void VId_v(VRegister vd, VM vm = VM::kUnmasked);

  ////////////////////////////// RISC-V Vector Instructions END //////////////////////////////

  ////////////////////////////// RV64 MACRO Instructions START ///////////////////////////////
  // These pseudo-instructions are from the "RISC-V Assembly Programmer's Manual".

  void Nop();
  void Li(XRegister rd, int64_t imm);
  void Mv(XRegister rd, XRegister rs);
  void Not(XRegister rd, XRegister rs);
  void Neg(XRegister rd, XRegister rs);
  void NegW(XRegister rd, XRegister rs);
  void SextB(XRegister rd, XRegister rs);
  void SextH(XRegister rd, XRegister rs);
  void SextW(XRegister rd, XRegister rs);
  void ZextB(XRegister rd, XRegister rs);
  void ZextH(XRegister rd, XRegister rs);
  void ZextW(XRegister rd, XRegister rs);
  void Seqz(XRegister rd, XRegister rs);
  void Snez(XRegister rd, XRegister rs);
  void Sltz(XRegister rd, XRegister rs);
  void Sgtz(XRegister rd, XRegister rs);
  void FMvS(FRegister rd, FRegister rs);
  void FAbsS(FRegister rd, FRegister rs);
  void FNegS(FRegister rd, FRegister rs);
  void FMvD(FRegister rd, FRegister rs);
  void FAbsD(FRegister rd, FRegister rs);
  void FNegD(FRegister rd, FRegister rs);
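
  // For orientation, a few of the standard expansions from the manual (a sketch, not
  // necessarily the exact sequences emitted here):
  //   Mv(rd, rs)    -> addi rd, rs, 0
  //   Not(rd, rs)   -> xori rd, rs, -1
  //   Neg(rd, rs)   -> sub rd, x0, rs
  //   SextW(rd, rs) -> addiw rd, rs, 0  (base-ISA form)
  //   Seqz(rd, rs)  -> sltiu rd, rs, 1
  //   FNegS(rd, rs) -> fsgnjn.s rd, rs, rs
  // `Li()` may expand to a multi-instruction sequence depending on the immediate value.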

  // Branch pseudo-instructions
  void Beqz(XRegister rs, int32_t offset);
  void Bnez(XRegister rs, int32_t offset);
  void Blez(XRegister rs, int32_t offset);
  void Bgez(XRegister rs, int32_t offset);
  void Bltz(XRegister rs, int32_t offset);
  void Bgtz(XRegister rs, int32_t offset);
  void Bgt(XRegister rs, XRegister rt, int32_t offset);
  void Ble(XRegister rs, XRegister rt, int32_t offset);
  void Bgtu(XRegister rs, XRegister rt, int32_t offset);
  void Bleu(XRegister rs, XRegister rt, int32_t offset);

  // Jump pseudo-instructions
  void J(int32_t offset);
  void Jal(int32_t offset);
  void Jr(XRegister rs);
  void Jalr(XRegister rs);
  void Jalr(XRegister rd, XRegister rs);
  void Ret();

  // Pseudo-instructions for accessing control and status registers
  void RdCycle(XRegister rd);
  void RdTime(XRegister rd);
  void RdInstret(XRegister rd);
  void Csrr(XRegister rd, uint32_t csr);
  void Csrw(uint32_t csr, XRegister rs);
  void Csrs(uint32_t csr, XRegister rs);
  void Csrc(uint32_t csr, XRegister rs);
  void Csrwi(uint32_t csr, uint32_t uimm5);
  void Csrsi(uint32_t csr, uint32_t uimm5);
  void Csrci(uint32_t csr, uint32_t uimm5);
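
  // These follow the standard "Zicsr" idioms, e.g.:
  //   Csrr(rd, csr) -> csrrs rd, csr, x0
  //   Csrw(csr, rs) -> csrrw x0, csr, rs
  //   Csrs(csr, rs) -> csrrs x0, csr, rs
  //   Csrc(csr, rs) -> csrrc x0, csr, rs
  // RdCycle(), RdTime() and RdInstret() read the `cycle`, `time` and `instret` counters.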

  // Load/store macros for arbitrary 32-bit offsets.
  void Loadb(XRegister rd, XRegister rs1, int32_t offset);
  void Loadh(XRegister rd, XRegister rs1, int32_t offset);
  void Loadw(XRegister rd, XRegister rs1, int32_t offset);
  void Loadd(XRegister rd, XRegister rs1, int32_t offset);
  void Loadbu(XRegister rd, XRegister rs1, int32_t offset);
  void Loadhu(XRegister rd, XRegister rs1, int32_t offset);
  void Loadwu(XRegister rd, XRegister rs1, int32_t offset);
  void Storeb(XRegister rs2, XRegister rs1, int32_t offset);
  void Storeh(XRegister rs2, XRegister rs1, int32_t offset);
  void Storew(XRegister rs2, XRegister rs1, int32_t offset);
  void Stored(XRegister rs2, XRegister rs1, int32_t offset);
  void FLoadw(FRegister rd, XRegister rs1, int32_t offset);
  void FLoadd(FRegister rd, XRegister rs1, int32_t offset);
  void FStorew(FRegister rs2, XRegister rs1, int32_t offset);
  void FStored(FRegister rs2, XRegister rs1, int32_t offset);
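
  // Unlike the corresponding plain instructions, these accept an arbitrary 32-bit offset.
  // When the offset does not fit in the 12-bit immediate, a scratch register is used to
  // form the address first, roughly (a sketch, not the exact emitted sequence):
  //   lui  tmp, <offset_hi>
  //   add  tmp, tmp, rs1
  //   lw   rd, <offset_lo>(tmp)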

  // Macros for loading constants.
  void LoadConst32(XRegister rd, int32_t value);
  void LoadConst64(XRegister rd, int64_t value);

  // Macros for adding constants.
  void AddConst32(XRegister rd, XRegister rs1, int32_t value);
  void AddConst64(XRegister rd, XRegister rs1, int64_t value);

  // Jumps and branches to a label.
  void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false);
  void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false);
  void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false);
  void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false);
  void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false);
  void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false);
  void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
  void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false);
  void J(Riscv64Label* label, bool is_bare = false);
  void Jal(Riscv64Label* label, bool is_bare = false);

  // Literal load.
  void Loadw(XRegister rd, Literal* literal);
  void Loadwu(XRegister rd, Literal* literal);
  void Loadd(XRegister rd, Literal* literal);
  void FLoadw(FRegister rd, Literal* literal);
  void FLoadd(FRegister rd, Literal* literal);

  // Illegal instruction that triggers SIGILL.
  void Unimp();
  /////////////////////////////// RV64 MACRO Instructions END ///////////////////////////////

  void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); }

  void Jump([[maybe_unused]] Label* label) override {
    UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64";
  }

  void Jump(Riscv64Label* label) {
    J(label);
  }

  void Bind(Riscv64Label* label);

  // Load label address using PC-relative loads.
  void LoadLabelAddress(XRegister rd, Riscv64Label* label);

  // Create a new literal with a given value.
  // NOTE: Use `Identity<>` to force the template parameter to be explicitly specified.
  template <typename T>
  Literal* NewLiteral(typename Identity<T>::type value) {
    static_assert(std::is_integral<T>::value, "T must be an integral type.");
    return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
  }

  // Create a new literal with the given data.
  Literal* NewLiteral(size_t size, const uint8_t* data);

  // Create a jump table for the given labels that will be emitted when finalizing.
  // When the table is emitted, offsets will be relative to the location of the table.
  // The table location is determined by the location of its label (the label precedes
  // the table data) and should be loaded using LoadLabelAddress().
  JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels);
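
  // A minimal usage sketch (`table_base` and the way the table's label is obtained are
  // assumptions for the example; see the actual call sites for the exact protocol):
  //   JumpTable* table = CreateJumpTable(std::move(case_labels));
  //   LoadLabelAddress(table_base, /* the label preceding the table data */);
  //   // ...load the 32-bit offset for the selected case, add it to `table_base`,
  //   // and `Jr()` to the result...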

 public:
  // Emit slow paths queued during assembly, promote short branches to long if needed,
  // and emit branches.
  void FinalizeCode() override;

  template <typename Reg>
  static inline bool IsShortReg(Reg reg) {
    static_assert(std::is_same_v<Reg, XRegister> || std::is_same_v<Reg, FRegister>);
    uint32_t uv = enum_cast<uint32_t>(reg) - 8u;
    return IsUint<3>(uv);
  }
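
  // "Short" registers are x8-x15 (f8-f15 for FP), the eight registers that fit the 3-bit
  // register fields of compressed ("C" extension) instructions.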

  // Returns the current location of a label.
  //
  // This function must be used instead of `Riscv64Label::GetPosition()`
  // which returns assembler's internal data instead of an actual location.
  //
  // The location can change during branch fixup in `FinalizeCode()`. Before that,
  // the location is not final and therefore not very useful to external users,
  // so they should preferably retrieve the location only after `FinalizeCode()`.
  uint32_t GetLabelLocation(const Riscv64Label* label) const;

  // Get the final position of a label after local fixup based on the old position
  // recorded before FinalizeCode().
  uint32_t GetAdjustedPosition(uint32_t old_position);

 private:
  static uint32_t ConvertExtensions(
      const Riscv64InstructionSetFeatures* instruction_set_features) {
    // The `Riscv64InstructionSetFeatures` currently does not support "Zcb",
    // only the original "C" extension. For riscv64 that means "Zca" and "Zcd".
    constexpr Riscv64ExtensionMask kCompressedExtensionsMask =
        Riscv64ExtensionBit(Riscv64Extension::kZca) | Riscv64ExtensionBit(Riscv64Extension::kZcd);
    return
        (Riscv64ExtensionBit(Riscv64Extension::kLoadStore)) |
        (Riscv64ExtensionBit(Riscv64Extension::kZifencei)) |
        (Riscv64ExtensionBit(Riscv64Extension::kM)) |
        (Riscv64ExtensionBit(Riscv64Extension::kA)) |
        (Riscv64ExtensionBit(Riscv64Extension::kZicsr)) |
        (Riscv64ExtensionBit(Riscv64Extension::kF)) |
        (Riscv64ExtensionBit(Riscv64Extension::kD)) |
        (instruction_set_features->HasZba() ? Riscv64ExtensionBit(Riscv64Extension::kZba) : 0u) |
        (instruction_set_features->HasZbb() ? Riscv64ExtensionBit(Riscv64Extension::kZbb) : 0u) |
        (instruction_set_features->HasZbs() ? Riscv64ExtensionBit(Riscv64Extension::kZbs) : 0u) |
        (instruction_set_features->HasVector() ? Riscv64ExtensionBit(Riscv64Extension::kV) : 0u) |
        (instruction_set_features->HasCompressed() ? kCompressedExtensionsMask : 0u);
  }

  void AssertExtensionsEnabled(Riscv64Extension ext) {
    DCHECK(IsExtensionEnabled(ext))
        << "ext=" << enum_cast<>(ext) << " enabled=0x" << std::hex << enabled_extensions_;
  }

  template <typename... OtherExt>
  void AssertExtensionsEnabled(Riscv64Extension ext, OtherExt... other_ext) {
    AssertExtensionsEnabled(ext);
    AssertExtensionsEnabled(other_ext...);
  }

  enum BranchCondition : uint8_t {
    kCondEQ,
    kCondNE,
    kCondLT,
    kCondGE,
    kCondLE,
    kCondGT,
    kCondLTU,
    kCondGEU,
    kCondLEU,
    kCondGTU,
    kUncond,
  };

  // Note that PC-relative literal loads are handled as pseudo branches because they need
  // to be emitted after branch relocation to use correct offsets.
  class Branch {
   public:
    enum Type : uint8_t {
      // Compressed branches (can be promoted to longer).
      kCondCBranch,
      kUncondCBranch,
      // Compressed branches (can't be promoted to longer).
      kBareCondCBranch,
      kBareUncondCBranch,

      // Short branches (can be promoted to longer).
      kCondBranch,
      kUncondBranch,
      kCall,
      // Short branches (can't be promoted to longer).
      kBareCondBranch,
      kBareUncondBranch,
      kBareCall,

      // Medium branches (can be promoted to long).
      kCondCBranch21,  // Compressed version.
      kCondBranch21,

      // Long branches.
      kLongCondCBranch,
      kLongCondBranch,
      kLongUncondBranch,
      kLongCall,

      // Label.
      kLabel,

      // Literals.
      kLiteral,
      kLiteralUnsigned,
      kLiteralLong,
      kLiteralFloat,
      kLiteralDouble,
    };

    // Bit sizes of offsets defined as enums to minimize the chance of typos.
    enum OffsetBits {
      kOffset9 = 9,
      kOffset12 = 12,
      kOffset13 = 13,
      kOffset21 = 21,
      kOffset32 = 32,
    };
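
    // These correspond to the RISC-V encodings, e.g. 9 bits for compressed branches
    // (c.beqz/c.bnez), 12 bits for c.j, 13 bits for conditional branches, 21 bits for
    // jal, and effectively 32 bits for sequences based on auipc.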

    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
    static constexpr uint32_t kMaxBranchLength = 12;  // In bytes.

    struct BranchInfo {
      // Branch length in bytes.
      uint32_t length;
      // The offset in bytes of the PC used in the (only) PC-relative instruction from
      // the start of the branch sequence. RISC-V always uses the address of the PC-relative
      // instruction as the PC, so this is essentially the offset of that instruction.
      uint32_t pc_offset;
      // How large (in bits) a PC-relative offset can be for a given type of branch.
      OffsetBits offset_size;
    };
    static const BranchInfo branch_info_[/* Type */];

    // Unconditional branch or call.
    Branch(
        uint32_t location, uint32_t target, XRegister rd, bool is_bare, bool compression_allowed);
    // Conditional branch.
    Branch(uint32_t location,
           uint32_t target,
           BranchCondition condition,
           XRegister lhs_reg,
           XRegister rhs_reg,
           bool is_bare,
           bool compression_allowed);
    // Label address or literal.
    Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type);
    Branch(uint32_t location, uint32_t target, FRegister rd, Type literal_type);

    // Some conditional branches with lhs = rhs are effectively NOPs, while some
    // others are effectively unconditional.
    static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs);
    static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs);
    static bool IsCompressed(Type type);

    static BranchCondition OppositeCondition(BranchCondition cond);

    Type GetType() const;
    Type GetOldType() const;
    BranchCondition GetCondition() const;
    XRegister GetLeftRegister() const;
    XRegister GetRightRegister() const;
    XRegister GetNonZeroRegister() const;
    FRegister GetFRegister() const;
    uint32_t GetTarget() const;
    uint32_t GetLocation() const;
    uint32_t GetOldLocation() const;
    uint32_t GetLength() const;
    uint32_t GetOldLength() const;
    uint32_t GetEndLocation() const;
    uint32_t GetOldEndLocation() const;
    bool IsBare() const;
    bool IsResolved() const;

    uint32_t NextBranchId() const;

    // Checks if the condition meets the compression requirements.
    bool IsCompressableCondition() const;

    // Returns the bit size of the signed offset that the branch instruction can handle.
    OffsetBits GetOffsetSize() const;

    // Calculates the distance between two byte locations in the assembler buffer and
    // returns the number of bits needed to represent the distance as a signed integer.
    static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);

    // Resolve a branch when the target is known.
    void Resolve(uint32_t target);

    // Relocate a branch by a given delta if needed due to expansion of this or another
    // branch at a given location by this delta (just changes location_ and target_).
    void Relocate(uint32_t expand_location, uint32_t delta);

    // If necessary, updates the type by promoting a short branch to a longer branch
    // based on the branch location and target. Returns the amount (in bytes) by
    // which the branch size has increased.
    uint32_t PromoteIfNeeded();

    // Returns the offset into the assembler buffer that shall be used as the base PC for
    // offset calculation. RISC-V always uses the address of the PC-relative instruction
    // as the PC, so this is essentially the location of that instruction.
    uint32_t GetOffsetLocation() const;

    // Calculates and returns the offset ready for encoding in the branch instruction(s).
    int32_t GetOffset() const;

    // Link with the next branch.
    void LinkToList(uint32_t next_branch_id);

   private:
    // Completes branch construction by determining and recording its type.
    void InitializeType(Type initial_type);
    // Helper for the above.
    void InitShortOrLong(OffsetBits ofs_size, std::initializer_list<Type> types);

    uint32_t old_location_;  // Offset into assembler buffer in bytes.
    uint32_t location_;      // Offset into assembler buffer in bytes.
    uint32_t target_;        // Offset into assembler buffer in bytes.

    XRegister lhs_reg_;          // Left-hand side register in conditional branches or
                                 // destination register in calls or literals.
    XRegister rhs_reg_;          // Right-hand side register in conditional branches.
    FRegister freg_;             // Destination register in FP literals.
    BranchCondition condition_;  // Condition for conditional branches.

    Type type_;      // Current type of the branch.
    Type old_type_;  // Initial type of the branch.

    bool compression_allowed_;

    // Id of the next branch bound to the same label in a singly-linked, zero-terminated
    // list. NOTE: Encoded the same way as a position in a linked `Label` (id +
    // sizeof(void*)); the `Label` itself is used to hold the head of this list.
    uint32_t next_branch_id_;
  };
2119
2120 // Branch and literal fixup.
2121
2122 void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset);
2123 void EmitBranch(Branch* branch);
2124 void EmitBranches();
2125 void EmitJumpTables();
2126 void EmitLiterals();
2127
2128 void FinalizeLabeledBranch(Riscv64Label* label);
2129 void Bcond(Riscv64Label* label,
2130 bool is_bare,
2131 BranchCondition condition,
2132 XRegister lhs,
2133 XRegister rhs);
2134 void Buncond(Riscv64Label* label, XRegister rd, bool is_bare);
2135 template <typename XRegisterOrFRegister>
2136 void LoadLiteral(Literal* literal, XRegisterOrFRegister rd, Branch::Type literal_type);
2137
2138 Branch* GetBranch(uint32_t branch_id);
2139 const Branch* GetBranch(uint32_t branch_id) const;
2140
2141 void ReserveJumpTableSpace();
2142 void PromoteBranches();
2143 void PatchCFI();
2144
2145 // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
2146 template <typename T>
Emit(T value)2147 void Emit(T value) {
2148 static_assert(std::is_same_v<T, uint32_t> || std::is_same_v<T, uint16_t>,
2149 "Only Integer types are allowed");
2150 if (overwriting_) {
2151 // Branches to labels are emitted into their placeholders here.
2152 buffer_.Store<T>(overwrite_location_, value);
2153 overwrite_location_ += sizeof(T);
2154 } else {
2155 // Other instructions are simply appended at the end here.
2156 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2157 buffer_.Emit<T>(value);
2158 }
2159 }
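
  // Illustrative note (a behavioral sketch, not additional API): during branch
  // fixup the assembler sets `overwriting_` and rewinds `overwrite_location_`
  // to a previously reserved placeholder, so the same `Emit32()` call patches
  // in place; in normal operation, `Emit32(0x00000013)` would simply append a
  // NOP (`addi zero, zero, 0`) at the end of the buffer.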

  void Emit16(uint32_t value) { Emit(dchecked_integral_cast<uint16_t>(value)); }
  void Emit32(uint32_t value) { Emit(value); }

  // Adjust base register and offset if needed for load/store with a large offset.
  void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs);

  // Helper templates for loads/stores with 32-bit offsets.
  template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
  void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset);
  template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
  void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset);
  template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
  void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset);
  template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
  void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset);

  // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`.
  void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp);

  // RVV constants and helpers.

  enum class Nf : uint32_t {
    k1 = 0b000,
    k2 = 0b001,
    k3 = 0b010,
    k4 = 0b011,
    k5 = 0b100,
    k6 = 0b101,
    k7 = 0b110,
    k8 = 0b111,
  };

  enum class VAIEncoding : uint32_t {
    //                 ----Operands----  | Type of Scalar               | Instruction type
    kOPIVV = 0b000,  // vector-vector    | --                           | R-type
    kOPFVV = 0b001,  // vector-vector    | --                           | R-type
    kOPMVV = 0b010,  // vector-vector    | --                           | R-type
    kOPIVI = 0b011,  // vector-immediate | imm[4:0]                     | R-type
    kOPIVX = 0b100,  // vector-scalar    | GPR x register rs1           | R-type
    kOPFVF = 0b101,  // vector-scalar    | FP f register rs1            | R-type
    kOPMVX = 0b110,  // vector-scalar    | GPR x register rs1           | R-type
    kOPCFG = 0b111,  // scalars-imms     | GPR x register rs1 & rs2/imm | R/I-type
  };

  enum class MemAddressMode : uint32_t {
    kUnitStride = 0b00,
    kIndexedUnordered = 0b01,
    kStrided = 0b10,
    kIndexedOrdered = 0b11,
  };

  enum class VectorWidth : uint32_t {
    k8 = 0b000,
    k16 = 0b101,
    k32 = 0b110,
    k64 = 0b111,

    kMask = 0b000,
    kWholeR = 0b000,
  };

  static constexpr uint32_t EncodeRVVMemF7(const Nf nf,
                                           const uint32_t mew,
                                           const MemAddressMode mop,
                                           const VM vm) {
    DCHECK(IsUint<3>(enum_cast<uint32_t>(nf)));
    DCHECK(IsUint<1>(mew));
    DCHECK(IsUint<2>(enum_cast<uint32_t>(mop)));
    DCHECK(IsUint<1>(enum_cast<uint32_t>(vm)));

    return enum_cast<uint32_t>(nf) << 4 | mew << 3 | enum_cast<uint32_t>(mop) << 1 |
           enum_cast<uint32_t>(vm);
  }
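
  // Worked example (illustrative): a unit-stride access with `nf` = `Nf::k2`
  // and `mew` = 0, assuming the unmasked `VM` encoding is 1, packs as
  // {nf[2:0] | mew | mop[1:0] | vm} = 0b001'0'00'1 = 0x11.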

  static constexpr uint32_t EncodeRVVF7(const uint32_t funct6, const VM vm) {
    DCHECK(IsUint<6>(funct6));
    return funct6 << 1 | enum_cast<uint32_t>(vm);
  }

  template <unsigned kWidth>
  static constexpr uint32_t EncodeIntWidth(const int32_t imm) {
    DCHECK(IsInt<kWidth>(imm));
    return static_cast<uint32_t>(imm) & MaskLeastSignificant<uint32_t>(kWidth);
  }

  static constexpr uint32_t EncodeInt5(const int32_t imm) { return EncodeIntWidth<5>(imm); }
  static constexpr uint32_t EncodeInt6(const int32_t imm) { return EncodeIntWidth<6>(imm); }
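
  // For example, `EncodeInt5(-1)` masks the sign-extended value down to five
  // bits and yields 0b11111 (0x1f), while `EncodeInt5(15)` yields 0x0f.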

  template <typename Reg>
  static constexpr uint32_t EncodeShortReg(const Reg reg) {
    DCHECK(IsShortReg(reg));
    return enum_cast<uint32_t>(reg) - 8u;
  }

  // Rearranges the given offset in the way {offset[0] | offset[1]}.
  static constexpr uint32_t EncodeOffset0_1(int32_t offset) {
    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<2>(u_offset));

    return u_offset >> 1 | (u_offset & 1u) << 1;
  }
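
  // For example, offset 0b10 encodes as 0b01 and offset 0b01 encodes as 0b10;
  // the two low bits simply swap places.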

  // Rearranges the given offset, scaled by 4, in the way {offset[5:2] | offset[7:6]}.
  static constexpr uint32_t ExtractOffset52_76(int32_t offset) {
    DCHECK(IsAligned<4>(offset)) << "Offset should be scalable by 4";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<6 + 2>(u_offset));

    uint32_t imm_52 = BitFieldExtract(u_offset, 2, 4);
    uint32_t imm_76 = BitFieldExtract(u_offset, 6, 2);

    return BitFieldInsert(imm_76, imm_52, 2, 4);
  }
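
  // Worked example: offset 0x44 (0b0100'0100) has offset[5:2] = 0b0001 and
  // offset[7:6] = 0b01, so the result is {0b0001 | 0b01} = 0b000101 = 0x5.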

  // Rearranges the given offset, scaled by 8, in the way {offset[5:3] | offset[8:6]}.
  static constexpr uint32_t ExtractOffset53_86(int32_t offset) {
    DCHECK(IsAligned<8>(offset)) << "Offset should be scalable by 8";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<6 + 3>(u_offset));

    uint32_t imm_53 = BitFieldExtract(u_offset, 3, 3);
    uint32_t imm_86 = BitFieldExtract(u_offset, 6, 3);

    return BitFieldInsert(imm_86, imm_53, 3, 3);
  }

  // Rearranges the given offset, scaled by 4, in the way {offset[5:2] | offset[6]}.
  static constexpr uint32_t ExtractOffset52_6(int32_t offset) {
    DCHECK(IsAligned<4>(offset)) << "Offset should be scalable by 4";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<5 + 2>(u_offset));

    uint32_t imm_52 = BitFieldExtract(u_offset, 2, 4);
    uint32_t imm_6 = BitFieldExtract(u_offset, 6, 1);

    return BitFieldInsert(imm_6, imm_52, 1, 4);
  }

  // Rearranges the given offset, scaled by 8, in the way {offset[5:3] | offset[7:6]}.
  static constexpr uint32_t ExtractOffset53_76(int32_t offset) {
    DCHECK(IsAligned<8>(offset)) << "Offset should be scalable by 8";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<5 + 3>(u_offset));

    uint32_t imm_53 = BitFieldExtract(u_offset, 3, 3);
    uint32_t imm_76 = BitFieldExtract(u_offset, 6, 2);

    return BitFieldInsert(imm_76, imm_53, 2, 3);
  }

  static constexpr bool IsImmCLuiEncodable(uint32_t uimm) {
    // The immediate of `c.lui` is a bit tricky: it is not a full 32-bit value but
    // bits [31:12] (where bit 17 marks the sign) shifted towards the bottom, i.e.
    // bits [19:0] are the meaningful ones. Since we want a signed, non-zero 6-bit
    // immediate, we accept values in the range [0x1, 0x1f] and, for negative
    // values, the range [0xfffe0, 0xfffff], because the sign bit was bit 17
    // (which is now bit 5 and replicated in the higher bits too). The encoding
    // with immediate = 0 is reserved. For more details, please see chapter 16.5
    // of the specification.

    return uimm != 0u && (IsUint<5>(uimm) || IsUint<5>(uimm - 0xfffe0u));
  }
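
  // For example, `IsImmCLuiEncodable(0x1f)` and `IsImmCLuiEncodable(0xfffe0)`
  // return true, while 0 (reserved encoding) and 0x20 (outside the signed
  // 6-bit range) are rejected.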

  // Emit helpers.

  // I-type instruction:
  //
  //   31                   20 19     15 14 12 11    7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . . . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [        imm11:0            rs1   funct3    rd        opcode    ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2>
  void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) {
    DCHECK(IsInt<12>(imm12)) << imm12;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = static_cast<uint32_t>(imm12) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        funct3 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }
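
  // Worked example (illustrative): `addi a0, a1, 1` is I-type with opcode 0x13,
  // funct3 0, rd = a0 (x10), rs1 = a1 (x11) and imm = 1, so it assembles to
  // 1 << 20 | 11 << 15 | 0 << 12 | 10 << 7 | 0x13 = 0x00158513.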

  // R-type instruction:
  //
  //   31         25 24     20 19     15 14 12 11    7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [    funct7       rs2       rs1   funct3    rd        opcode    ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2, typename Reg3>
  void EmitR(uint32_t funct7, Reg1 rs2, Reg2 rs1, uint32_t funct3, Reg3 rd, uint32_t opcode) {
    DCHECK(IsUint<7>(funct7));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = funct7 << 25 | static_cast<uint32_t>(rs2) << 20 |
                        static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // R-type instruction variant for floating-point fused multiply-add/sub (F[N]MADD/F[N]MSUB):
  //
  //   31     27 25 24     20 19     15 14 12 11    7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . | . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [   rs3   fmt    rs2       rs1   funct3     rd        opcode    ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2, typename Reg3, typename Reg4>
  void EmitR4(
      Reg1 rs3, uint32_t fmt, Reg2 rs2, Reg3 rs1, uint32_t funct3, Reg4 rd, uint32_t opcode) {
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs3)));
    DCHECK(IsUint<2>(fmt));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = static_cast<uint32_t>(rs3) << 27 | static_cast<uint32_t>(fmt) << 25 |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 | static_cast<uint32_t>(rd) << 7 |
                        opcode;
    Emit32(encoding);
  }

  // S-type instruction:
  //
  //   31         25 24     20 19     15 14 12 11    7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [   imm11:5       rs2       rs1   funct3  imm4:0      opcode    ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2>
  void EmitS(int32_t imm12, Reg1 rs2, Reg2 rs1, uint32_t funct3, uint32_t opcode) {
    DCHECK(IsInt<12>(imm12)) << imm12;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = (static_cast<uint32_t>(imm12) & 0xFE0) << 20 |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 |
                        (static_cast<uint32_t>(imm12) & 0x1F) << 7 | opcode;
    Emit32(encoding);
  }

  // I-type instruction variant for shifts (SLLI / SRLI / SRAI):
  //
  //   31       26 25       20 19     15 14 12 11    7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . | . . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [  imm11:6  imm5:0(shamt)   rs1   funct3    rd        opcode    ]
  //   -----------------------------------------------------------------
  void EmitI6(uint32_t funct6,
              uint32_t imm6,
              XRegister rs1,
              uint32_t funct3,
              XRegister rd,
              uint32_t opcode) {
    DCHECK(IsUint<6>(funct6));
    DCHECK(IsUint<6>(imm6)) << imm6;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = funct6 << 26 | static_cast<uint32_t>(imm6) << 20 |
                        static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // B-type instruction:
  //
  //   31 30       25 24     20 19     15 14 12 11   8 7 6           0
  //   -----------------------------------------------------------------
  //   [ | . . . . . | . . . . | . . . . | . . | . . . | | . . . . . . ]
  //   [imm12 imm11:5    rs2       rs1   funct3 imm4:1 imm11  opcode   ]
  //   -----------------------------------------------------------------
  void EmitB(int32_t offset, XRegister rs2, XRegister rs1, uint32_t funct3, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    DCHECK(IsInt<13>(offset)) << offset;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<7>(opcode));
    uint32_t imm12 = (static_cast<uint32_t>(offset) >> 1) & 0xfffu;
    uint32_t encoding = (imm12 & 0x800u) << (31 - 11) | (imm12 & 0x03f0u) << (25 - 4) |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 |
                        (imm12 & 0xfu) << 8 | (imm12 & 0x400u) >> (10 - 7) | opcode;
    Emit32(encoding);
  }
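
  // Note on the bit scatter above (descriptive only): `imm12` holds
  // offset[12:1]; offset[12] lands in encoding bit 31, offset[10:5] in bits
  // 30:25, offset[4:1] in bits 11:8 and offset[11] in bit 7, matching the
  // B-type layout.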

  // U-type instruction:
  //
  //   31                                   12 11    7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . . . . . . . . . . . . . . | . . . . | . . . . . . ]
  //   [               imm31:12                     rd        opcode   ]
  //   -----------------------------------------------------------------
  void EmitU(uint32_t imm20, XRegister rd, uint32_t opcode) {
    CHECK(IsUint<20>(imm20)) << imm20;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = imm20 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // J-type instruction:
  //
  //   31 30               21 19           12 11    7 6           0
  //   -----------------------------------------------------------------
  //   [ | . . . . . . . . . | | . . . . . . . | . . . . | . . . . . . ]
  //   [imm20    imm10:1    imm11   imm19:12        rd        opcode   ]
  //   -----------------------------------------------------------------
  void EmitJ(int32_t offset, XRegister rd, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    CHECK(IsInt<21>(offset)) << offset;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t imm20 = (static_cast<uint32_t>(offset) >> 1) & 0xfffffu;
    uint32_t encoding = (imm20 & 0x80000u) << (31 - 19) | (imm20 & 0x03ffu) << 21 |
                        (imm20 & 0x400u) << (20 - 10) | (imm20 & 0x7f800u) << (12 - 11) |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }
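
  // Note on the bit scatter above (descriptive only): `imm20` holds
  // offset[20:1]; offset[20] lands in encoding bit 31, offset[10:1] in bits
  // 30:21, offset[11] in bit 20 and offset[19:12] in bits 19:12, matching the
  // J-type layout.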

  // Compressed instruction encodings.

  // CR-type instruction:
  //
  //   15    12 11      7 6       2 1 0
  //   ---------------------------------
  //   [ . . . | . . . . | . . . . | . ]
  //   [ func4   rd/rs1      rs2    op ]
  //   ---------------------------------
  //
  void EmitCR(uint32_t funct4, XRegister rd_rs1, XRegister rs2, uint32_t opcode) {
    DCHECK(IsUint<4>(funct4));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd_rs1)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct4 << 12 | static_cast<uint32_t>(rd_rs1) << 7 |
                        static_cast<uint32_t>(rs2) << 2 | opcode;
    Emit16(encoding);
  }

  // CI-type instruction:
  //
  //   15  13 12 11    7 6       2 1 0
  //   ---------------------------------
  //   [ . . | | . . . . | . . . . | . ]
  //   [func3 imm rd/rs1     imm    op ]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCI(uint32_t funct3, Reg rd_rs1, uint32_t imm6, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd_rs1)));
    DCHECK(IsUint<6>(imm6));
    DCHECK(IsUint<2>(opcode));

    uint32_t immH1 = BitFieldExtract(imm6, 5, 1);
    uint32_t immL5 = BitFieldExtract(imm6, 0, 5);

    uint32_t encoding =
        funct3 << 13 | immH1 << 12 | static_cast<uint32_t>(rd_rs1) << 7 | immL5 << 2 | opcode;
    Emit16(encoding);
  }

  // CSS-type instruction:
  //
  //   15  13 12        7 6       2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . | . . . . | . ]
  //   [func3    imm6        rs2    op ]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCSS(uint32_t funct3, uint32_t offset6, Reg rs2, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<6>(offset6));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct3 << 13 | offset6 << 7 | static_cast<uint32_t>(rs2) << 2 | opcode;
    Emit16(encoding);
  }

  // CIW-type instruction:
  //
  //   15  13 12            5 4   2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . . . | . . | . ]
  //   [func3       imm8       rd'  op ]
  //   ---------------------------------
  //
  void EmitCIW(uint32_t funct3, uint32_t imm8, XRegister rd_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<8>(imm8));
    DCHECK(IsShortReg(rd_s)) << rd_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct3 << 13 | imm8 << 5 | EncodeShortReg(rd_s) << 2 | opcode;
    Emit16(encoding);
  }

  // CL/S-type instruction:
  //
  //   15  13 12  10 9  7 6 5 4   2 1 0
  //   ---------------------------------
  //   [ . . | . . | . . | . | . . | . ]
  //   [func3  imm   rs1' imm rds2'  op]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCM(uint32_t funct3, uint32_t imm5, XRegister rs1_s, Reg rd_rs2_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(imm5));
    DCHECK(IsShortReg(rs1_s)) << rs1_s;
    DCHECK(IsShortReg(rd_rs2_s)) << rd_rs2_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t immH3 = BitFieldExtract(imm5, 2, 3);
    uint32_t immL2 = BitFieldExtract(imm5, 0, 2);

    uint32_t encoding = funct3 << 13 | immH3 << 10 | EncodeShortReg(rs1_s) << 7 | immL2 << 5 |
                        EncodeShortReg(rd_rs2_s) << 2 | opcode;
    Emit16(encoding);
  }

  // CA-type instruction:
  //
  //   15         10 9  7 6 5 4   2 1 0
  //   ---------------------------------
  //   [ . . . . . | . . | . | . . | . ]
  //   [   funct6   rds1' funct2 rs2' op]
  //   ---------------------------------
  //
  void EmitCA(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, uint32_t rs2_v, uint32_t opcode) {
    DCHECK(IsUint<6>(funct6));
    DCHECK(IsShortReg(rd_rs1_s)) << rd_rs1_s;
    DCHECK(IsUint<2>(funct2));
    DCHECK(IsUint<3>(rs2_v));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding =
        funct6 << 10 | EncodeShortReg(rd_rs1_s) << 7 | funct2 << 5 | rs2_v << 2 | opcode;
    Emit16(encoding);
  }

  void EmitCAReg(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, XRegister rs2_s, uint32_t opcode) {
    DCHECK(IsShortReg(rs2_s)) << rs2_s;
    EmitCA(funct6, rd_rs1_s, funct2, EncodeShortReg(rs2_s), opcode);
  }

  void EmitCAImm(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, uint32_t funct3, uint32_t opcode) {
    EmitCA(funct6, rd_rs1_s, funct2, funct3, opcode);
  }

  // CB-type instruction:
  //
  //   15  13 12  10 9  7 6       2 1 0
  //   ---------------------------------
  //   [ . . | . . | . . | . . . . | . ]
  //   [func3 offset rs1'   offset  op ]
  //   ---------------------------------
  //
  void EmitCB(uint32_t funct3, int32_t offset8, XRegister rd_rs1_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<8>(offset8));
    DCHECK(IsShortReg(rd_rs1_s)) << rd_rs1_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t offsetH3 = BitFieldExtract<uint32_t>(offset8, 5, 3);
    uint32_t offsetL5 = BitFieldExtract<uint32_t>(offset8, 0, 5);

    uint32_t encoding =
        funct3 << 13 | offsetH3 << 10 | EncodeShortReg(rd_rs1_s) << 7 | offsetL5 << 2 | opcode;
    Emit16(encoding);
  }

  // Wrappers for EmitCB with different immediate bit permutations.
  void EmitCBBranch(uint32_t funct3, int32_t offset, XRegister rs1_s, uint32_t opcode) {
    DCHECK(IsInt<9>(offset));
    DCHECK_ALIGNED(offset, 2);

    uint32_t u_offset = static_cast<uint32_t>(offset);

    // offset[8|4:3]
    uint32_t offsetH3 = (BitFieldExtract(u_offset, 8, 1) << 2) |
                        BitFieldExtract(u_offset, 3, 2);
    // offset[7:6|2:1|5]
    uint32_t offsetL5 = (BitFieldExtract(u_offset, 6, 2) << 3) |
                        (BitFieldExtract(u_offset, 1, 2) << 1) |
                        BitFieldExtract(u_offset, 5, 1);

    EmitCB(funct3, BitFieldInsert(offsetL5, offsetH3, 5, 3), rs1_s, opcode);
  }

  void EmitCBArithmetic(
      uint32_t funct3, uint32_t funct2, uint32_t imm, XRegister rd_s, uint32_t opcode) {
    uint32_t imm_5 = BitFieldExtract(imm, 5, 1);
    uint32_t immH3 = BitFieldInsert(funct2, imm_5, 2, 1);
    uint32_t immL5 = BitFieldExtract(imm, 0, 5);

    EmitCB(funct3, BitFieldInsert(immL5, immH3, 5, 3), rd_s, opcode);
  }

  // CJ-type instruction:
  //
  //   15  13 12            2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . . . . . . | . ]
  //   [func3    jump target 11     op ]
  //   ---------------------------------
  //
  void EmitCJ(uint32_t funct3, int32_t offset, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    DCHECK(IsInt<12>(offset)) << offset;
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<2>(opcode));

    uint32_t uoffset = static_cast<uint32_t>(offset);
    // offset[11|4|9:8|10|6|7|3:1|5]
    uint32_t jumpt = (BitFieldExtract(uoffset, 11, 1) << 10) |
                     (BitFieldExtract(uoffset, 4, 1) << 9) |
                     (BitFieldExtract(uoffset, 8, 2) << 7) |
                     (BitFieldExtract(uoffset, 10, 1) << 6) |
                     (BitFieldExtract(uoffset, 6, 1) << 5) |
                     (BitFieldExtract(uoffset, 7, 1) << 4) |
                     (BitFieldExtract(uoffset, 1, 3) << 1) |
                     BitFieldExtract(uoffset, 5, 1);

    DCHECK(IsUint<11>(jumpt));

    uint32_t encoding = funct3 << 13 | jumpt << 2 | opcode;
    Emit16(encoding);
  }

  ArenaVector<Branch> branches_;

  // For checking that we finalize the code only once.
  bool finalized_;

  // Whether appending instructions at the end of the buffer or overwriting the existing ones.
  bool overwriting_;
  // The current overwrite location.
  uint32_t overwrite_location_;

  // Use `std::deque<>` for literal labels to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;
  ArenaDeque<Literal> long_literals_;  // 64-bit literals separated for alignment reasons.

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for `GetAdjustedPosition()`, see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  uint32_t last_branch_id_;

  Riscv64ExtensionMask enabled_extensions_;
  uint32_t available_scratch_core_registers_;
  uint32_t available_scratch_fp_registers_;

  static constexpr uint32_t kXlen = 64;

  friend class ScopedExtensionsOverride;
  friend class ScratchRegisterScope;

  DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler);
};

class ScopedExtensionsOverride {
 public:
  ScopedExtensionsOverride(Riscv64Assembler* assembler, Riscv64ExtensionMask enabled_extensions)
      : assembler_(assembler),
        old_enabled_extensions_(assembler->enabled_extensions_) {
    assembler->enabled_extensions_ = enabled_extensions;
  }

  ~ScopedExtensionsOverride() {
    assembler_->enabled_extensions_ = old_enabled_extensions_;
  }

 protected:
  static Riscv64ExtensionMask GetEnabledExtensions(Riscv64Assembler* assembler) {
    return assembler->enabled_extensions_;
  }

 private:
  Riscv64Assembler* const assembler_;
  const Riscv64ExtensionMask old_enabled_extensions_;
};

template <Riscv64ExtensionMask kMask>
class ScopedExtensionsRestriction : public ScopedExtensionsOverride {
 public:
  explicit ScopedExtensionsRestriction(Riscv64Assembler* assembler)
      : ScopedExtensionsOverride(assembler, GetEnabledExtensions(assembler) & kMask) {}
};

template <Riscv64ExtensionMask kMask>
class ScopedExtensionsInclusion : public ScopedExtensionsOverride {
 public:
  explicit ScopedExtensionsInclusion(Riscv64Assembler* assembler)
      : ScopedExtensionsOverride(assembler, GetEnabledExtensions(assembler) | kMask) {}
};

template <Riscv64ExtensionMask kMask>
using ScopedExtensionsExclusion = ScopedExtensionsRestriction<~kMask>;

using ScopedLrScExtensionsRestriction =
    ScopedExtensionsRestriction<kRiscv64LrScSequenceExtensionsMask>;
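
// A hypothetical usage sketch (illustrative only; the exact `LrW()`/`ScW()`
// signatures are assumptions): restricting the enabled extensions between the
// LR and the SC lets debug builds catch forbidden instructions, such as loads
// and stores, emitted inside the sequence:
//
//   {
//     ScopedLrScExtensionsRestriction slser(&assembler);
//     assembler.LrW(old_value, address, aqrl);
//     ...  // Only instructions from the restricted set here.
//     assembler.ScW(store_result, new_value, address, aqrl);
//   }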

class ScratchRegisterScope {
 public:
  explicit ScratchRegisterScope(Riscv64Assembler* assembler)
      : assembler_(assembler),
        old_available_scratch_core_registers_(assembler->available_scratch_core_registers_),
        old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {}

  ~ScratchRegisterScope() {
    assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_;
    assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_;
  }

  // Allocate a scratch `XRegister`. There must be an available register to allocate.
  XRegister AllocateXRegister() {
    CHECK_NE(assembler_->available_scratch_core_registers_, 0u);
    // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)).
    uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) -
                       CLZ(assembler_->available_scratch_core_registers_);
    assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    return enum_cast<XRegister>(reg_num);
  }

  // Free a previously unavailable core register for use as a scratch register.
  // This can be an arbitrary register, not necessarily the usual `TMP` or `TMP2`.
  void FreeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u);
    assembler_->available_scratch_core_registers_ |= 1u << reg_num;
  }

  // The number of available scratch core registers.
  size_t AvailableXRegisters() {
    return POPCOUNT(assembler_->available_scratch_core_registers_);
  }

  // Make sure a core register is available for use as a scratch register.
  void IncludeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    assembler_->available_scratch_core_registers_ |= 1u << reg_num;
  }

  // Make sure a core register is not available for use as a scratch register.
  void ExcludeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
  }

  // Allocate a scratch `FRegister`. There must be an available register to allocate.
  FRegister AllocateFRegister() {
    CHECK_NE(assembler_->available_scratch_fp_registers_, 0u);
    // Allocate the highest available scratch register (same as for core registers).
    uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) -
                       CLZ(assembler_->available_scratch_fp_registers_);
    assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    return enum_cast<FRegister>(reg_num);
  }

  // Free a previously unavailable FP register for use as a scratch register.
  // This can be an arbitrary register, not necessarily the usual `FTMP`.
  void FreeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u);
    assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
  }

  // The number of available scratch FP registers.
  size_t AvailableFRegisters() {
    return POPCOUNT(assembler_->available_scratch_fp_registers_);
  }

  // Make sure an FP register is available for use as a scratch register.
  void IncludeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
  }

  // Make sure an FP register is not available for use as a scratch register.
  void ExcludeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
  }

 private:
  Riscv64Assembler* const assembler_;
  const uint32_t old_available_scratch_core_registers_;
  const uint32_t old_available_scratch_fp_registers_;

  DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope);
};
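
// A minimal usage sketch (hypothetical, for illustration): the scope hands out
// scratch registers and restores the availability masks on destruction.
//
//   {
//     ScratchRegisterScope srs(&assembler);
//     XRegister tmp = srs.AllocateXRegister();  // Typically TMP (T6) first.
//     assembler.Add(tmp, rs1, rs2);
//   }  // `tmp` becomes available again here.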

constexpr Riscv64ExtensionMask kRiscv64CompressedExtensionsMask =
    Riscv64ExtensionBit(Riscv64Extension::kZca) |
    Riscv64ExtensionBit(Riscv64Extension::kZcd) |
    Riscv64ExtensionBit(Riscv64Extension::kZcb);

using ScopedNoCInstructions = ScopedExtensionsExclusion<kRiscv64CompressedExtensionsMask>;
using ScopedUseCInstructions = ScopedExtensionsInclusion<kRiscv64CompressedExtensionsMask>;
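
// Illustrative sketch (not part of this header's contract): wrapping emission
// in `ScopedNoCInstructions` guarantees full-size 4-byte encodings, e.g. for
// code that must be patched later:
//
//   {
//     ScopedNoCInstructions no_compression(&assembler);
//     assembler.Addi(rd, rs1, imm);  // Never emitted as a 2-byte `c.addi` here.
//   }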

}  // namespace riscv64
}  // namespace art

#endif  // ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_