/***************************************************************************************
 * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
 * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
 * Combinational lookup table for one fixed uop index of a non-segment indexed
 * vector load/store.
 *
 * `src` is decoded as `{emul[1:0], lmul[1:0]}`; both fields are used as exponents
 * (the number of uops is `1 << max(lmul, emul)`). For the selected combination the
 * table outputs the register offsets that belong to uop number `uopIdx`:
 * `outOffsetVs2` for the vs2 operand and `outOffsetVd` for the destination.
 * If `uopIdx` is not a valid uop number for a combination, both offsets are 0.
 *
 * @param uopIdx elaboration-time index of the uop this table instance describes
 */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
   * Computes the (vs2 offset, vd offset) pair of uop `uopIdx`.
   *
   * Only non-segment indexed load/store is considered.
   *
   * @param lmul   exponent of the data register group size
   * @param emul   exponent of the index register group size
   * @param uopIdx uop number to look up
   * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is outside
   *         `0 until (1 << max(lmul, emul))`
   */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // The uop count is set by the larger of the two register groups.
    val uopNum = 1 << (lmul max emul)
    if (uopIdx < 0 || uopIdx >= uopNum) {
      (0, 0)
    } else if (lmul < emul) {
      // lmul < emul: vs2 advances every uop, vd advances once per 2^(emul - lmul) uops
      (uopIdx, uopIdx / (1 << (emul - lmul)))
    } else {
      // lmul >= emul: vd advances every uop, vs2 advances once per 2^(lmul - emul) uops
      (uopIdx / (1 << (lmul - emul)), uopIdx)
    }
  }

  // indexed load/store: one table row per (emul, lmul) combination, emul-major order
  val combVemulNf : Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Input pattern is {emul, lmul}; output pattern is {offsetVs2, offsetVd}, 3 bits each.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}

/**
 * Elaboration-time constants shared by the vector uop-splitting logic.
 */
trait VectorConstants {
  val MAX_VLMUL = 8
  val VECTOR_TMP_REG_LMUL = 32 // 32~46 -> 15
  val VECTOR_COMPRESS = 1    // in v0 regfile
  val MAX_INDEXED_LS_UOPNUM = 64
}

/**
 * IO bundle of [[DecodeUnitComp]].
 *
 * `in` carries the simple-decoded instruction together with its uop information,
 * `out.complexDecodedInsts` carries up to `RenameWidth` split uops per cycle, and
 * `complexNum` is a 3-bit count output.
 */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst =
WireInit(io.in.bits.simpleDecodedInst) 111 private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 112 private val inUopInfo = io.in.bits.uopInfo 113 private val outValids = io.out.complexDecodedInsts.map(_.valid) 114 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 115 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 116 private val outComplexNum = io.complexNum 117 118 val maxUopSize = MaxUopSize 119 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 120 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 121 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 122 }.elsewhen(inInstFields.RS1 === 0.U) { 123 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 124 } 125 } 126 127 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 128 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 129 //input bits 130 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 131 132 val src1 = Cat(0.U(1.W), instFields.RS1) 133 val src2 = Cat(0.U(1.W), instFields.RS2) 134 val dest = Cat(0.U(1.W), instFields.RD) 135 136 val nf = instFields.NF 137 val width = instFields.WIDTH(1, 0) 138 139 //output of DecodeUnit 140 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 141 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 142 val lmul = Wire(UInt(4.W)) 143 val isVsetSimple = Wire(Bool()) 144 145 val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i))) 146 indexedLSRegOffset.map(_.src := 0.U) 147 148 //pre decode 149 lmul := latchedUopInfo.lmul 150 isVsetSimple := latchedInst.isVset 151 val vlmulReg = latchedInst.vpu.vlmul 152 val vsewReg = latchedInst.vpu.vsew 153 val vstartReg = latchedInst.vpu.vstart 154 155 //Type of uop Div 156 val typeOfSplit = latchedInst.uopSplitType 157 val src1Type = latchedInst.srcType(0) 158 val src1IsImm = src1Type === SrcType.imm 
159 val src1IsFp = src1Type === SrcType.fp 160 161 val isVstore = FuType.isVStore(latchedInst.fuType) 162 163 numOfUop := latchedUopInfo.numOfUop 164 numOfWB := latchedUopInfo.numOfWB 165 166 //uops dispatch 167 val s_idle :: s_active :: Nil = Enum(2) 168 val state = RegInit(s_idle) 169 val stateNext = WireDefault(state) 170 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 171 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 172 val uopResNext = WireInit(uopRes) 173 val e64 = 3.U(2.W) 174 val isUsSegment = instFields.MOP === 0.U && ((nf =/= 0.U && instFields.LUMOP === 0.U) || instFields.LUMOP === "b10000".U) 175 val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U 176 val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U 177 178 //uop div up to maxUopSize 179 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 180 val fixedDecodedInst = Wire(Vec(maxUopSize, new DecodedInst)) 181 182 csBundle.foreach { case dst => 183 dst := latchedInst 184 dst.numUops := latchedUopInfo.numOfUop 185 dst.numWB := latchedUopInfo.numOfWB 186 dst.firstUop := false.B 187 dst.lastUop := false.B 188 dst.vlsInstr := false.B 189 } 190 191 csBundle(0).firstUop := true.B 192 csBundle(numOfUop - 1.U).lastUop := true.B 193 194 // when vstart is not zero, the last uop will modify vstart to zero 195 // therefore, blockback and flush pipe 196 csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U 197 csBundle(0.U).flushPipe := vstartReg =/= 0.U 198 199 switch(typeOfSplit) { 200 is(UopSplitType.VSET) { 201 // In simple decoder, rfWen and vecWen are not set 202 when(isVsetSimple) { 203 // Default 204 // uop0 set rd, never flushPipe 205 csBundle(0).fuType := FuType.vsetiwi.U 206 csBundle(0).flushPipe := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U) 207 csBundle(0).blockBackward := false.B 208 csBundle(0).rfWen := true.B 209 // uop1 set vl, vsetvl will flushPipe 210 csBundle(1).ldest := Vl_IDX.U 211 csBundle(1).vecWen := false.B 212 csBundle(1).vlWen := 
true.B 213 csBundle(1).flushPipe := false.B 214 csBundle(1).blockBackward := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U) 215 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 216 // write nothing, uop0 is a nop instruction 217 csBundle(0).rfWen := false.B 218 csBundle(0).fpWen := false.B 219 csBundle(0).vecWen := false.B 220 csBundle(0).vlWen := false.B 221 csBundle(1).fuType := FuType.vsetfwf.U 222 csBundle(1).srcType(0) := SrcType.no 223 csBundle(1).srcType(2) := SrcType.no 224 csBundle(1).srcType(3) := SrcType.no 225 csBundle(1).srcType(4) := SrcType.vp 226 csBundle(1).lsrc(4) := Vl_IDX.U 227 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 228 // uop0: mv vtype gpr to vector region 229 csBundle(0).srcType(0) := SrcType.xp 230 csBundle(0).srcType(1) := SrcType.no 231 csBundle(0).lsrc(0) := src2 232 csBundle(0).lsrc(1) := 0.U 233 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 234 csBundle(0).fuType := FuType.i2v.U 235 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 236 csBundle(0).rfWen := false.B 237 csBundle(0).fpWen := false.B 238 csBundle(0).vecWen := true.B 239 csBundle(0).vlWen := false.B 240 // uop1: uvsetvcfg_vv 241 csBundle(1).fuType := FuType.vsetfwf.U 242 // vl 243 csBundle(1).srcType(0) := SrcType.no 244 csBundle(1).srcType(2) := SrcType.no 245 csBundle(1).srcType(3) := SrcType.no 246 csBundle(1).srcType(4) := SrcType.vp 247 csBundle(1).lsrc(4) := Vl_IDX.U 248 // vtype 249 csBundle(1).srcType(1) := SrcType.vp 250 csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U 251 csBundle(1).vecWen := false.B 252 csBundle(1).vlWen := true.B 253 csBundle(1).ldest := Vl_IDX.U 254 }.elsewhen(dest === 0.U) { 255 // write nothing, uop0 is a nop instruction 256 csBundle(0).rfWen := false.B 257 csBundle(0).fpWen := false.B 258 csBundle(0).vecWen := false.B 259 csBundle(0).vlWen := false.B 260 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) { 261 // because vsetvl 
may modified src2 when src2 == rd, 262 // we need to modify vd in second uop to avoid dependency 263 // uop0 set vl 264 csBundle(0).fuType := FuType.vsetiwf.U 265 csBundle(0).ldest := Vl_IDX.U 266 csBundle(0).rfWen := false.B 267 csBundle(0).vlWen := true.B 268 // uop1 set rd 269 csBundle(1).fuType := FuType.vsetiwi.U 270 csBundle(1).ldest := dest 271 csBundle(1).rfWen := true.B 272 csBundle(1).vlWen := false.B 273 } 274 // use bypass vtype from vtypeGen 275 csBundle(0).vpu.connectVType(io.vtypeBypass) 276 csBundle(1).vpu.connectVType(io.vtypeBypass) 277 } 278 } 279 is(UopSplitType.VEC_VVV) { 280 for (i <- 0 until MAX_VLMUL) { 281 csBundle(i).lsrc(0) := src1 + i.U 282 csBundle(i).lsrc(1) := src2 + i.U 283 csBundle(i).lsrc(2) := dest + i.U 284 csBundle(i).ldest := dest + i.U 285 csBundle(i).uopIdx := i.U 286 } 287 } 288 is(UopSplitType.VEC_VFV) { 289 /* 290 f to vector move 291 */ 292 csBundle(0).srcType(0) := SrcType.fp 293 csBundle(0).srcType(1) := SrcType.imm 294 csBundle(0).srcType(2) := SrcType.imm 295 csBundle(0).lsrc(1) := 0.U 296 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 297 csBundle(0).fuType := FuType.f2v.U 298 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 299 csBundle(0).vecWen := true.B 300 csBundle(0).vpu.isReverse := false.B 301 /* 302 LMUL 303 */ 304 for (i <- 0 until MAX_VLMUL) { 305 csBundle(i + 1).srcType(0) := SrcType.vp 306 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 307 csBundle(i + 1).lsrc(1) := src2 + i.U 308 csBundle(i + 1).lsrc(2) := dest + i.U 309 csBundle(i + 1).ldest := dest + i.U 310 csBundle(i + 1).uopIdx := i.U 311 } 312 } 313 is(UopSplitType.VEC_EXT2) { 314 for (i <- 0 until MAX_VLMUL / 2) { 315 csBundle(2 * i).lsrc(1) := src2 + i.U 316 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 317 csBundle(2 * i).ldest := dest + (2 * i).U 318 csBundle(2 * i).uopIdx := (2 * i).U 319 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 320 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 321 csBundle(2 * i + 1).ldest := dest + (2 * 
i + 1).U 322 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 323 } 324 } 325 is(UopSplitType.VEC_EXT4) { 326 for (i <- 0 until MAX_VLMUL / 4) { 327 csBundle(4 * i).lsrc(1) := src2 + i.U 328 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 329 csBundle(4 * i).ldest := dest + (4 * i).U 330 csBundle(4 * i).uopIdx := (4 * i).U 331 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 332 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 333 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 334 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 335 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 336 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 337 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 338 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 339 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 340 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 341 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 342 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 343 } 344 } 345 is(UopSplitType.VEC_EXT8) { 346 for (i <- 0 until MAX_VLMUL) { 347 csBundle(i).lsrc(1) := src2 348 csBundle(i).lsrc(2) := dest + i.U 349 csBundle(i).ldest := dest + i.U 350 csBundle(i).uopIdx := i.U 351 } 352 } 353 is(UopSplitType.VEC_0XV) { 354 /* 355 i/f to vector move 356 */ 357 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 358 csBundle(0).srcType(1) := SrcType.imm 359 csBundle(0).srcType(2) := SrcType.imm 360 csBundle(0).lsrc(1) := 0.U 361 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 362 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 363 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 364 csBundle(0).rfWen := false.B 365 csBundle(0).fpWen := false.B 366 csBundle(0).vecWen := true.B 367 /* 368 vmv.s.x 369 */ 370 csBundle(1).srcType(0) := SrcType.vp 371 csBundle(1).srcType(1) := SrcType.imm 372 csBundle(1).srcType(2) := SrcType.vp 373 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 374 csBundle(1).lsrc(1) := 0.U 375 csBundle(1).lsrc(2) := dest 376 
csBundle(1).ldest := dest 377 csBundle(1).rfWen := false.B 378 csBundle(1).fpWen := false.B 379 csBundle(1).vecWen := true.B 380 csBundle(1).uopIdx := 0.U 381 } 382 is(UopSplitType.VEC_VXV) { 383 /* 384 i to vector move 385 */ 386 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 387 csBundle(0).srcType(1) := SrcType.imm 388 csBundle(0).srcType(2) := SrcType.imm 389 csBundle(0).lsrc(1) := 0.U 390 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 391 csBundle(0).fuType := FuType.i2v.U 392 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 393 csBundle(0).vecWen := true.B 394 csBundle(0).vpu.isReverse := false.B 395 /* 396 LMUL 397 */ 398 for (i <- 0 until MAX_VLMUL) { 399 csBundle(i + 1).srcType(0) := SrcType.vp 400 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 401 csBundle(i + 1).lsrc(1) := src2 + i.U 402 csBundle(i + 1).lsrc(2) := dest + i.U 403 csBundle(i + 1).ldest := dest + i.U 404 csBundle(i + 1).uopIdx := i.U 405 } 406 } 407 is(UopSplitType.VEC_VVW) { 408 for (i <- 0 until MAX_VLMUL / 2) { 409 csBundle(2 * i).lsrc(0) := src1 + i.U 410 csBundle(2 * i).lsrc(1) := src2 + i.U 411 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 412 csBundle(2 * i).ldest := dest + (2 * i).U 413 csBundle(2 * i).uopIdx := (2 * i).U 414 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 415 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 416 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 417 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 418 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 419 } 420 } 421 is(UopSplitType.VEC_VFW) { 422 /* 423 f to vector move 424 */ 425 csBundle(0).srcType(0) := SrcType.fp 426 csBundle(0).srcType(1) := SrcType.imm 427 csBundle(0).srcType(2) := SrcType.imm 428 csBundle(0).lsrc(1) := 0.U 429 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 430 csBundle(0).fuType := FuType.f2v.U 431 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 432 csBundle(0).rfWen := false.B 433 
csBundle(0).fpWen := false.B 434 csBundle(0).vecWen := true.B 435 436 for (i <- 0 until MAX_VLMUL / 2) { 437 csBundle(2 * i + 1).srcType(0) := SrcType.vp 438 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 439 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 440 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 441 csBundle(2 * i + 1).ldest := dest + (2 * i).U 442 csBundle(2 * i + 1).uopIdx := (2 * i).U 443 csBundle(2 * i + 2).srcType(0) := SrcType.vp 444 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 445 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 446 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 447 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 448 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 449 } 450 } 451 is(UopSplitType.VEC_WVW) { 452 for (i <- 0 until MAX_VLMUL / 2) { 453 csBundle(2 * i).lsrc(0) := src1 + i.U 454 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 455 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 456 csBundle(2 * i).ldest := dest + (2 * i).U 457 csBundle(2 * i).uopIdx := (2 * i).U 458 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 459 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 460 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 461 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 462 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 463 } 464 } 465 is(UopSplitType.VEC_VXW) { 466 /* 467 i to vector move 468 */ 469 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 470 csBundle(0).srcType(1) := SrcType.imm 471 csBundle(0).srcType(2) := SrcType.imm 472 csBundle(0).lsrc(1) := 0.U 473 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 474 csBundle(0).fuType := FuType.i2v.U 475 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 476 csBundle(0).vecWen := true.B 477 478 for (i <- 0 until MAX_VLMUL / 2) { 479 csBundle(2 * i + 1).srcType(0) := SrcType.vp 480 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 481 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 482 csBundle(2 * i + 
1).lsrc(2) := dest + (2 * i).U 483 csBundle(2 * i + 1).ldest := dest + (2 * i).U 484 csBundle(2 * i + 1).uopIdx := (2 * i).U 485 csBundle(2 * i + 2).srcType(0) := SrcType.vp 486 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 487 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 488 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 489 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 490 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 491 } 492 } 493 is(UopSplitType.VEC_WXW) { 494 /* 495 i to vector move 496 */ 497 csBundle(0).srcType(0) := SrcType.reg 498 csBundle(0).srcType(1) := SrcType.imm 499 csBundle(0).srcType(2) := SrcType.imm 500 csBundle(0).lsrc(1) := 0.U 501 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 502 csBundle(0).fuType := FuType.i2v.U 503 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 504 csBundle(0).vecWen := true.B 505 506 for (i <- 0 until MAX_VLMUL / 2) { 507 csBundle(2 * i + 1).srcType(0) := SrcType.vp 508 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 509 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 510 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 511 csBundle(2 * i + 1).ldest := dest + (2 * i).U 512 csBundle(2 * i + 1).uopIdx := (2 * i).U 513 csBundle(2 * i + 2).srcType(0) := SrcType.vp 514 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 515 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 516 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 517 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 518 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 519 } 520 } 521 is(UopSplitType.VEC_WVV) { 522 for (i <- 0 until MAX_VLMUL / 2) { 523 524 csBundle(2 * i).lsrc(0) := src1 + i.U 525 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 526 csBundle(2 * i).lsrc(2) := dest + i.U 527 csBundle(2 * i).ldest := dest + i.U 528 csBundle(2 * i).uopIdx := (2 * i).U 529 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 530 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 531 csBundle(2 * i + 1).lsrc(2) := dest + i.U 532 csBundle(2 * i + 
1).ldest := dest + i.U 533 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 534 } 535 } 536 is(UopSplitType.VEC_WFW) { 537 /* 538 f to vector move 539 */ 540 csBundle(0).srcType(0) := SrcType.fp 541 csBundle(0).srcType(1) := SrcType.imm 542 csBundle(0).srcType(2) := SrcType.imm 543 csBundle(0).lsrc(1) := 0.U 544 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 545 csBundle(0).fuType := FuType.f2v.U 546 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 547 csBundle(0).rfWen := false.B 548 csBundle(0).fpWen := false.B 549 csBundle(0).vecWen := true.B 550 551 for (i <- 0 until MAX_VLMUL / 2) { 552 csBundle(2 * i + 1).srcType(0) := SrcType.vp 553 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 554 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 555 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 556 csBundle(2 * i + 1).ldest := dest + (2 * i).U 557 csBundle(2 * i + 1).uopIdx := (2 * i).U 558 csBundle(2 * i + 2).srcType(0) := SrcType.vp 559 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 560 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 561 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 562 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 563 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 564 } 565 } 566 is(UopSplitType.VEC_WXV) { 567 /* 568 i to vector move 569 */ 570 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 571 csBundle(0).srcType(1) := SrcType.imm 572 csBundle(0).srcType(2) := SrcType.imm 573 csBundle(0).lsrc(1) := 0.U 574 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 575 csBundle(0).fuType := FuType.i2v.U 576 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 577 csBundle(0).vecWen := true.B 578 579 for (i <- 0 until MAX_VLMUL / 2) { 580 csBundle(2 * i + 1).srcType(0) := SrcType.vp 581 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 582 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 583 csBundle(2 * i + 1).lsrc(2) := dest + i.U 584 csBundle(2 * i + 
1).ldest := dest + i.U 585 csBundle(2 * i + 1).uopIdx := (2 * i).U 586 csBundle(2 * i + 2).srcType(0) := SrcType.vp 587 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 588 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 589 csBundle(2 * i + 2).lsrc(2) := dest + i.U 590 csBundle(2 * i + 2).ldest := dest + i.U 591 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 592 } 593 } 594 is(UopSplitType.VEC_VVM) { 595 csBundle(0).lsrc(2) := dest 596 csBundle(0).ldest := dest 597 csBundle(0).uopIdx := 0.U 598 for (i <- 1 until MAX_VLMUL) { 599 csBundle(i).lsrc(0) := src1 + i.U 600 csBundle(i).lsrc(1) := src2 + i.U 601 csBundle(i).lsrc(2) := dest 602 csBundle(i).ldest := dest 603 csBundle(i).uopIdx := i.U 604 } 605 } 606 is(UopSplitType.VEC_VFM) { 607 /* 608 f to vector move 609 */ 610 csBundle(0).srcType(0) := SrcType.fp 611 csBundle(0).srcType(1) := SrcType.imm 612 csBundle(0).srcType(2) := SrcType.imm 613 csBundle(0).lsrc(1) := 0.U 614 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 615 csBundle(0).fuType := FuType.f2v.U 616 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 617 csBundle(0).rfWen := false.B 618 csBundle(0).fpWen := false.B 619 csBundle(0).vecWen := true.B 620 //LMUL 621 csBundle(1).srcType(0) := SrcType.vp 622 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 623 csBundle(1).lsrc(2) := dest 624 csBundle(1).ldest := dest 625 csBundle(1).uopIdx := 0.U 626 for (i <- 1 until MAX_VLMUL) { 627 csBundle(i + 1).srcType(0) := SrcType.vp 628 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 629 csBundle(i + 1).lsrc(1) := src2 + i.U 630 csBundle(i + 1).lsrc(2) := dest 631 csBundle(i + 1).ldest := dest 632 csBundle(i + 1).uopIdx := i.U 633 } 634 csBundle(numOfUop - 1.U).ldest := dest 635 } 636 is(UopSplitType.VEC_VXM) { 637 /* 638 i to vector move 639 */ 640 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 641 csBundle(0).srcType(1) := SrcType.imm 642 csBundle(0).srcType(2) := SrcType.imm 643 csBundle(0).lsrc(1) := 0.U 644 csBundle(0).ldest := 
VECTOR_TMP_REG_LMUL.U 645 csBundle(0).fuType := FuType.i2v.U 646 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 647 csBundle(0).vecWen := true.B 648 //LMUL 649 csBundle(1).srcType(0) := SrcType.vp 650 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 651 csBundle(1).lsrc(2) := dest 652 csBundle(1).ldest := dest 653 csBundle(1).uopIdx := 0.U 654 for (i <- 1 until MAX_VLMUL) { 655 csBundle(i + 1).srcType(0) := SrcType.vp 656 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 657 csBundle(i + 1).lsrc(1) := src2 + i.U 658 csBundle(i + 1).lsrc(2) := dest 659 csBundle(i + 1).ldest := dest 660 csBundle(i + 1).uopIdx := i.U 661 } 662 csBundle(numOfUop - 1.U).ldest := dest 663 } 664 is(UopSplitType.VEC_SLIDE1UP) { 665 /* 666 i to vector move 667 */ 668 csBundle(0).srcType(0) := SrcType.reg 669 csBundle(0).srcType(1) := SrcType.imm 670 csBundle(0).srcType(2) := SrcType.imm 671 csBundle(0).lsrc(1) := 0.U 672 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 673 csBundle(0).fuType := FuType.i2v.U 674 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 675 csBundle(0).vecWen := true.B 676 //LMUL 677 csBundle(1).srcType(0) := SrcType.vp 678 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 679 csBundle(1).lsrc(2) := dest 680 csBundle(1).ldest := dest 681 csBundle(1).uopIdx := 0.U 682 for (i <- 1 until MAX_VLMUL) { 683 csBundle(i + 1).srcType(0) := SrcType.vp 684 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 685 csBundle(i + 1).lsrc(1) := src2 + i.U 686 csBundle(i + 1).lsrc(2) := dest + i.U 687 csBundle(i + 1).ldest := dest + i.U 688 csBundle(i + 1).uopIdx := i.U 689 } 690 } 691 is(UopSplitType.VEC_FSLIDE1UP) { 692 /* 693 f to vector move 694 */ 695 csBundle(0).srcType(0) := SrcType.fp 696 csBundle(0).srcType(1) := SrcType.imm 697 csBundle(0).srcType(2) := SrcType.imm 698 csBundle(0).lsrc(1) := 0.U 699 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 700 csBundle(0).fuType := FuType.f2v.U 701 csBundle(0).fuOpType := 
Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 702 csBundle(0).rfWen := false.B 703 csBundle(0).fpWen := false.B 704 csBundle(0).vecWen := true.B 705 //LMUL 706 csBundle(1).srcType(0) := SrcType.vp 707 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 708 csBundle(1).lsrc(1) := src2 709 csBundle(1).lsrc(2) := dest 710 csBundle(1).ldest := dest 711 csBundle(1).uopIdx := 0.U 712 for (i <- 1 until MAX_VLMUL) { 713 csBundle(i + 1).srcType(0) := SrcType.vp 714 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 715 csBundle(i + 1).lsrc(1) := src2 + i.U 716 csBundle(i + 1).lsrc(2) := dest + i.U 717 csBundle(i + 1).ldest := dest + i.U 718 csBundle(i + 1).uopIdx := i.U 719 } 720 } 721 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 722 /* 723 i to vector move 724 */ 725 csBundle(0).srcType(0) := SrcType.reg 726 csBundle(0).srcType(1) := SrcType.imm 727 csBundle(0).srcType(2) := SrcType.imm 728 csBundle(0).lsrc(1) := 0.U 729 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 730 csBundle(0).fuType := FuType.i2v.U 731 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 732 csBundle(0).vecWen := true.B 733 //LMUL 734 for (i <- 0 until MAX_VLMUL) { 735 csBundle(2 * i + 1).srcType(0) := SrcType.vp 736 csBundle(2 * i + 1).srcType(1) := SrcType.vp 737 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 738 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 739 csBundle(2 * i + 1).lsrc(2) := dest + i.U 740 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 741 csBundle(2 * i + 1).uopIdx := (2 * i).U 742 if (2 * i + 2 < MAX_VLMUL * 2) { 743 csBundle(2 * i + 2).srcType(0) := SrcType.vp 744 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 745 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 746 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 747 csBundle(2 * i + 2).ldest := dest + i.U 748 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 749 } 750 } 751 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 752 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 753 
csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 754 } 755 is(UopSplitType.VEC_FSLIDE1DOWN) { 756 /* 757 f to vector move 758 */ 759 csBundle(0).srcType(0) := SrcType.fp 760 csBundle(0).srcType(1) := SrcType.imm 761 csBundle(0).srcType(2) := SrcType.imm 762 csBundle(0).lsrc(1) := 0.U 763 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 764 csBundle(0).fuType := FuType.f2v.U 765 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 766 csBundle(0).rfWen := false.B 767 csBundle(0).fpWen := false.B 768 csBundle(0).vecWen := true.B 769 //LMUL 770 for (i <- 0 until MAX_VLMUL) { 771 csBundle(2 * i + 1).srcType(0) := SrcType.vp 772 csBundle(2 * i + 1).srcType(1) := SrcType.vp 773 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 774 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 775 csBundle(2 * i + 1).lsrc(2) := dest + i.U 776 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 777 csBundle(2 * i + 1).uopIdx := (2 * i).U 778 if (2 * i + 2 < MAX_VLMUL * 2) { 779 csBundle(2 * i + 2).srcType(0) := SrcType.vp 780 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 781 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 782 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 783 csBundle(2 * i + 2).ldest := dest + i.U 784 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 785 } 786 } 787 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 788 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 789 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 790 } 791 is(UopSplitType.VEC_VRED) { 792 when(vlmulReg === "b001".U) { 793 csBundle(0).srcType(2) := SrcType.DC 794 csBundle(0).lsrc(0) := src2 + 1.U 795 csBundle(0).lsrc(1) := src2 796 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 797 csBundle(0).uopIdx := 0.U 798 } 799 when(vlmulReg === "b010".U) { 800 csBundle(0).srcType(2) := SrcType.DC 801 csBundle(0).lsrc(0) := src2 + 1.U 802 csBundle(0).lsrc(1) := src2 803 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 804 csBundle(0).uopIdx := 0.U 805 806 csBundle(1).srcType(2) 
:= SrcType.DC 807 csBundle(1).lsrc(0) := src2 + 3.U 808 csBundle(1).lsrc(1) := src2 + 2.U 809 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 810 csBundle(1).uopIdx := 1.U 811 812 csBundle(2).srcType(2) := SrcType.DC 813 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 814 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 815 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 816 csBundle(2).uopIdx := 2.U 817 } 818 when(vlmulReg === "b011".U) { 819 for (i <- 0 until MAX_VLMUL) { 820 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 821 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 822 csBundle(i).lsrc(1) := src2 + (i * 2).U 823 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 824 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 825 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 826 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 827 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 828 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 829 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 830 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 831 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 832 } 833 csBundle(i).srcType(2) := SrcType.DC 834 csBundle(i).uopIdx := i.U 835 } 836 } 837 when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) { 838 /* 839 * 2 <= vlmul <= 8 840 */ 841 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 842 csBundle(numOfUop - 1.U).lsrc(0) := src1 843 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 844 csBundle(numOfUop - 1.U).lsrc(2) := dest 845 csBundle(numOfUop - 1.U).ldest := dest 846 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 847 } 848 } 849 is(UopSplitType.VEC_VFRED) { 850 val vlmul = vlmulReg 851 val vsew = vsewReg 852 when(vlmul === VLmul.m8){ 853 for (i <- 0 until 4) { 854 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 855 csBundle(i).lsrc(1) := src2 + (i * 2).U 856 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 857 csBundle(i).uopIdx := i.U 858 } 859 for (i <- 4 until 6) { 860 
csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 861 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 862 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 863 csBundle(i).uopIdx := i.U 864 } 865 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 866 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 867 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 868 csBundle(6).uopIdx := 6.U 869 when(vsew === VSew.e64) { 870 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 871 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 872 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 873 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 874 csBundle(7).uopIdx := 7.U 875 csBundle(8).lsrc(0) := src1 876 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 877 csBundle(8).ldest := dest 878 csBundle(8).uopIdx := 8.U 879 } 880 when(vsew === VSew.e32) { 881 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 882 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 883 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 884 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 885 csBundle(7).uopIdx := 7.U 886 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 887 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 888 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 889 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 890 csBundle(8).uopIdx := 8.U 891 csBundle(9).lsrc(0) := src1 892 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 893 csBundle(9).ldest := dest 894 csBundle(9).uopIdx := 9.U 895 } 896 when(vsew === VSew.e16) { 897 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 898 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 899 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 900 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 901 csBundle(7).uopIdx := 7.U 902 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 903 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 904 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 905 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 906 csBundle(8).uopIdx := 
8.U 907 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 908 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 909 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 910 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 911 csBundle(9).uopIdx := 9.U 912 csBundle(10).lsrc(0) := src1 913 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 914 csBundle(10).ldest := dest 915 csBundle(10).uopIdx := 10.U 916 } 917 } 918 when(vlmul === VLmul.m4) { 919 for (i <- 0 until 2) { 920 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 921 csBundle(i).lsrc(1) := src2 + (i * 2).U 922 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 923 csBundle(i).uopIdx := i.U 924 } 925 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 926 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 927 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 928 csBundle(2).uopIdx := 2.U 929 when(vsew === VSew.e64) { 930 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 931 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 932 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 933 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 934 csBundle(3).uopIdx := 3.U 935 csBundle(4).lsrc(0) := src1 936 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 937 csBundle(4).ldest := dest 938 csBundle(4).uopIdx := 4.U 939 } 940 when(vsew === VSew.e32) { 941 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 942 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 943 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 944 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 945 csBundle(3).uopIdx := 3.U 946 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 947 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 948 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 949 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 950 csBundle(4).uopIdx := 4.U 951 csBundle(5).lsrc(0) := src1 952 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 953 csBundle(5).ldest := dest 954 csBundle(5).uopIdx := 5.U 955 } 956 when(vsew === VSew.e16) { 957 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL 
+ 2).U 958 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 959 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 960 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 961 csBundle(3).uopIdx := 3.U 962 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 963 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 964 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 965 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 966 csBundle(4).uopIdx := 4.U 967 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 968 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 969 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 970 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 971 csBundle(5).uopIdx := 5.U 972 csBundle(6).lsrc(0) := src1 973 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 974 csBundle(6).ldest := dest 975 csBundle(6).uopIdx := 6.U 976 } 977 } 978 when(vlmul === VLmul.m2) { 979 csBundle(0).lsrc(0) := src2 + 1.U 980 csBundle(0).lsrc(1) := src2 + 0.U 981 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 982 csBundle(0).uopIdx := 0.U 983 when(vsew === VSew.e64) { 984 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 985 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 986 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 987 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 988 csBundle(1).uopIdx := 1.U 989 csBundle(2).lsrc(0) := src1 990 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 991 csBundle(2).ldest := dest 992 csBundle(2).uopIdx := 2.U 993 } 994 when(vsew === VSew.e32) { 995 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 996 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 997 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 998 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 999 csBundle(1).uopIdx := 1.U 1000 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1001 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1002 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1003 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 1004 csBundle(2).uopIdx := 2.U 1005 csBundle(3).lsrc(0) := src1 1006 
csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1007 csBundle(3).ldest := dest 1008 csBundle(3).uopIdx := 3.U 1009 } 1010 when(vsew === VSew.e16) { 1011 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1012 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1013 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1014 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 1015 csBundle(1).uopIdx := 1.U 1016 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1017 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1018 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1019 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 1020 csBundle(2).uopIdx := 2.U 1021 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 1022 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1023 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 1024 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 1025 csBundle(3).uopIdx := 3.U 1026 csBundle(4).lsrc(0) := src1 1027 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 1028 csBundle(4).ldest := dest 1029 csBundle(4).uopIdx := 4.U 1030 } 1031 } 1032 when(vlmul === VLmul.m1) { 1033 when(vsew === VSew.e64) { 1034 csBundle(0).lsrc(0) := src2 1035 csBundle(0).lsrc(1) := src2 1036 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1037 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1038 csBundle(0).uopIdx := 0.U 1039 csBundle(1).lsrc(0) := src1 1040 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1041 csBundle(1).ldest := dest 1042 csBundle(1).uopIdx := 1.U 1043 } 1044 when(vsew === VSew.e32) { 1045 csBundle(0).lsrc(0) := src2 1046 csBundle(0).lsrc(1) := src2 1047 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1048 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1049 csBundle(0).uopIdx := 0.U 1050 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1051 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1052 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1053 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1054 csBundle(1).uopIdx := 1.U 1055 csBundle(2).lsrc(0) := src1 1056 csBundle(2).lsrc(1) 
:= (VECTOR_TMP_REG_LMUL + 1).U 1057 csBundle(2).ldest := dest 1058 csBundle(2).uopIdx := 2.U 1059 } 1060 when(vsew === VSew.e16) { 1061 csBundle(0).lsrc(0) := src2 1062 csBundle(0).lsrc(1) := src2 1063 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1064 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1065 csBundle(0).uopIdx := 0.U 1066 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1067 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1068 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1069 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1070 csBundle(1).uopIdx := 1.U 1071 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1072 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1073 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1074 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 1075 csBundle(2).uopIdx := 2.U 1076 csBundle(3).lsrc(0) := src1 1077 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1078 csBundle(3).ldest := dest 1079 csBundle(3).uopIdx := 3.U 1080 } 1081 } 1082 when(vlmul === VLmul.mf2) { 1083 when(vsew === VSew.e32) { 1084 csBundle(0).lsrc(0) := src2 1085 csBundle(0).lsrc(1) := src2 1086 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1087 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1088 csBundle(0).uopIdx := 0.U 1089 csBundle(1).lsrc(0) := src1 1090 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1091 csBundle(1).ldest := dest 1092 csBundle(1).uopIdx := 1.U 1093 } 1094 when(vsew === VSew.e16) { 1095 csBundle(0).lsrc(0) := src2 1096 csBundle(0).lsrc(1) := src2 1097 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1098 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1099 csBundle(0).uopIdx := 0.U 1100 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1101 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1102 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1103 csBundle(1).vpu.fpu.isFoldTo1_8 := true.B 1104 csBundle(1).uopIdx := 1.U 1105 csBundle(2).lsrc(0) := src1 1106 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1107 csBundle(2).ldest := dest 1108 
// Tail of the preceding reduction case (its `is(...)` header lies above this chunk):
// closes the mf2/e16 branch opened on the previous physical line, then handles mf4/e16.
          csBundle(2).uopIdx := 2.U
        }
      }
      when(vlmul === VLmul.mf4) {
        when(vsew === VSew.e16) {
          csBundle(0).lsrc(0) := src2
          csBundle(0).lsrc(1) := src2
          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := src1
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := dest
          csBundle(1).uopIdx := 1.U
        }
      }
    }

    // Ordered floating-point reduction: every uop chains through VECTOR_TMP_REG_LMUL,
    // the first uop reads src1 and the last uop writes dest.
    is(UopSplitType.VEC_VFREDOSUM) {
      import yunsuan.VfaluType
      val vlmul = vlmulReg
      val vsew = vsewReg
      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

      // Fills csBundle(0 until vlmax) for one (vlmul, vsew) combination.
      //
      // vlmax: number of uops generated for this combination.
      // fold : group size (2, 4 or 8). A uop whose index is a multiple of `fold` reads
      //        src2 + i / fold on lsrc(1)/lsrc(2); every other uop reads the temp
      //        register and gets a fold flag set.
      //        NOTE(review): presumably the number of SEW-wide elements per vector
      //        register (2 for e64, 4 for e32, 8 for e16) -- confirm.
      //
      // fold == 2 has no isWiden handling, exactly like the e64 loops this replaces.
      def genCsBundle_VEC_VFREDOSUM(vlmax: Int, fold: Int): Unit = {
        require(fold == 2 || fold == 4 || fold == 8, s"unsupported fold factor $fold")
        for (i <- 0 until vlmax) {
          val startsGroup = i % fold == 0
          val isLastUop = i == vlmax - 1
          val notGroupStart = (!startsGroup).B
          csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
          csBundle(i).lsrc(1) := (if (startsGroup) src2 + (i / fold).U else VECTOR_TMP_REG_LMUL.U)
          // Priority matters: "last uop" wins over the widening special case (e.g. mf2/e32, i == 1).
          csBundle(i).lsrc(2) := (
            if (startsGroup) src2 + (i / fold).U
            else if (isLastUop) dest
            else if (fold != 2 && i % fold == 1) Mux(isWiden, src2 + (i / fold).U, VECTOR_TMP_REG_LMUL.U)
            else VECTOR_TMP_REG_LMUL.U
          )
          csBundle(i).ldest := (if (isLastUop) dest else VECTOR_TMP_REG_LMUL.U)
          if (fold == 2) {
            csBundle(i).vpu.fpu.isFoldTo1_2 := notGroupStart
          } else if (fold == 4) {
            // widening variant uses the next-coarser fold flag
            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && notGroupStart
            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && notGroupStart
          } else {
            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && notGroupStart
            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && notGroupStart
          }
          csBundle(i).uopIdx := i.U
        }
      }

      // Same when-nesting and ordering as the hand-unrolled original.
      when(vlmul === VLmul.m8) {
        when(vsew === VSew.e64) { genCsBundle_VEC_VFREDOSUM(16, 2) }
        when(vsew === VSew.e32) { genCsBundle_VEC_VFREDOSUM(32, 4) }
        when(vsew === VSew.e16) { genCsBundle_VEC_VFREDOSUM(64, 8) }
      }
      when(vlmul === VLmul.m4) {
        when(vsew === VSew.e64) { genCsBundle_VEC_VFREDOSUM(8, 2) }
        when(vsew === VSew.e32) { genCsBundle_VEC_VFREDOSUM(16, 4) }
        when(vsew === VSew.e16) { genCsBundle_VEC_VFREDOSUM(32, 8) }
      }
      when(vlmul === VLmul.m2) {
        when(vsew === VSew.e64) { genCsBundle_VEC_VFREDOSUM(4, 2) }
        when(vsew === VSew.e32) { genCsBundle_VEC_VFREDOSUM(8, 4) }
        when(vsew === VSew.e16) { genCsBundle_VEC_VFREDOSUM(16, 8) }
      }
      when(vlmul === VLmul.m1) {
        when(vsew === VSew.e64) { genCsBundle_VEC_VFREDOSUM(2, 2) }
        when(vsew === VSew.e32) { genCsBundle_VEC_VFREDOSUM(4, 4) }
        when(vsew === VSew.e16) { genCsBundle_VEC_VFREDOSUM(8, 8) }
      }
      when(vlmul === VLmul.mf2) {
        when(vsew === VSew.e32) { genCsBundle_VEC_VFREDOSUM(2, 4) }
        when(vsew === VSew.e16) { genCsBundle_VEC_VFREDOSUM(4, 8) }
      }
      when(vlmul === VLmul.mf4) {
        when(vsew === VSew.e16) { genCsBundle_VEC_VFREDOSUM(2, 8) }
      }
    }

    is(UopSplitType.VEC_SLIDEUP) {
      // i to vector move
      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).srcType(2) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      csBundle(0).vecWen := true.B
      // LMUL
      for (i <- 0 until MAX_VLMUL)
        for (j <- 0 to i) {
          val old_vd = if (j == 0) {
            dest + i.U
          } else (VECTOR_TMP_REG_LMUL + j).U
          val vd = if (j == i) {
// Continuation of the VEC_SLIDEUP case opened on the previous physical line:
// the `j == i` branch of `vd`, followed by the per-uop connections.
            dest + i.U
          } else (VECTOR_TMP_REG_LMUL + j + 1).U
          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
        }
    }

    is(UopSplitType.VEC_SLIDEDOWN) {
      // uop 0: i to vector move into VECTOR_TMP_REG_LMUL
      val moveUop = csBundle(0)
      val moveOp = Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0))
      moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
      moveUop.srcType(1) := SrcType.imm
      moveUop.srcType(2) := SrcType.imm
      moveUop.lsrc(1) := 0.U
      moveUop.ldest := VECTOR_TMP_REG_LMUL.U
      moveUop.fuType := FuType.i2v.U
      moveUop.fuOpType := Cat(moveOp, vsewReg)
      moveUop.vecWen := true.B
      // LMUL: group i holds i + 1 uops, walked with j descending
      for {
        i <- 0 until MAX_VLMUL
        j <- (0 to i).reverse
      } {
        when(i.U < lmul) {
          // dest + lmul - 1 - i: old_vd of the j == 0 uop and vd of the j == i uop
          def groupDest = dest + lmul - 1.U - i.U
          val old_vd = if (j == 0) groupDest else (VECTOR_TMP_REG_LMUL + j).U
          val vd = if (j == i) groupDest else (VECTOR_TMP_REG_LMUL + j + 1).U
          // uops are addressed backwards from numOfUop
          val backOffset = i * (i + 1) / 2 + i - j + 1
          val uop = csBundle(numOfUop - backOffset.U)
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := src2 + lmul - 1.U - j.U
          uop.lsrc(2) := old_vd
          uop.ldest := vd
          uop.uopIdx := numOfUop - (backOffset + 1).U
        }
      }
    }

    is(UopSplitType.VEC_M0X) {
      // LMUL
      for (i <- 0 until MAX_VLMUL) {
        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
        val ldest = (VECTOR_TMP_REG_LMUL + i).U
// Continuation of the VEC_M0X case opened on the previous physical line (body of its loop over i).
        csBundle(i).srcType(0) := srcType0
        csBundle(i).srcType(1) := SrcType.vp
        csBundle(i).rfWen := false.B
        csBundle(i).fpWen := false.B
        csBundle(i).vecWen := true.B
        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        csBundle(i).lsrc(1) := src2
        // csBundle(i).lsrc(2) := dest + i.U DontCare
        csBundle(i).ldest := ldest
        csBundle(i).uopIdx := i.U
      }
      // the last uop writes the integer register `dest` instead of a vector temp
      csBundle(numOfUop - 1.U).rfWen := Mux(dest === 0.U, false.B, true.B)
      csBundle(numOfUop - 1.U).fpWen := false.B
      csBundle(numOfUop - 1.U).vecWen := false.B
      csBundle(numOfUop - 1.U).ldest := dest
    }

    is(UopSplitType.VEC_MVV) {
      // LMUL: two uops per index i, at csBundle(2 * i) and csBundle(2 * i + 1)
      (0 until MAX_VLMUL).foreach { i =>
        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
        val firstIdx = i * 2
        val secondIdx = firstIdx + 1

        // first uop of the pair: reads and writes dest + i
        val first = csBundle(firstIdx)
        first.srcType(0) := srcType0
        first.srcType(1) := SrcType.vp
        first.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        first.lsrc(1) := src2
        first.lsrc(2) := dest + i.U
        first.ldest := dest + i.U
        first.uopIdx := firstIdx.U

        // second uop of the pair: writes temp register VECTOR_TMP_REG_LMUL + i;
        // lsrc(2) is deliberately not connected here
        val second = csBundle(secondIdx)
        second.srcType(0) := srcType0
        second.srcType(1) := SrcType.vp
        second.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        second.lsrc(1) := src2
        second.ldest := (VECTOR_TMP_REG_LMUL + i).U
        second.uopIdx := secondIdx.U
      }
    }
    is(UopSplitType.VEC_VWW) {
      for (i <- 0 until MAX_VLMUL*2) {
        when(i.U < lmul){
          csBundle(i).srcType(2) := SrcType.DC
          csBundle(i).lsrc(0) := src2 + i.U
          csBundle(i).lsrc(1) := src2 + i.U
          // csBundle(i).lsrc(2) := dest + (2 * i).U
          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
          csBundle(i).uopIdx := i.U
        } otherwise {
          csBundle(i).srcType(2) := SrcType.DC
          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
// Continuation of the VEC_VWW case opened on the previous physical line (inside its `otherwise` branch).
          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
          // csBundle(i).lsrc(2) := dest + (2 * i).U
          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
          csBundle(i).uopIdx := i.U
        }
        // NOTE(review): the four connects below do not use `i` yet are re-issued on every
        // iteration; they look loop-invariant and could probably move after the loop -- confirm.
        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
        csBundle(numOfUop-1.U).lsrc(0) := src1
        csBundle(numOfUop-1.U).lsrc(2) := dest
        csBundle(numOfUop-1.U).ldest := dest
      }
    }
    is(UopSplitType.VEC_RGATHER) {
      // Generates len * len uops: uop (i, j) reads src1 + i and src2 + j. Within a row i the
      // uops chain through temp registers; only the j == len - 1 uop writes dest + i.
      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val uop = csBundle(i * len + j)
          val vd_old = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U
          val vd = if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
          // srcType(0..2) are intentionally left as set before this case
          uop.lsrc(0) := src1 + i.U
          uop.lsrc(1) := src2 + j.U
          uop.lsrc(2) := vd_old
          uop.ldest := vd
          uop.uopIdx := (i * len + j).U
        }
      }
      // only the three vlmulReg encodings below generate uops here
      switch(vlmulReg) {
        is("b001".U) { genCsBundle_VEC_RGATHER(2) }
        is("b010".U) { genCsBundle_VEC_RGATHER(4) }
        is("b011".U) { genCsBundle_VEC_RGATHER(8) }
      }
    }
    is(UopSplitType.VEC_RGATHER_VX) {
      def genCsBundle_RGATHER_VX(len:Int): Unit ={
        for (i <- 0 until len)
          for (j <- 0 until len) {
            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
            csBundle(i * len + j + 1).lsrc(2) := vd_old
            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
// Continuation of genCsBundle_RGATHER_VX (VEC_RGATHER_VX case) opened on the previous physical line.
            csBundle(i * len + j + 1).ldest := vd
            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
          }
      }
      // i to vector move
      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).srcType(2) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      // default (len = 1); the switch below overrides it for the listed vlmulReg encodings
      genCsBundle_RGATHER_VX(1)
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_RGATHER_VX(2)
        }
        is("b010".U ){
          genCsBundle_RGATHER_VX(4)
        }
        is("b011".U ){
          genCsBundle_RGATHER_VX(8)
        }
      }
    }
    is(UopSplitType.VEC_RGATHEREI16) {
      // vsewReg == 0 variant: each (i, j) pair expands to TWO uops at (i * len + j) * 2 and
      // (i * len + j) * 2 + 1, reading src1 + 2i and src1 + 2i + 1 respectively. The pair
      // chains through VECTOR_TMP_REG_LMUL + 2j; only the second uop of the last column
      // (j == len - 1) writes dest + i.
      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val pairBase = (i * len + j) * 2

          val firstUop = csBundle(pairBase)
          firstUop.lsrc(0) := src1 + (i * 2).U
          firstUop.lsrc(1) := src2 + j.U
          firstUop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
          firstUop.ldest := (VECTOR_TMP_REG_LMUL + j * 2).U
          firstUop.uopIdx := pairBase.U

          val secondUop = csBundle(pairBase + 1)
          secondUop.lsrc(0) := src1 + (i * 2 + 1).U
          secondUop.lsrc(1) := src2 + j.U
          secondUop.lsrc(2) := (VECTOR_TMP_REG_LMUL + j * 2).U
          secondUop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
          secondUop.uopIdx := (pairBase + 1).U
        }
      }
      // Shared body of the three one-uop-per-(i, j) generators below; they differ only in
      // which src1 register row i reads, given by src1RegIdx(i).
      def genCsBundle_VEC_RGATHEREI16_common(len: Int)(src1RegIdx: Int => Int): Unit = {
        for (i <- 0 until len; j <- 0 until len) {
          val uop = csBundle(i * len + j)
          val vd_old = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U
          val vd = if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
          uop.lsrc(0) := src1 + src1RegIdx(i).U
          uop.lsrc(1) := src2 + j.U
          uop.lsrc(2) := vd_old
          uop.ldest := vd
          uop.uopIdx := (i * len + j).U
        }
      }
      // fall-through variant of the dispatch below: row i reads src1 + i
      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit =
        genCsBundle_VEC_RGATHEREI16_common(len)(i => i)
      // e32 variant: row i reads src1 + i / 2
      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit =
        genCsBundle_VEC_RGATHEREI16_common(len)(_ / 2)
      // e64 variant: row i reads src1 + i / 4
      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit =
        genCsBundle_VEC_RGATHEREI16_common(len)(_ / 4)
      // default (len = 1); the switch below overrides it for the listed vlmulReg encodings
      when(!vsewReg.orR){
        genCsBundle_VEC_RGATHEREI16_SEW8(1)
      }.elsewhen(vsewReg === VSew.e32){
        genCsBundle_VEC_RGATHEREI16_SEW32(1)
      }.elsewhen(vsewReg === VSew.e64){
        genCsBundle_VEC_RGATHEREI16_SEW64(1)
      }.otherwise{
        genCsBundle_VEC_RGATHEREI16(1)
      }
      switch(vlmulReg) {
        is("b001".U) {
          when(!vsewReg.orR) {
            genCsBundle_VEC_RGATHEREI16_SEW8(2)
          }.elsewhen(vsewReg === VSew.e32){
            genCsBundle_VEC_RGATHEREI16_SEW32(2)
          }.elsewhen(vsewReg === VSew.e64){
            genCsBundle_VEC_RGATHEREI16_SEW64(2)
          }.otherwise{
// Continuation of the VEC_RGATHEREI16 dispatch opened on the previous physical line
// (inside the `.otherwise` of the "b001" entry).
            genCsBundle_VEC_RGATHEREI16(2)
          }
        }
        is("b010".U) {
          when(!vsewReg.orR) {
            genCsBundle_VEC_RGATHEREI16_SEW8(4)
          }.elsewhen(vsewReg === VSew.e32){
            genCsBundle_VEC_RGATHEREI16_SEW32(4)
          }.elsewhen(vsewReg === VSew.e64){
            genCsBundle_VEC_RGATHEREI16_SEW64(4)
          }.otherwise{
            genCsBundle_VEC_RGATHEREI16(4)
          }
        }
        is("b011".U) {
          // no vsewReg == 0 branch for this encoding
          when(vsewReg === VSew.e32){
            genCsBundle_VEC_RGATHEREI16_SEW32(8)
          }.elsewhen(vsewReg === VSew.e64){
            genCsBundle_VEC_RGATHEREI16_SEW64(8)
          }.otherwise{
            genCsBundle_VEC_RGATHEREI16(8)
          }
        }
      }
    }
    is(UopSplitType.VEC_COMPRESS) {
      // Row i starts at csBundle(i * (i + 3) / 2) and holds i + 2 uops, except the last row
      // (i == len - 1), which holds i + 1. Every uop of row i reads src2 + i.
      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
        for (i <- 0 until len) {
          val isLastRow = i == len - 1
          val jlen = if (isLastRow) i + 1 else i + 2
          val rowBase = i * (i + 3) / 2
          for (j <- 0 until jlen) {
            // the extra uop (j == i + 1) of a non-last row writes VECTOR_TMP_REG_LMUL
            val isExtraUop = j == i + 1
            val uop = csBundle(rowBase + j)
            val vd_old = if (i == j) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
            val vd = (
              if (isLastRow) dest + j.U
              else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
              else (VECTOR_TMP_REG_LMUL + j + 1).U
            )
            // srcType(0) and srcType(2) are DontCare for the extra uop
            val src13Type = if (isExtraUop) DontCare else SrcType.vp
            uop.vecWen := true.B
            uop.v0Wen := false.B
            uop.srcType(0) := src13Type
            uop.srcType(1) := SrcType.vp
            uop.srcType(2) := src13Type
            // row 0 reads src1 directly; later rows read VECTOR_TMP_REG_LMUL
            uop.lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
            uop.lsrc(1) := src2 + i.U
            uop.lsrc(2) := vd_old
            uop.ldest := vd
            uop.uopIdx := (rowBase + j).U
          }
        }
      }
      switch(vlmulReg) {
        is("b001".U) { genCsBundle_VEC_COMPRESS(2) }
        is("b010".U) { genCsBundle_VEC_COMPRESS(4) }
        is("b011".U) { genCsBundle_VEC_COMPRESS(8) }
      }
    }
    is(UopSplitType.VEC_MVNR) {
      for (i <- 0 until MAX_VLMUL) {
// Continuation of the VEC_MVNR case opened on the previous physical line (body of its loop over i).
        csBundle(i).lsrc(0) := src1 + i.U
        csBundle(i).lsrc(1) := src2 + i.U
        csBundle(i).lsrc(2) := dest + i.U
        csBundle(i).ldest := dest + i.U
        csBundle(i).uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_US_LDST) {
      /*
      FMV.D.X
       */
      // uop 0: i2v move into VECTOR_TMP_REG_LMUL
      val moveUop = csBundle(0)
      moveUop.srcType(0) := SrcType.reg
      moveUop.srcType(1) := SrcType.imm
      moveUop.lsrc(1) := 0.U
      moveUop.ldest := VECTOR_TMP_REG_LMUL.U
      moveUop.fuType := FuType.i2v.U
      moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      moveUop.rfWen := false.B
      moveUop.fpWen := false.B
      moveUop.vecWen := true.B
      moveUop.vlsInstr := true.B
      //LMUL: uop i + 1 reads the temp register and targets dest + i
      (0 until MAX_VLMUL).foreach { i =>
        val uop = csBundle(i + 1)
        uop.srcType(0) := SrcType.vp
        uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        uop.lsrc(2) := dest + i.U // old vd
        uop.ldest := dest + i.U
        uop.uopIdx := i.U
        uop.vlsInstr := true.B
      }
      // for isUsSegment: first uop waits forward, last uop blocks backward
      csBundle.head.waitForward := isUsSegment
      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
    }
    is(UopSplitType.VEC_US_FF_LD) {
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      csBundle(0).vlsInstr := true.B
      //LMUL
      for (i <- 0 until MAX_VLMUL) {
        csBundle(i + 1).srcType(0) := SrcType.vp
        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
        csBundle(i + 1).ldest := dest + i.U
        csBundle(i + 1).uopIdx := i.U
        csBundle(i + 1).vlsInstr := true.B
      }
      csBundle.head.waitForward := isUsSegment
      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
      // last uop
read vl and write vl 1709 csBundle(numOfUop - 1.U).srcType(0) := SrcType.no 1710 csBundle(numOfUop - 1.U).srcType(1) := SrcType.no 1711 csBundle(numOfUop - 1.U).srcType(2) := SrcType.no 1712 csBundle(numOfUop - 1.U).srcType(3) := SrcType.no 1713 csBundle(numOfUop - 1.U).srcType(4) := SrcType.vp 1714 csBundle(numOfUop - 1.U).lsrc(4) := Vl_IDX.U 1715 // vtype 1716 csBundle(numOfUop - 1.U).vecWen := false.B 1717 csBundle(numOfUop - 1.U).vlWen := true.B 1718 csBundle(numOfUop - 1.U).ldest := Vl_IDX.U 1719 } 1720 is(UopSplitType.VEC_S_LDST) { 1721 /* 1722 FMV.D.X 1723 */ 1724 csBundle(0).srcType(0) := SrcType.reg 1725 csBundle(0).srcType(1) := SrcType.imm 1726 csBundle(0).lsrc(1) := 0.U 1727 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1728 csBundle(0).fuType := FuType.i2v.U 1729 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 1730 csBundle(0).rfWen := false.B 1731 csBundle(0).fpWen := false.B 1732 csBundle(0).vecWen := true.B 1733 csBundle(0).vlsInstr := true.B 1734 1735 csBundle(1).srcType(0) := SrcType.reg 1736 csBundle(1).srcType(1) := SrcType.imm 1737 csBundle(1).lsrc(0) := latchedInst.lsrc(1) 1738 csBundle(1).lsrc(1) := 0.U 1739 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1740 csBundle(1).fuType := FuType.i2v.U 1741 csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 1742 csBundle(1).rfWen := false.B 1743 csBundle(1).fpWen := false.B 1744 csBundle(1).vecWen := true.B 1745 csBundle(1).vlsInstr := true.B 1746 1747 //LMUL 1748 for (i <- 0 until MAX_VLMUL) { 1749 csBundle(i + 2).srcType(0) := SrcType.vp 1750 csBundle(i + 2).srcType(1) := SrcType.vp 1751 csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1752 csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1753 csBundle(i + 2).lsrc(2) := dest + i.U // old vd 1754 csBundle(i + 2).ldest := dest + i.U 1755 csBundle(i + 2).uopIdx := i.U 1756 csBundle(i + 2).vlsInstr := true.B 1757 } 1758 csBundle.head.waitForward := isSdSegment 1759 csBundle(numOfUop - 1.U).blockBackward := isSdSegment 1760 } 
is(UopSplitType.VEC_I_LDST) {
      /**
       * Segment indexed load/store: sets source 0, the old-vd source (index 2) and
       * the destination of uops 1 .. MAX_VLMUL. Only the first `lmul * nf` uops
       * get a vp-typed source 2 and a vector write enable.
       *
       * @param lmul register group size (1, 2 or 4 at the call sites below)
       * @param nf   number of fields (2 .. 8 at the call sites below)
       */
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
        val usedRegs = lmul * nf
        (0 until MAX_VLMUL).foreach { regIdx =>
          val uop = csBundle(regIdx + 1)
          val inUse = regIdx < usedRegs
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          // Source 1 defaults to "no"; the SRC1 generator below overrides it.
          uop.srcType(1) := SrcType.no
          uop.lsrc(1) := src2 + regIdx.U
          uop.srcType(2) := (if (inUse) SrcType.vp else SrcType.no)
          uop.lsrc(2) := dest + regIdx.U
          uop.ldest := dest + regIdx.U
          uop.rfWen := false.B
          uop.fpWen := false.B
          uop.vecWen := inUse.B
          uop.uopIdx := regIdx.U
          uop.vlsInstr := true.B
        }
      }

      /**
       * Segment indexed load/store: sets source 1 of uops 1 .. MAX_VLMUL.
       * Only the first `emul` uops get a vp-typed source 1.
       *
       * @param emul number of source-1 registers in use (1, 2, 4 or 8 at the call sites below)
       */
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
        (0 until MAX_VLMUL).foreach { regIdx =>
          val uop = csBundle(regIdx + 1)
          uop.srcType(1) := (if (regIdx < emul) SrcType.vp else SrcType.no)
          uop.lsrc(1) := src2 + regIdx.U
        }
      }

      val vlmul = vlmulReg
      val vsew = Cat(0.U(1.W), vsewReg)
      val veew = Cat(0.U(1.W), width)
      // veew + vlmul + (~vsew + 1): adding the two's complement of vsew,
      // i.e. veew + vlmul - vsew in wrap-around arithmetic.
      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
      // Encodings 1, 2, 3 map to themselves; every other encoding maps to 0.
      val groupEncoding = Seq(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      )
      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(groupEncoding)
      val simple_emul = MuxLookup(vemul, 0.U(2.W))(groupEncoding)

      // Selects base + offset among the MAX_VLMUL candidates through a one-hot mux.
      def pickReg(base: UInt, offset: UInt): UInt =
        Mux1H(UIntToOH(offset, MAX_VLMUL), (0 until MAX_VLMUL).map(j => base + j.U))

      // uop 0: i2v move from a scalar register source into VECTOR_TMP_REG_LMUL.
      val moveUop = csBundle.head
      moveUop.srcType(0) := SrcType.reg
      moveUop.srcType(1) := SrcType.imm
      moveUop.lsrc(1) := 0.U
      moveUop.ldest := VECTOR_TMP_REG_LMUL.U
      moveUop.fuType := FuType.i2v.U
      moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      moveUop.rfWen := false.B
      moveUop.fpWen := false.B
      moveUop.vecWen := true.B
      moveUop.vlsInstr := true.B

      //LMUL
      when(nf === 0.U) {
        // nf field is zero: register offsets of every uop come from the per-uop
        // lookup modules in indexedLSRegOffset, keyed by {simple_emul, simple_lmul}.
        (0 until MAX_VLMUL).foreach { regIdx =>
          val offsetTable = indexedLSRegOffset(regIdx)
          offsetTable.src := Cat(simple_emul, simple_lmul)
          val uop = csBundle(regIdx + 1)
          // The same register serves as old vd (source 2) and as destination.
          val vdReg = pickReg(dest, offsetTable.outOffsetVd)
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := pickReg(src2, offsetTable.outOffsetVs2)
          uop.srcType(2) := SrcType.vp
          uop.lsrc(2) := vdReg
          uop.ldest := vdReg
          uop.uopIdx := regIdx.U
          uop.vlsInstr := true.B
        }
      }.otherwise {
        // nf field is non-zero: segment indexed load/store.
        // Step 1: source 0, old vd and destination, selected by (simple_lmul, nf).
        // The second argument passed to the generator is the nf field value plus one.
        switch(simple_lmul) {
          is(0.U) {
            switch(nf) {
              is(1.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2) }
              is(2.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3) }
              is(3.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4) }
              is(4.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5) }
              is(5.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6) }
              is(6.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7) }
              is(7.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8) }
            }
          }
          is(1.U) {
            switch(nf) {
              is(1.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2) }
              is(2.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3) }
              is(3.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4) }
            }
          }
          is(2.U) {
            switch(nf) {
              is(1.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2) }
            }
          }
        }

        // Step 2: source 1, selected by simple_emul. These connects come after
        // step 1 and therefore override the source-1 settings made there.
        switch(simple_emul) {
          is(0.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1) }
          is(1.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2) }
          is(2.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4) }
          is(3.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8) }
        }

        // Step 3: vector stores never enable the vector register write.
        when(isVstore) {
          (1 to MAX_VLMUL).foreach { uopIdx => csBundle(uopIdx).vecWen := false.B }
        }
      }
      // Ordering flags on the first and the last uop are driven by isIxSegment.
      moveUop.waitForward := isIxSegment
      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
    }
  } // end of the uop-split switch

  // Number of uops that can leave this cycle: RenameWidth when the first
  // output port is ready, otherwise none.
  val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U)

  // True when all remaining uops of the current complex instruction can be sent out this cycle.
  val thisAllOut = uopRes <= readyCounter

  switch(state) {
    is(s_idle) {
      // Start a new instruction as soon as one is offered.
      when(inValid) {
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }
    }
    is(s_active) {
      when(!thisAllOut) {
        // Still uops left after this cycle: stay active with the reduced count.
        stateNext := s_active
        uopResNext := uopRes - readyCounter
      }.elsewhen(inValid) {
        // Current instruction drains and the next one is already waiting.
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }.otherwise {
        // Current instruction drains and nothing follows.
        stateNext := s_idle
        uopResNext := 0.U
      }
    }
  }

  // A redirect drops the instruction in flight.
  state := Mux(io.redirect, s_idle, stateNext)
  uopRes := Mux(io.redirect, 0.U, uopResNext)

  // min(uopRes, readyCounter)
  val complexNum = Mux(thisAllOut, uopRes, readyCounter)

  fixedDecodedInst := csBundle

  // When vstart is not zero, the last uop flushes the pipe (the original note
  // says this uop resets vstart to zero); an already requested flush is kept.
  fixedDecodedInst(numOfUop - 1.U).flushPipe := (vstartReg =/= 0.U) || latchedInst.flushPipe

  (0 until RenameWidth).foreach { port =>
    outValids(port) := complexNum > port.U
    // numOfUop - uopRes is the index of the first uop that has not been sent yet.
    outDecodedInsts(port) := fixedDecodedInst(port.U + numOfUop - uopRes)
  }

  outComplexNum := Mux(state === s_active, complexNum, 0.U)
  // Accept a new instruction when idle, or when the current one fully drains this cycle.
  inReady := (state === s_idle) || (state === s_active && thisAllOut)


  XSError(inValid && inUopInfo.numOfUop === 0.U,
    p"uop number ${inUopInfo.numOfUop} is illegal, cannot be zero")
1959// val validSimple = Wire(Vec(DecodeWidth, Bool())) 1960// validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 } 1961// val notInf = Wire(Vec(DecodeWidth, Bool())) 1962// notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 } 1963// notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc) 1964// val notInfVec = Wire(Vec(DecodeWidth, Bool())) 1965// notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR} 1966// 1967// complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR , 1968// Mux(uopRes0 > readyCounter, readyCounter, uopRes0), 1969// 0.U) 1970// validToRename.zipWithIndex.foreach{ 1971// case(dst, i) => 1972// val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i)) 1973// dst := MuxCase(false.B, Seq( 1974// (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B), 1975// (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)), 1976// ).toSeq) 1977// } 1978// 1979// readyToIBuf.zipWithIndex.foreach { 1980// case (dst, i) => 1981// val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B) 1982// dst := MuxCase(true.B, Seq( 1983// (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B, 1984// (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B)) 1985// ).toSeq) 1986// } 1987// 1988// io.deq.decodedInsts := decodedInsts 1989// io.deq.complexNum := complexNum 1990// io.deq.validToRename := validToRename 1991// io.deq.readyToIBuf := readyToIBuf 1992} 1993