/***************************************************************************************
 * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
 * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul, Vl}
import yunsuan.VpermType
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Lookup table of register offsets for one uop of an indexed vector load/store.
  *
  * The table is computed at elaboration time for a fixed `uopIdx` and turned into
  * combinational logic with `decoder`. The 4-bit selector `src` is `{emul, lmul}`
  * (two bits each); both values are used as shift exponents below, i.e. they are
  * log2 encodings in the range 0..3.
  *
  * @param uopIdx index of the uop this table describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  /** Table selector: bits (3, 2) = emul, bits (1, 0) = lmul. */
  val src = IO(Input(UInt(4.W)))
  /** Offset for vs2 -- presumably added to the index register number; confirm at the use site. */
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  /** Offset for vd -- presumably added to the data register number; confirm at the use site. */
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the `(offsetVs2, offsetVd)` pair for one uop.
    * Only non-segment indexed load/store is considered.
    *
    * @param lmul   log2 of LMUL (used as a shift amount)
    * @param emul   log2 of EMUL (used as a shift amount)
    * @param uopIdx index of the uop
    * @return `(uopIdx, uopIdx / 2^(emul - lmul))` when `lmul < emul`,
    *         `(uopIdx / 2^(lmul - emul), uopIdx)` otherwise;
    *         `(0, 0)` when `uopIdx` is outside `[0, 2^max(lmul, emul))`
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    if (lmul < emul) {
      // lmul < emul: the uop count depends on emul * nf
      val offset = 1 << (emul - lmul)
      if ((0 until (1 << emul)).contains(uopIdx)) (uopIdx, uopIdx / offset) else (0, 0)
    } else {
      // lmul >= emul: the uop count depends on lmul * nf
      val offset = 1 << (lmul - emul)
      if ((0 until (1 << lmul)).contains(uopIdx)) (uopIdx / offset, uopIdx) else (0, 0)
    }
  }

  // One entry (emul, lmul, offsetVs2, offsetVd) per (emul, lmul) combination, emul-major.
  // Kept as a `var` so that the public shape of this member is unchanged.
  var combVemulNf : Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Key = {emul, lmul}; value = {offsetVs2, offsetVd}, three bits each.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}

/** Constants shared by the vector uop-splitting logic. */
trait VectorConstants {
  val MAX_VLMUL = 8
  val VECTOR_TMP_REG_LMUL = 32 // 32~46 -> 15
  val VECTOR_COMPRESS = 1    // in v0 regfile
  val MAX_INDEXED_LS_UOPNUM = 64
}

/** IO bundle of [[DecodeUnitComp]]. */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val vtypeBypass = Input(new VType)
  // When the first inst in the decode vector is a complex inst, it is passed in here
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // RenameWidth decoupled output ports, each carrying a DecodedInst
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst =
WireInit(io.in.bits.simpleDecodedInst) 111 private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 112 private val inUopInfo = io.in.bits.uopInfo 113 private val outValids = io.out.complexDecodedInsts.map(_.valid) 114 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 115 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 116 private val outComplexNum = io.complexNum 117 118 val maxUopSize = MaxUopSize 119 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 120 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 121 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 122 }.elsewhen(inInstFields.RS1 === 0.U) { 123 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 124 } 125 } 126 127 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 128 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 129 //input bits 130 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 131 132 val src1 = Cat(0.U(1.W), instFields.RS1) 133 val src2 = Cat(0.U(1.W), instFields.RS2) 134 val dest = Cat(0.U(1.W), instFields.RD) 135 136 val nf = instFields.NF 137 val width = instFields.WIDTH(1, 0) 138 139 //output of DecodeUnit 140 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 141 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 142 val lmul = Wire(UInt(4.W)) 143 val isVsetSimple = Wire(Bool()) 144 145 val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i))) 146 indexedLSRegOffset.map(_.src := 0.U) 147 148 //pre decode 149 lmul := latchedUopInfo.lmul 150 isVsetSimple := latchedInst.isVset 151 val vlmulReg = latchedInst.vpu.vlmul 152 val vsewReg = latchedInst.vpu.vsew 153 val vstartReg = latchedInst.vpu.vstart 154 155 //Type of uop Div 156 val typeOfSplit = latchedInst.uopSplitType 157 val src1Type = latchedInst.srcType(0) 158 val src1IsImm = src1Type === SrcType.imm 
159 val src1IsFp = src1Type === SrcType.fp 160 161 val isVstore = FuType.isVStore(latchedInst.fuType) 162 163 // exception generator 164 val vecException = Module(new VecExceptionGen) 165 vecException.io.inst := latchedInst.instr 166 vecException.io.decodedInst := latchedInst 167 vecException.io.vtype := latchedInst.vpu.vtype 168 vecException.io.vstart := latchedInst.vpu.vstart 169 val illegalInst = vecException.io.illegalInst 170 171 numOfUop := latchedUopInfo.numOfUop 172 numOfWB := latchedUopInfo.numOfWB 173 174 //uops dispatch 175 val s_idle :: s_active :: Nil = Enum(2) 176 val state = RegInit(s_idle) 177 val stateNext = WireDefault(state) 178 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 179 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 180 val uopResNext = WireInit(uopRes) 181 val e64 = 3.U(2.W) 182 val isUsSegment = instFields.MOP === 0.U && ((nf =/= 0.U && instFields.LUMOP === 0.U) || instFields.LUMOP === "b10000".U) 183 val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U 184 val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U 185 186 //uop div up to maxUopSize 187 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 188 val fixedDecodedInst = Wire(Vec(maxUopSize, new DecodedInst)) 189 190 csBundle.foreach { case dst => 191 dst := latchedInst 192 dst.numUops := latchedUopInfo.numOfUop 193 dst.numWB := latchedUopInfo.numOfWB 194 dst.exceptionVec(ExceptionNO.EX_II) := latchedInst.exceptionVec(ExceptionNO.EX_II) || illegalInst 195 dst.firstUop := false.B 196 dst.lastUop := false.B 197 dst.vlsInstr := false.B 198 } 199 200 csBundle(0).firstUop := true.B 201 csBundle(numOfUop - 1.U).lastUop := true.B 202 203 // when vstart is not zero, the last uop will modify vstart to zero 204 // therefore, blockback and flush pipe 205 csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U 206 csBundle(0.U).flushPipe := vstartReg =/= 0.U 207 208 switch(typeOfSplit) { 209 is(UopSplitType.AMO_CAS_W) { 210 csBundle(0).uopIdx := 0.U 211 
csBundle(0).fuOpType := Cat(0.U(3.W), LSUOpType.amocas_w) 212 csBundle(0).lsrc(0) := src1 213 csBundle(0).lsrc(1) := dest 214 csBundle(0).waitForward := true.B 215 csBundle(0).blockBackward := false.B 216 217 csBundle(1).uopIdx := 1.U 218 csBundle(1).fuOpType := Cat(1.U(3.W), LSUOpType.amocas_w) 219 csBundle(1).lsrc(0) := src1 220 csBundle(1).lsrc(1) := src2 221 csBundle(1).rfWen := false.B 222 csBundle(1).waitForward := false.B 223 csBundle(1).blockBackward := true.B 224 } 225 is(UopSplitType.AMO_CAS_D) { 226 csBundle(0).uopIdx := 0.U 227 csBundle(0).fuOpType := Cat(0.U(3.W), LSUOpType.amocas_d) 228 csBundle(0).lsrc(0) := src1 229 csBundle(0).lsrc(1) := dest 230 csBundle(0).waitForward := true.B 231 csBundle(0).blockBackward := false.B 232 233 csBundle(1).uopIdx := 1.U 234 csBundle(1).fuOpType := Cat(1.U(3.W), LSUOpType.amocas_d) 235 csBundle(1).lsrc(0) := src1 236 csBundle(1).lsrc(1) := src2 237 csBundle(1).rfWen := false.B 238 csBundle(1).waitForward := false.B 239 csBundle(1).blockBackward := true.B 240 } 241 is(UopSplitType.AMO_CAS_Q) { 242 csBundle(0).uopIdx := 0.U 243 csBundle(0).fuOpType := Cat(0.U(3.W), LSUOpType.amocas_q) 244 csBundle(0).lsrc(0) := src1 245 csBundle(0).lsrc(1) := dest 246 csBundle(0).waitForward := true.B 247 csBundle(0).blockBackward := false.B 248 249 csBundle(1).uopIdx := 1.U 250 csBundle(1).fuOpType := Cat(1.U(3.W), LSUOpType.amocas_q) 251 csBundle(1).lsrc(0) := src1 252 csBundle(1).lsrc(1) := src2 253 csBundle(1).rfWen := false.B 254 csBundle(1).waitForward := false.B 255 csBundle(1).blockBackward := false.B 256 257 csBundle(2).uopIdx := 2.U 258 csBundle(2).fuOpType := Cat(2.U(3.W), LSUOpType.amocas_q) 259 csBundle(2).lsrc(0) := src1 260 csBundle(2).lsrc(1) := Mux(dest === 0.U, 0.U, dest + 1.U) 261 csBundle(2).ldest := Mux(dest === 0.U, 0.U, dest + 1.U) 262 csBundle(2).waitForward := false.B 263 csBundle(2).blockBackward := false.B 264 265 csBundle(3).uopIdx := 3.U 266 csBundle(3).fuOpType := Cat(3.U(3.W), LSUOpType.amocas_q) 267 
csBundle(3).lsrc(0) := src1 268 csBundle(3).lsrc(1) := Mux(src2 === 0.U, 0.U, src2 + 1.U) 269 csBundle(3).rfWen := false.B 270 csBundle(3).waitForward := false.B 271 csBundle(3).blockBackward := true.B 272 } 273 is(UopSplitType.VSET) { 274 // In simple decoder, rfWen and vecWen are not set 275 when(isVsetSimple) { 276 // Default 277 // uop0 set rd, never flushPipe 278 csBundle(0).fuType := FuType.vsetiwi.U 279 csBundle(0).flushPipe := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U) 280 csBundle(0).blockBackward := false.B 281 csBundle(0).rfWen := true.B 282 // uop1 set vl, vsetvl will flushPipe 283 csBundle(1).ldest := Vl_IDX.U 284 csBundle(1).vecWen := false.B 285 csBundle(1).vlWen := true.B 286 csBundle(1).flushPipe := false.B 287 csBundle(1).blockBackward := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U) 288 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 289 // write nothing, uop0 is a nop instruction 290 csBundle(0).rfWen := false.B 291 csBundle(0).fpWen := false.B 292 csBundle(0).vecWen := false.B 293 csBundle(0).vlWen := false.B 294 csBundle(1).fuType := FuType.vsetfwf.U 295 csBundle(1).srcType(0) := SrcType.no 296 csBundle(1).srcType(2) := SrcType.no 297 csBundle(1).srcType(3) := SrcType.no 298 csBundle(1).srcType(4) := SrcType.vp 299 csBundle(1).lsrc(4) := Vl_IDX.U 300 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 301 // uop0: mv vtype gpr to vector region 302 csBundle(0).srcType(0) := SrcType.xp 303 csBundle(0).srcType(1) := SrcType.no 304 csBundle(0).lsrc(0) := src2 305 csBundle(0).lsrc(1) := 0.U 306 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 307 csBundle(0).fuType := FuType.i2v.U 308 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 309 csBundle(0).rfWen := false.B 310 csBundle(0).fpWen := false.B 311 csBundle(0).vecWen := true.B 312 csBundle(0).vlWen := false.B 313 // uop1: uvsetvcfg_vv 314 csBundle(1).fuType := 
FuType.vsetfwf.U 315 // vl 316 csBundle(1).srcType(0) := SrcType.no 317 csBundle(1).srcType(2) := SrcType.no 318 csBundle(1).srcType(3) := SrcType.no 319 csBundle(1).srcType(4) := SrcType.vp 320 csBundle(1).lsrc(4) := Vl_IDX.U 321 // vtype 322 csBundle(1).srcType(1) := SrcType.vp 323 csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U 324 csBundle(1).vecWen := false.B 325 csBundle(1).vlWen := true.B 326 csBundle(1).ldest := Vl_IDX.U 327 }.elsewhen(dest === 0.U) { 328 // write nothing, uop0 is a nop instruction 329 csBundle(0).rfWen := false.B 330 csBundle(0).fpWen := false.B 331 csBundle(0).vecWen := false.B 332 csBundle(0).vlWen := false.B 333 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) { 334 // because vsetvl may modified src2 when src2 == rd, 335 // we need to modify vd in second uop to avoid dependency 336 // uop0 set vl 337 csBundle(0).fuType := FuType.vsetiwf.U 338 csBundle(0).ldest := Vl_IDX.U 339 csBundle(0).rfWen := false.B 340 csBundle(0).vlWen := true.B 341 // uop1 set rd 342 csBundle(1).fuType := FuType.vsetiwi.U 343 csBundle(1).ldest := dest 344 csBundle(1).rfWen := true.B 345 csBundle(1).vlWen := false.B 346 } 347 // use bypass vtype from vtypeGen 348 csBundle(0).vpu.connectVType(io.vtypeBypass) 349 csBundle(1).vpu.connectVType(io.vtypeBypass) 350 } 351 } 352 is(UopSplitType.VEC_VVV) { 353 for (i <- 0 until MAX_VLMUL) { 354 csBundle(i).lsrc(0) := src1 + i.U 355 csBundle(i).lsrc(1) := src2 + i.U 356 csBundle(i).lsrc(2) := dest + i.U 357 csBundle(i).ldest := dest + i.U 358 csBundle(i).uopIdx := i.U 359 } 360 } 361 is(UopSplitType.VEC_VFV) { 362 /* 363 f to vector move 364 */ 365 csBundle(0).srcType(0) := SrcType.fp 366 csBundle(0).srcType(1) := SrcType.imm 367 csBundle(0).srcType(2) := SrcType.imm 368 csBundle(0).lsrc(1) := 0.U 369 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 370 csBundle(0).fuType := FuType.f2v.U 371 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 372 csBundle(0).vecWen := true.B 373 csBundle(0).vpu.isReverse := 
false.B 374 /* 375 LMUL 376 */ 377 for (i <- 0 until MAX_VLMUL) { 378 csBundle(i + 1).srcType(0) := SrcType.vp 379 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 380 csBundle(i + 1).lsrc(1) := src2 + i.U 381 csBundle(i + 1).lsrc(2) := dest + i.U 382 csBundle(i + 1).ldest := dest + i.U 383 csBundle(i + 1).uopIdx := i.U 384 } 385 } 386 is(UopSplitType.VEC_EXT2) { 387 for (i <- 0 until MAX_VLMUL / 2) { 388 csBundle(2 * i).lsrc(1) := src2 + i.U 389 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 390 csBundle(2 * i).ldest := dest + (2 * i).U 391 csBundle(2 * i).uopIdx := (2 * i).U 392 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 393 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 394 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 395 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 396 } 397 } 398 is(UopSplitType.VEC_EXT4) { 399 for (i <- 0 until MAX_VLMUL / 4) { 400 csBundle(4 * i).lsrc(1) := src2 + i.U 401 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 402 csBundle(4 * i).ldest := dest + (4 * i).U 403 csBundle(4 * i).uopIdx := (4 * i).U 404 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 405 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 406 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 407 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 408 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 409 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 410 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 411 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 412 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 413 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 414 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 415 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 416 } 417 } 418 is(UopSplitType.VEC_EXT8) { 419 for (i <- 0 until MAX_VLMUL) { 420 csBundle(i).lsrc(1) := src2 421 csBundle(i).lsrc(2) := dest + i.U 422 csBundle(i).ldest := dest + i.U 423 csBundle(i).uopIdx := i.U 424 } 425 } 426 is(UopSplitType.VEC_0XV) { 427 /* 428 i/f to vector move 429 */ 430 csBundle(0).srcType(0) := Mux(src1IsFp, 
SrcType.fp, SrcType.reg) 431 csBundle(0).srcType(1) := SrcType.imm 432 csBundle(0).srcType(2) := SrcType.imm 433 csBundle(0).lsrc(1) := 0.U 434 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 435 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 436 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 437 csBundle(0).rfWen := false.B 438 csBundle(0).fpWen := false.B 439 csBundle(0).vecWen := true.B 440 /* 441 vmv.s.x 442 */ 443 csBundle(1).srcType(0) := SrcType.vp 444 csBundle(1).srcType(1) := SrcType.imm 445 csBundle(1).srcType(2) := SrcType.vp 446 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 447 csBundle(1).lsrc(1) := 0.U 448 csBundle(1).lsrc(2) := dest 449 csBundle(1).ldest := dest 450 csBundle(1).rfWen := false.B 451 csBundle(1).fpWen := false.B 452 csBundle(1).vecWen := true.B 453 csBundle(1).uopIdx := 0.U 454 } 455 is(UopSplitType.VEC_VXV) { 456 /* 457 i to vector move 458 */ 459 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 460 csBundle(0).srcType(1) := SrcType.imm 461 csBundle(0).srcType(2) := SrcType.imm 462 csBundle(0).lsrc(1) := 0.U 463 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 464 csBundle(0).fuType := FuType.i2v.U 465 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 466 csBundle(0).vecWen := true.B 467 csBundle(0).vpu.isReverse := false.B 468 /* 469 LMUL 470 */ 471 for (i <- 0 until MAX_VLMUL) { 472 csBundle(i + 1).srcType(0) := SrcType.vp 473 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 474 csBundle(i + 1).lsrc(1) := src2 + i.U 475 csBundle(i + 1).lsrc(2) := dest + i.U 476 csBundle(i + 1).ldest := dest + i.U 477 csBundle(i + 1).uopIdx := i.U 478 } 479 } 480 is(UopSplitType.VEC_VVW) { 481 for (i <- 0 until MAX_VLMUL / 2) { 482 csBundle(2 * i).lsrc(0) := src1 + i.U 483 csBundle(2 * i).lsrc(1) := src2 + i.U 484 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 485 csBundle(2 * i).ldest := dest + (2 * i).U 
486 csBundle(2 * i).uopIdx := (2 * i).U 487 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 488 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 489 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 490 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 491 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 492 } 493 } 494 is(UopSplitType.VEC_VFW) { 495 /* 496 f to vector move 497 */ 498 csBundle(0).srcType(0) := SrcType.fp 499 csBundle(0).srcType(1) := SrcType.imm 500 csBundle(0).srcType(2) := SrcType.imm 501 csBundle(0).lsrc(1) := 0.U 502 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 503 csBundle(0).fuType := FuType.f2v.U 504 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 505 csBundle(0).rfWen := false.B 506 csBundle(0).fpWen := false.B 507 csBundle(0).vecWen := true.B 508 509 for (i <- 0 until MAX_VLMUL / 2) { 510 csBundle(2 * i + 1).srcType(0) := SrcType.vp 511 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 512 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 513 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 514 csBundle(2 * i + 1).ldest := dest + (2 * i).U 515 csBundle(2 * i + 1).uopIdx := (2 * i).U 516 csBundle(2 * i + 2).srcType(0) := SrcType.vp 517 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 518 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 519 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 520 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 521 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 522 } 523 } 524 is(UopSplitType.VEC_WVW) { 525 for (i <- 0 until MAX_VLMUL / 2) { 526 csBundle(2 * i).lsrc(0) := src1 + i.U 527 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 528 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 529 csBundle(2 * i).ldest := dest + (2 * i).U 530 csBundle(2 * i).uopIdx := (2 * i).U 531 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 532 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 533 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 534 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 535 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 
536 } 537 } 538 is(UopSplitType.VEC_VXW) { 539 /* 540 i to vector move 541 */ 542 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 543 csBundle(0).srcType(1) := SrcType.imm 544 csBundle(0).srcType(2) := SrcType.imm 545 csBundle(0).lsrc(1) := 0.U 546 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 547 csBundle(0).fuType := FuType.i2v.U 548 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 549 csBundle(0).vecWen := true.B 550 551 for (i <- 0 until MAX_VLMUL / 2) { 552 csBundle(2 * i + 1).srcType(0) := SrcType.vp 553 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 554 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 555 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 556 csBundle(2 * i + 1).ldest := dest + (2 * i).U 557 csBundle(2 * i + 1).uopIdx := (2 * i).U 558 csBundle(2 * i + 2).srcType(0) := SrcType.vp 559 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 560 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 561 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 562 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 563 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 564 } 565 } 566 is(UopSplitType.VEC_WXW) { 567 /* 568 i to vector move 569 */ 570 csBundle(0).srcType(0) := SrcType.reg 571 csBundle(0).srcType(1) := SrcType.imm 572 csBundle(0).srcType(2) := SrcType.imm 573 csBundle(0).lsrc(1) := 0.U 574 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 575 csBundle(0).fuType := FuType.i2v.U 576 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 577 csBundle(0).vecWen := true.B 578 579 for (i <- 0 until MAX_VLMUL / 2) { 580 csBundle(2 * i + 1).srcType(0) := SrcType.vp 581 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 582 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 583 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 584 csBundle(2 * i + 1).ldest := dest + (2 * i).U 585 csBundle(2 * i + 1).uopIdx := (2 * i).U 586 csBundle(2 * i + 2).srcType(0) := SrcType.vp 587 csBundle(2 * i + 
2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 588 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 589 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 590 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 591 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 592 } 593 } 594 is(UopSplitType.VEC_WVV) { 595 for (i <- 0 until MAX_VLMUL / 2) { 596 597 csBundle(2 * i).lsrc(0) := src1 + i.U 598 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 599 csBundle(2 * i).lsrc(2) := dest + i.U 600 csBundle(2 * i).ldest := dest + i.U 601 csBundle(2 * i).uopIdx := (2 * i).U 602 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 603 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 604 csBundle(2 * i + 1).lsrc(2) := dest + i.U 605 csBundle(2 * i + 1).ldest := dest + i.U 606 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 607 } 608 } 609 is(UopSplitType.VEC_WFW) { 610 /* 611 f to vector move 612 */ 613 csBundle(0).srcType(0) := SrcType.fp 614 csBundle(0).srcType(1) := SrcType.imm 615 csBundle(0).srcType(2) := SrcType.imm 616 csBundle(0).lsrc(1) := 0.U 617 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 618 csBundle(0).fuType := FuType.f2v.U 619 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 620 csBundle(0).rfWen := false.B 621 csBundle(0).fpWen := false.B 622 csBundle(0).vecWen := true.B 623 624 for (i <- 0 until MAX_VLMUL / 2) { 625 csBundle(2 * i + 1).srcType(0) := SrcType.vp 626 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 627 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 628 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 629 csBundle(2 * i + 1).ldest := dest + (2 * i).U 630 csBundle(2 * i + 1).uopIdx := (2 * i).U 631 csBundle(2 * i + 2).srcType(0) := SrcType.vp 632 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 633 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 634 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 635 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 636 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 637 } 638 } 639 is(UopSplitType.VEC_WXV) { 640 /* 641 
i to vector move 642 */ 643 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 644 csBundle(0).srcType(1) := SrcType.imm 645 csBundle(0).srcType(2) := SrcType.imm 646 csBundle(0).lsrc(1) := 0.U 647 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 648 csBundle(0).fuType := FuType.i2v.U 649 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 650 csBundle(0).vecWen := true.B 651 652 for (i <- 0 until MAX_VLMUL / 2) { 653 csBundle(2 * i + 1).srcType(0) := SrcType.vp 654 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 655 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 656 csBundle(2 * i + 1).lsrc(2) := dest + i.U 657 csBundle(2 * i + 1).ldest := dest + i.U 658 csBundle(2 * i + 1).uopIdx := (2 * i).U 659 csBundle(2 * i + 2).srcType(0) := SrcType.vp 660 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 661 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 662 csBundle(2 * i + 2).lsrc(2) := dest + i.U 663 csBundle(2 * i + 2).ldest := dest + i.U 664 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 665 } 666 } 667 is(UopSplitType.VEC_VVM) { 668 csBundle(0).lsrc(2) := dest 669 csBundle(0).ldest := dest 670 csBundle(0).uopIdx := 0.U 671 for (i <- 1 until MAX_VLMUL) { 672 csBundle(i).lsrc(0) := src1 + i.U 673 csBundle(i).lsrc(1) := src2 + i.U 674 csBundle(i).lsrc(2) := dest 675 csBundle(i).ldest := dest 676 csBundle(i).uopIdx := i.U 677 } 678 } 679 is(UopSplitType.VEC_VFM) { 680 /* 681 f to vector move 682 */ 683 csBundle(0).srcType(0) := SrcType.fp 684 csBundle(0).srcType(1) := SrcType.imm 685 csBundle(0).srcType(2) := SrcType.imm 686 csBundle(0).lsrc(1) := 0.U 687 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 688 csBundle(0).fuType := FuType.f2v.U 689 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 690 csBundle(0).rfWen := false.B 691 csBundle(0).fpWen := false.B 692 csBundle(0).vecWen := true.B 693 //LMUL 694 csBundle(1).srcType(0) := SrcType.vp 695 csBundle(1).lsrc(0) := 
VECTOR_TMP_REG_LMUL.U 696 csBundle(1).lsrc(2) := dest 697 csBundle(1).ldest := dest 698 csBundle(1).uopIdx := 0.U 699 for (i <- 1 until MAX_VLMUL) { 700 csBundle(i + 1).srcType(0) := SrcType.vp 701 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 702 csBundle(i + 1).lsrc(1) := src2 + i.U 703 csBundle(i + 1).lsrc(2) := dest 704 csBundle(i + 1).ldest := dest 705 csBundle(i + 1).uopIdx := i.U 706 } 707 csBundle(numOfUop - 1.U).ldest := dest 708 } 709 is(UopSplitType.VEC_VXM) { 710 /* 711 i to vector move 712 */ 713 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 714 csBundle(0).srcType(1) := SrcType.imm 715 csBundle(0).srcType(2) := SrcType.imm 716 csBundle(0).lsrc(1) := 0.U 717 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 718 csBundle(0).fuType := FuType.i2v.U 719 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 720 csBundle(0).vecWen := true.B 721 //LMUL 722 csBundle(1).srcType(0) := SrcType.vp 723 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 724 csBundle(1).lsrc(2) := dest 725 csBundle(1).ldest := dest 726 csBundle(1).uopIdx := 0.U 727 for (i <- 1 until MAX_VLMUL) { 728 csBundle(i + 1).srcType(0) := SrcType.vp 729 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 730 csBundle(i + 1).lsrc(1) := src2 + i.U 731 csBundle(i + 1).lsrc(2) := dest 732 csBundle(i + 1).ldest := dest 733 csBundle(i + 1).uopIdx := i.U 734 } 735 csBundle(numOfUop - 1.U).ldest := dest 736 } 737 is(UopSplitType.VEC_SLIDE1UP) { 738 /* 739 i to vector move 740 */ 741 csBundle(0).srcType(0) := SrcType.reg 742 csBundle(0).srcType(1) := SrcType.imm 743 csBundle(0).srcType(2) := SrcType.imm 744 csBundle(0).lsrc(1) := 0.U 745 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 746 csBundle(0).fuType := FuType.i2v.U 747 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 748 csBundle(0).vecWen := true.B 749 //LMUL 750 csBundle(1).srcType(0) := SrcType.vp 751 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 752 
csBundle(1).lsrc(2) := dest 753 csBundle(1).ldest := dest 754 csBundle(1).uopIdx := 0.U 755 for (i <- 1 until MAX_VLMUL) { 756 csBundle(i + 1).srcType(0) := SrcType.vp 757 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 758 csBundle(i + 1).lsrc(1) := src2 + i.U 759 csBundle(i + 1).lsrc(2) := dest + i.U 760 csBundle(i + 1).ldest := dest + i.U 761 csBundle(i + 1).uopIdx := i.U 762 } 763 } 764 is(UopSplitType.VEC_FSLIDE1UP) { 765 /* 766 f to vector move 767 */ 768 csBundle(0).srcType(0) := SrcType.fp 769 csBundle(0).srcType(1) := SrcType.imm 770 csBundle(0).srcType(2) := SrcType.imm 771 csBundle(0).lsrc(1) := 0.U 772 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 773 csBundle(0).fuType := FuType.f2v.U 774 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 775 csBundle(0).rfWen := false.B 776 csBundle(0).fpWen := false.B 777 csBundle(0).vecWen := true.B 778 //LMUL 779 csBundle(1).srcType(0) := SrcType.vp 780 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 781 csBundle(1).lsrc(1) := src2 782 csBundle(1).lsrc(2) := dest 783 csBundle(1).ldest := dest 784 csBundle(1).uopIdx := 0.U 785 for (i <- 1 until MAX_VLMUL) { 786 csBundle(i + 1).srcType(0) := SrcType.vp 787 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 788 csBundle(i + 1).lsrc(1) := src2 + i.U 789 csBundle(i + 1).lsrc(2) := dest + i.U 790 csBundle(i + 1).ldest := dest + i.U 791 csBundle(i + 1).uopIdx := i.U 792 } 793 } 794 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 795 /* 796 i to vector move 797 */ 798 csBundle(0).srcType(0) := SrcType.reg 799 csBundle(0).srcType(1) := SrcType.imm 800 csBundle(0).srcType(2) := SrcType.imm 801 csBundle(0).lsrc(1) := 0.U 802 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 803 csBundle(0).fuType := FuType.i2v.U 804 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 805 csBundle(0).vecWen := true.B 806 //LMUL 807 for (i <- 0 until MAX_VLMUL) { 808 csBundle(2 * i + 1).srcType(0) := SrcType.vp 809 csBundle(2 * i + 1).srcType(1) := SrcType.vp 810 csBundle(2 * i + 
1).lsrc(0) := src2 + (i + 1).U 811 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 812 csBundle(2 * i + 1).lsrc(2) := dest + i.U 813 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 814 csBundle(2 * i + 1).uopIdx := (2 * i).U 815 if (2 * i + 2 < MAX_VLMUL * 2) { 816 csBundle(2 * i + 2).srcType(0) := SrcType.vp 817 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 818 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 819 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 820 csBundle(2 * i + 2).ldest := dest + i.U 821 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 822 } 823 } 824 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 825 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 826 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 827 } 828 is(UopSplitType.VEC_FSLIDE1DOWN) { 829 /* 830 f to vector move 831 */ 832 csBundle(0).srcType(0) := SrcType.fp 833 csBundle(0).srcType(1) := SrcType.imm 834 csBundle(0).srcType(2) := SrcType.imm 835 csBundle(0).lsrc(1) := 0.U 836 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 837 csBundle(0).fuType := FuType.f2v.U 838 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 839 csBundle(0).rfWen := false.B 840 csBundle(0).fpWen := false.B 841 csBundle(0).vecWen := true.B 842 //LMUL 843 for (i <- 0 until MAX_VLMUL) { 844 csBundle(2 * i + 1).srcType(0) := SrcType.vp 845 csBundle(2 * i + 1).srcType(1) := SrcType.vp 846 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 847 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 848 csBundle(2 * i + 1).lsrc(2) := dest + i.U 849 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 850 csBundle(2 * i + 1).uopIdx := (2 * i).U 851 if (2 * i + 2 < MAX_VLMUL * 2) { 852 csBundle(2 * i + 2).srcType(0) := SrcType.vp 853 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 854 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 855 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 856 csBundle(2 * i + 2).ldest := dest + i.U 857 csBundle(2 * i + 2).uopIdx 
:= (2 * i + 1).U 858 } 859 } 860 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 861 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 862 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 863 } 864 is(UopSplitType.VEC_VRED) { 865 when(vlmulReg === "b001".U) { 866 csBundle(0).srcType(2) := SrcType.DC 867 csBundle(0).lsrc(0) := src2 + 1.U 868 csBundle(0).lsrc(1) := src2 869 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 870 csBundle(0).uopIdx := 0.U 871 } 872 when(vlmulReg === "b010".U) { 873 csBundle(0).srcType(2) := SrcType.DC 874 csBundle(0).lsrc(0) := src2 + 1.U 875 csBundle(0).lsrc(1) := src2 876 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 877 csBundle(0).uopIdx := 0.U 878 879 csBundle(1).srcType(2) := SrcType.DC 880 csBundle(1).lsrc(0) := src2 + 3.U 881 csBundle(1).lsrc(1) := src2 + 2.U 882 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 883 csBundle(1).uopIdx := 1.U 884 885 csBundle(2).srcType(2) := SrcType.DC 886 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 887 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 888 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 889 csBundle(2).uopIdx := 2.U 890 } 891 when(vlmulReg === "b011".U) { 892 for (i <- 0 until MAX_VLMUL) { 893 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 894 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 895 csBundle(i).lsrc(1) := src2 + (i * 2).U 896 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 897 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 898 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 899 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 900 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 901 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 902 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 903 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 904 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 905 } 906 csBundle(i).srcType(2) := SrcType.DC 907 csBundle(i).uopIdx := i.U 908 } 909 } 910 when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) { 
/*
           * 2 <= LMUL <= 8: the last uop combines src1 with the temporary written by the
           * previous uop and writes dest
           */
          csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
          csBundle(numOfUop - 1.U).lsrc(0) := src1
          csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
          csBundle(numOfUop - 1.U).lsrc(2) := dest
          csBundle(numOfUop - 1.U).ldest := dest
          csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
        }
      }
      is(UopSplitType.VEC_VFRED) {
        val vlmul = vlmulReg
        val vsew = vsewReg

        // Sets the isFoldTo1_<ratio> flag of uop `idx`.
        def markFold(idx: Int, ratio: Int): Unit = ratio match {
          case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2 := true.B
          case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4 := true.B
          case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8 := true.B
        }

        // Pairwise tree over `numRegs` source registers (numRegs - 1 uops, none for numRegs == 1).
        // The first numRegs/2 uops read pairs of the src2 group, the rest read pairs of
        // temporaries; uop i always writes temporary i.
        def genTree(numRegs: Int): Unit = {
          for (i <- 0 until numRegs - 1) {
            if (i < numRegs / 2) {
              csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
              csBundle(i).lsrc(1) := src2 + (i * 2).U
            } else {
              csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - numRegs / 2) * 2 + 1).U
              csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - numRegs / 2) * 2).U
            }
            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
            csBundle(i).uopIdx := i.U
          }
        }

        // Fold uops followed by the final uop, starting at uop index `firstIdx`.
        // Each fold uop reads the previous result on both sources (src2 itself when it is
        // uop 0) and writes temporary `idx`; the final uop reads src1 and the last
        // temporary and writes dest.
        def genFoldChain(firstIdx: Int, ratios: Seq[Int]): Unit = {
          ratios.zipWithIndex.foreach { case (ratio, k) =>
            val idx = firstIdx + k
            val prev = if (idx == 0) src2 else (VECTOR_TMP_REG_LMUL + idx - 1).U
            csBundle(idx).lsrc(0) := prev
            csBundle(idx).lsrc(1) := prev
            csBundle(idx).ldest := (VECTOR_TMP_REG_LMUL + idx).U
            markFold(idx, ratio)
            csBundle(idx).uopIdx := idx.U
          }
          val last = firstIdx + ratios.length
          csBundle(last).lsrc(0) := src1
          csBundle(last).lsrc(1) := (VECTOR_TMP_REG_LMUL + last - 1).U
          csBundle(last).ldest := dest
          csBundle(last).uopIdx := last.U
        }

        // Fold ratios per SEW when at least one whole register is reduced.
        val wholeRegFolds = Seq(
          VSew.e64 -> Seq(2),
          VSew.e32 -> Seq(2, 4),
          VSew.e16 -> Seq(2, 4, 8)
        )
        // (vlmul encoding, number of source registers in the tree, fold ratios per SEW)
        val splitCases = Seq(
          (VLmul.m8, 8, wholeRegFolds),
          (VLmul.m4, 4, wholeRegFolds),
          (VLmul.m2, 2, wholeRegFolds),
          (VLmul.m1, 1, wholeRegFolds),
          (VLmul.mf2, 1, Seq(VSew.e32 -> Seq(4), VSew.e16 -> Seq(4, 8))),
          (VLmul.mf4, 1, Seq(VSew.e16 -> Seq(8)))
        )
        for ((lmulEnc, numRegs, foldsBySew) <- splitCases) {
          when(vlmul === lmulEnc) {
            genTree(numRegs)
            for ((sewEnc, ratios) <- foldsBySew) {
              when(vsew === sewEnc) {
                genFoldChain(numRegs - 1, ratios)
              }
            }
          }
        }
      }

      is(UopSplitType.VEC_VFREDOSUM) {
        import yunsuan.VfaluType
        val vlmul = vlmulReg
        val vsew = vsewReg
        val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

        // Emits `vlmax` chained uops; every `uopsPerReg`-th uop starts a new src2 register.
        // Uop 0 reads src1, all later uops read the temporary; the last uop writes dest.
        def genOrderedSum(vlmax: Int, uopsPerReg: Int): Unit = {
          for (i <- 0 until vlmax) {
            val startsReg = i % uopsPerReg == 0
            val isLast = i == vlmax - 1
            val notStart = if (startsReg) false.B else true.B
            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
            csBundle(i).lsrc(1) := (if (startsReg) src2 + (i / uopsPerReg).U else VECTOR_TMP_REG_LMUL.U)
            csBundle(i).lsrc(2) := (
              if (startsReg) src2 + (i / uopsPerReg).U
              else if (isLast) dest
              // only the 4- and 8-uop groups give the widening form its source register here
              else if (uopsPerReg != 2 && i % uopsPerReg == 1) Mux(isWiden, src2 + (i / uopsPerReg).U, VECTOR_TMP_REG_LMUL.U)
              else VECTOR_TMP_REG_LMUL.U
            )
            csBundle(i).ldest := (if (isLast) dest else VECTOR_TMP_REG_LMUL.U)
            uopsPerReg match {
              case 2 =>
                csBundle(i).vpu.fpu.isFoldTo1_2 := notStart
              case 4 =>
                csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && notStart
                csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && notStart
              case 8 =>
                csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && notStart
                csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && notStart
            }
            csBundle(i).uopIdx := i.U
          }
        }

        // (vsew encoding, uops per source register)
        val uopsPerRegBySew = Seq(VSew.e64 -> 2, VSew.e32 -> 4, VSew.e16 -> 8)
        // (vlmul encoding, LMUL * 8)
        val lmulTimes8 = Seq(
          VLmul.m8 -> 64, VLmul.m4 -> 32, VLmul.m2 -> 16,
          VLmul.m1 -> 8, VLmul.mf2 -> 4, VLmul.mf4 -> 2
        )
        for ((lmulEnc, lmulX8) <- lmulTimes8) {
          when(vlmul === lmulEnc) {
            for ((sewEnc, uopsPerReg) <- uopsPerRegBySew) {
              // vlmax = uopsPerReg * LMUL; combinations giving fewer than 2 uops are not split
              val vlmax = uopsPerReg * lmulX8 / 8
              if (vlmax >= 2) {
                when(vsew === sewEnc) {
                  genOrderedSum(vlmax, uopsPerReg)
                }
              }
            }
          }
        }
      }

      is(UopSplitType.VEC_SLIDEUP) {
        // uop 0: integer/immediate to vector move into the temporary register
        csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
        csBundle(0).srcType(1) := SrcType.imm
        csBundle(0).srcType(2) := SrcType.imm
        csBundle(0).lsrc(1) := 0.U
        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
        csBundle(0).fuType := FuType.i2v.U
        csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
        csBundle(0).vecWen := true.B
        // LMUL: destination register i is built from source registers 0..i, one uop each,
        // chained through temporaries; only the last uop of a chain writes dest + i
        for (i <- 0 until MAX_VLMUL)
          for (j <- 0 to i) {
            val old_vd = if (j == 0) {
              dest + i.U
            } else (VECTOR_TMP_REG_LMUL + j).U
            val vd = if (j == i) {
              dest + i.U
            } else (VECTOR_TMP_REG_LMUL + j + 1).U
            // slot 0 is the move above, so the chain uops start at slot 1
            val slot = i * (i + 1) / 2 + j + 1
            csBundle(slot).srcType(0) := SrcType.vp
            csBundle(slot).lsrc(0) := VECTOR_TMP_REG_LMUL.U
            csBundle(slot).lsrc(1) := src2 + j.U
            csBundle(slot).lsrc(2) := old_vd
            csBundle(slot).ldest := vd
            csBundle(slot).uopIdx := (slot - 1).U
          }
      }

      is(UopSplitType.VEC_SLIDEDOWN) {
        // i to vector move
        csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
        csBundle(0).srcType(1) := SrcType.imm
        csBundle(0).srcType(2) := SrcType.imm
        csBundle(0).lsrc(1) := 0.U
        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
        csBundle(0).fuType := FuType.i2v.U
        csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
        csBundle(0).vecWen := true.B
        // LMUL
        for (i <- 0 until MAX_VLMUL)
          for (j <- (0 to i).reverse) {
            when(i.U < lmul) {
              val old_vd = if (j == 0) {
                dest + lmul - 1.U - i.U
              } else (VECTOR_TMP_REG_LMUL + j).U
val vd = if (j == i) {
                dest + lmul - 1.U - i.U
              } else (VECTOR_TMP_REG_LMUL + j + 1).U
              // slots are counted back from the end of the uop sequence
              val slotFromEnd = i * (i + 1) / 2 + i - j + 1
              val uop = csBundle(numOfUop - slotFromEnd.U)
              uop.srcType(0) := SrcType.vp
              uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
              uop.lsrc(1) := src2 + lmul - 1.U - j.U
              uop.lsrc(2) := old_vd
              uop.ldest := vd
              uop.uopIdx := numOfUop - (slotFromEnd + 1).U
            }
          }
      }

      is(UopSplitType.VEC_M0X) {
        // LMUL: uop i reads the temporary written by uop i - 1 together with src2
        (0 until MAX_VLMUL).foreach { i =>
          val uop = csBundle(i)
          // the first uop has no predecessor, so its source 0 is don't-care
          uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
          uop.srcType(1) := SrcType.vp
          uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
          uop.lsrc(1) := src2
          // lsrc(2) is left untouched (don't-care)
          uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
          uop.rfWen := false.B
          uop.fpWen := false.B
          uop.vecWen := true.B
          uop.uopIdx := i.U
        }
        // the last uop writes `dest` instead of a vector temporary; rfWen is off when dest is 0
        val lastUop = csBundle(numOfUop - 1.U)
        lastUop.rfWen := dest =/= 0.U
        lastUop.fpWen := false.B
        lastUop.vecWen := false.B
        lastUop.ldest := dest
      }

      is(UopSplitType.VEC_MVV) {
        // LMUL
        for (i <- 0 until MAX_VLMUL) {
          val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
          csBundle(i * 2 + 0).srcType(0) := srcType0
          csBundle(i * 2 + 0).srcType(1) := SrcType.vp
          csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
          csBundle(i * 2 + 0).lsrc(1) := src2
          csBundle(i * 2 + 0).lsrc(2) := dest + i.U
          csBundle(i * 2 + 0).ldest := dest + i.U
          csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U

          csBundle(i * 2 + 1).srcType(0) := srcType0
csBundle(i * 2 + 1).srcType(1) := SrcType.vp
          csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
          csBundle(i * 2 + 1).lsrc(1) := src2
          // csBundle(i).lsrc(2) := dest + i.U  DontCare
          csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
          csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
        }
      }
      is(UopSplitType.VEC_VWW) {
        for (i <- 0 until MAX_VLMUL * 2) {
          when(i.U < lmul) {
            // the first `lmul` uops read one src2 register on both sources
            csBundle(i).lsrc(0) := src2 + i.U
            csBundle(i).lsrc(1) := src2 + i.U
          } otherwise {
            // later uops read a pair of temporaries: 2 * (i - lmul) and 2 * (i - lmul) + 1
            val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
            csBundle(i).lsrc(0) := pairBase + 1.U
            csBundle(i).lsrc(1) := pairBase
          }
          // identical in both branches: src 2 unused (lsrc(2) is not driven), result to temporary i
          csBundle(i).srcType(2) := SrcType.DC
          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
          csBundle(i).uopIdx := i.U
        }
        // The last uop takes src1 and the old destination and writes dest. These connects
        // do not depend on the loop index, so they are emitted once after the loop; being
        // the last connects, they still override the per-uop values above.
        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
        csBundle(numOfUop - 1.U).lsrc(0) := src1
        csBundle(numOfUop - 1.U).lsrc(2) := dest
        csBundle(numOfUop - 1.U).ldest := dest
      }
      is(UopSplitType.VEC_RGATHER) {
        // Emits len * len uops: destination register i is built from every source register
        // j, chained through temporaries; the last uop of each chain writes dest + i.
        def genCsBundle_VEC_RGATHER(len:Int): Unit ={
          for (i <- 0 until len)
            for (j <- 0 until len) {
              // srcType(0..2) keep their previously assigned values
              csBundle(i * len + j).lsrc(0) := src1 + i.U
              csBundle(i * len + j).lsrc(1) := src2 + j.U
              val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
              csBundle(i * len + j).lsrc(2) := vd_old
              val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
              csBundle(i * len + j).ldest := vd
              csBundle(i * len + j).uopIdx := (i * len + j).U
            }
        }
        switch(vlmulReg) {
          is("b001".U
){
            genCsBundle_VEC_RGATHER(2)
          }
          is("b010".U ){
            genCsBundle_VEC_RGATHER(4)
          }
          is("b011".U ){
            genCsBundle_VEC_RGATHER(8)
          }
        }
      }
      is(UopSplitType.VEC_RGATHER_VX) {
        // Emits len * len gather uops in slots 1..len*len (slot 0 is the move below).
        // Every uop reads the moved scalar from the temporary register on source 0.
        def genCsBundle_RGATHER_VX(len:Int): Unit ={
          for (i <- 0 until len)
            for (j <- 0 until len) {
              csBundle(i * len + j + 1).srcType(0) := SrcType.vp
              // srcType(1) and srcType(2) keep their previously assigned values
              csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
              csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
              val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
              csBundle(i * len + j + 1).lsrc(2) := vd_old
              val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
              csBundle(i * len + j + 1).ldest := vd
              csBundle(i * len + j + 1).uopIdx := (i * len + j).U
            }
        }
        // i to vector move
        csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
        csBundle(0).srcType(1) := SrcType.imm
        csBundle(0).srcType(2) := SrcType.imm
        csBundle(0).lsrc(1) := 0.U
        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
        csBundle(0).fuType := FuType.i2v.U
        csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
        csBundle(0).rfWen := false.B
        csBundle(0).fpWen := false.B
        csBundle(0).vecWen := true.B
        // default split for a single register; overridden below for vlmul b001/b010/b011
        genCsBundle_RGATHER_VX(1)
        switch(vlmulReg) {
          is("b001".U ){
            genCsBundle_RGATHER_VX(2)
          }
          is("b010".U ){
            genCsBundle_RGATHER_VX(4)
          }
          is("b011".U ){
            genCsBundle_RGATHER_VX(8)
          }
        }
      }
      is(UopSplitType.VEC_RGATHEREI16) {
        // SEW = 8: two uops per (destination i, source j) pair, reading index registers
        // src1 + 2i and src1 + 2i + 1; the chain runs through temporaries 2j and 2j + 1.
        def genCsBundle_VEC_RGATHEREI16_SEW8(len: Int): Unit = {
          for (i <- 0 until len; j <- 0 until len) {
            val slot = (i * len + j) * 2
            val midTmp = (VECTOR_TMP_REG_LMUL + j * 2).U
            csBundle(slot + 0).lsrc(0) := src1 + (i * 2 + 0).U
            csBundle(slot + 0).lsrc(1) := src2 + j.U
            csBundle(slot + 0).lsrc(2) := (if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
            csBundle(slot + 0).ldest := midTmp
            csBundle(slot + 0).uopIdx := (slot + 0).U
            csBundle(slot + 1).lsrc(0) := src1 + (i * 2 + 1).U
            csBundle(slot + 1).lsrc(1) := src2 + j.U
            csBundle(slot + 1).lsrc(2) := midTmp
            csBundle(slot + 1).ldest := (if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
            csBundle(slot + 1).uopIdx := (slot + 1).U
          }
        }
        // SEW >= 16: one uop per (destination i, source j) pair. `dstPerIdxReg` destination
        // registers share one index register: 1 for SEW 16, 2 for SEW 32, 4 for SEW 64.
        def genCsBundle_VEC_RGATHEREI16(len: Int, dstPerIdxReg: Int = 1): Unit = {
          for (i <- 0 until len; j <- 0 until len) {
            val slot = i * len + j
            csBundle(slot).lsrc(0) := src1 + (i / dstPerIdxReg).U
            csBundle(slot).lsrc(1) := src2 + j.U
            csBundle(slot).lsrc(2) := (if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U)
            csBundle(slot).ldest := (if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U)
            csBundle(slot).uopIdx := slot.U
          }
        }
        // Selects the split for SEW 32 / SEW 64 / everything else.
        def genForWideSew(len: Int): Unit = {
          when(vsewReg === VSew.e32) {
            genCsBundle_VEC_RGATHEREI16(len, 2)
          }.elsewhen(vsewReg === VSew.e64) {
            genCsBundle_VEC_RGATHEREI16(len, 4)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(len)
          }
        }
        // Selects the split for a register-group length. The SEW 8 form is only generated
        // for len < 8; for len == 8 a zero vsew falls through to the default split.
        def genForLen(len: Int): Unit = {
          if (len < 8) {
            when(!vsewReg.orR) {
              genCsBundle_VEC_RGATHEREI16_SEW8(len)
            }.otherwise {
              genForWideSew(len)
            }
          } else {
            genForWideSew(len)
          }
        }
        // default split for a single register; overridden below for vlmul b001/b010/b011
        genForLen(1)
        switch(vlmulReg) {
          is("b001".U) {
            genForLen(2)
          }
          is("b010".U) {
            genForLen(4)
          }
          is("b011".U) {
            genForLen(8)
          }
        }
      }
      is(UopSplitType.VEC_COMPRESS) {
        def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
          for (i <- 0 until len) {
            val jlen = if (i == len-1) i+1 else i+2
            for (j <- 0 until jlen) {
              val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
              val vd = if(i==len-1) (dest + j.U) else {
                if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
              }
              csBundle(i*(i+3)/2 + j).vecWen := true.B
              csBundle(i*(i+3)/2 + j).v0Wen := false.B
              val src13Type = if (j == i+1) DontCare else SrcType.vp
csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
            // Row 0 takes lsrc(0) from src1; later rows take it from VECTOR_TMP_REG_LMUL,
            // the register written by the extra uop (j == i+1) of a non-final row.
            if (i == 0) {
              csBundle(i*(i+3)/2 + j).lsrc(0) := src1
            } else {
              csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U
            }
            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
            csBundle(i*(i+3)/2 + j).ldest := vd
            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
          }
        }
      }
      // vlmulReg b001 / b010 / b011 select 2 / 4 / 8 rows; other encodings keep the
      // assignments made before this switch.
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_COMPRESS(2)
        }
        is("b010".U ){
          genCsBundle_VEC_COMPRESS(4)
        }
        is("b011".U ){
          genCsBundle_VEC_COMPRESS(8)
        }
      }
    }
    is(UopSplitType.VEC_MVNR) {
      // One uop per register: uop i works on src1 + i, src2 + i and dest + i
      // (dest + i is both the old-vd source and the destination).
      for (i <- 0 until MAX_VLMUL) {
        csBundle(i).lsrc(0) := src1 + i.U
        csBundle(i).lsrc(1) := src2 + i.U
        csBundle(i).lsrc(2) := dest + i.U
        csBundle(i).ldest := dest + i.U
        csBundle(i).uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_US_LDST) {
      /*
      FMV.D.X
       */
      // Uop 0: i2v move of an integer-register source into vector temporary
      // VECTOR_TMP_REG_LMUL, which the following uops read as lsrc(0).
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      csBundle(0).vlsInstr := true.B
      //LMUL
      // Uops 1..MAX_VLMUL: one per destination register dest + i.
      for (i <- 0 until MAX_VLMUL) {
        csBundle(i + 1).srcType(0) := SrcType.vp
        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
        csBundle(i + 1).ldest := dest + i.U
        csBundle(i + 1).uopIdx := i.U
        csBundle(i + 1).vlsInstr := true.B
      }
      // Segment accesses: first uop waits forward, last uop blocks backward.
      csBundle.head.waitForward := isUsSegment
      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
    }
    is(UopSplitType.VEC_US_FF_LD) {
      csBundle(0).srcType(0) := SrcType.reg
csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      csBundle(0).vlsInstr := true.B
      //LMUL
      // Uops 1..MAX_VLMUL: one per destination register dest + i, each reading the
      // temporary written by uop 0.
      for (i <- 0 until MAX_VLMUL) {
        csBundle(i + 1).srcType(0) := SrcType.vp
        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
        csBundle(i + 1).ldest := dest + i.U
        csBundle(i + 1).uopIdx := i.U
        csBundle(i + 1).vlsInstr := true.B
      }
      csBundle.head.waitForward := isUsSegment
      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
      // last uop read vl and write vl
      // These connections come after the loop above, so for the slot selected by
      // numOfUop - 1 they take precedence over the per-register assignments.
      csBundle(numOfUop - 1.U).srcType(0) := SrcType.no
      csBundle(numOfUop - 1.U).srcType(1) := SrcType.no
      csBundle(numOfUop - 1.U).srcType(2) := SrcType.no
      csBundle(numOfUop - 1.U).srcType(3) := SrcType.no
      csBundle(numOfUop - 1.U).srcType(4) := SrcType.vp
      csBundle(numOfUop - 1.U).lsrc(4) := Vl_IDX.U
      // the last uop writes vl (Vl_IDX) instead of a vector register
      csBundle(numOfUop - 1.U).vecWen := false.B
      csBundle(numOfUop - 1.U).vlWen := true.B
      csBundle(numOfUop - 1.U).ldest := Vl_IDX.U
    }
    is(UopSplitType.VEC_S_LDST) {
      /*
      FMV.D.X
       */
      // Uop 0: i2v move of an integer-register source into vector temporary
      // VECTOR_TMP_REG_LMUL.
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      csBundle(0).vlsInstr := true.B

      // Uop 1: i2v move of the instruction's second source register
      // (latchedInst.lsrc(1)) into vector temporary VECTOR_TMP_REG_LMUL + 1.
      // NOTE(review): presumably this is the stride operand - confirm.
      csBundle(1).srcType(0) := SrcType.reg
      csBundle(1).srcType(1) := SrcType.imm
      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
      csBundle(1).lsrc(1) := 0.U
      csBundle(1).ldest :=
(VECTOR_TMP_REG_LMUL + 1).U
      csBundle(1).fuType := FuType.i2v.U
      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      csBundle(1).rfWen := false.B
      csBundle(1).fpWen := false.B
      csBundle(1).vecWen := true.B
      csBundle(1).vlsInstr := true.B

      //LMUL
      // Uops 2..MAX_VLMUL+1: one per destination register dest + i, each reading both
      // temporaries written by uops 0 and 1.
      for (i <- 0 until MAX_VLMUL) {
        csBundle(i + 2).srcType(0) := SrcType.vp
        csBundle(i + 2).srcType(1) := SrcType.vp
        csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
        csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
        csBundle(i + 2).ldest := dest + i.U
        csBundle(i + 2).uopIdx := i.U
        csBundle(i + 2).vlsInstr := true.B
      }
      csBundle.head.waitForward := isSdSegment
      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
    }
    is(UopSplitType.VEC_I_LDST) {
      /**
       * Fills uop slots 1..MAX_VLMUL for a segment indexed load/store.
       *
       * Slot i + 1 targets dest + i. Only the first lmul * nf slots write a vector
       * register and read old vd (srcType(2) = vp); the remaining slots have vecWen
       * cleared and srcType(2) = no. srcType(1) is set to `no` here and is meant to be
       * overridden afterwards by genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1.
       *
       * @param lmul register-group size of the data operand (elaboration-time constant)
       * @param nf   number of fields of the segment access (elaboration-time constant)
       */
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
        for (i <- 0 until MAX_VLMUL) {
          val vecWen = if (i < lmul * nf) true.B else false.B
          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
          csBundle(i + 1).srcType(0) := SrcType.vp
          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
          csBundle(i + 1).srcType(1) := SrcType.no
          csBundle(i + 1).lsrc(1) := src2 + i.U
          csBundle(i + 1).srcType(2) := src2Type
          csBundle(i + 1).lsrc(2) := dest + i.U
          csBundle(i + 1).ldest := dest + i.U
          csBundle(i + 1).rfWen := false.B
          csBundle(i + 1).fpWen := false.B
          csBundle(i + 1).vecWen := vecWen
          csBundle(i + 1).uopIdx := i.U
          csBundle(i + 1).vlsInstr := true.B
        }
      }
      /**
       * Sets the second source of uop slots 1..MAX_VLMUL: the first `emul` slots read
       * vector register src2 + i, the others have srcType(1) = no.
       *
       * @param emul register-group size of the index operand (elaboration-time constant)
       */
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
        for (i <- 0 until MAX_VLMUL) {
          val src1Type = if (i < emul) SrcType.vp else SrcType.no
          csBundle(i + 1).srcType(1) := src1Type
          csBundle(i + 1).lsrc(1) := src2 + i.U
        }
      }

      // vsew and veew are zero-extended by one bit before entering the EMUL arithmetic.
      val vlmul = vlmulReg
      val vsew = Cat(0.U(1.W), vsewReg)
      val veew = Cat(0.U(1.W), width)
      val vemul:
UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
      // (~vsew + 1 is the two's-complement negation of vsew, so the sum above is
      // veew - vsew + vlmul, truncated to the operand width.)
      // Encodings b001 / b010 / b011 map to 1 / 2 / 3; every other encoding maps to 0.
      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Seq(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Seq(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      // Uop 0: i2v move of an integer-register source into vector temporary
      // VECTOR_TMP_REG_LMUL, which the following uops read as lsrc(0).
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      csBundle(0).vlsInstr := true.B

      //LMUL
      when(nf === 0.U) {
        // Non-segment indexed load/store: the per-uop lookup table indexedLSRegOffset(i)
        // is fed {simple_emul, simple_lmul} and returns the register offsets to add
        // to src2 (index operand) and to dest (data operand) for uop i.
        for (i <- 0 until MAX_VLMUL) {
          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
          val offsetVd = indexedLSRegOffset(i).outOffsetVd
          csBundle(i + 1).srcType(0) := SrcType.vp
          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
          csBundle(i + 1).srcType(2) := SrcType.vp
          // lsrc2 is old vd
          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
          csBundle(i + 1).uopIdx := i.U
          csBundle(i + 1).vlsInstr := true.B
        }
      }.otherwise{
        // nf > 1, is segment indexed load/store
        // (the nf signal holds the field count minus one: value k selects k + 1 fields)
        // gen src0, vd
        switch(simple_lmul) {
          is(0.U) {
            switch(nf) {
              is(1.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
              }
              is(2.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
              }
              is(3.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
              }
              is(4.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
              }
              is(5.U) {
genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
              }
              is(6.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
              }
              is(7.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
              }
            }
          }
          is(1.U) {
            switch(nf) {
              is(1.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
              }
              is(2.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
              }
              is(3.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
              }
            }
          }
          is(2.U) {
            switch(nf) {
              is(1.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
              }
            }
          }
        }

        // gen src1
        // simple_emul 0 / 1 / 2 / 3 selects an index register group of 1 / 2 / 4 / 8.
        switch(simple_emul) {
          is(0.U) {
            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
          }
          is(1.U) {
            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
          }
          is(2.U) {
            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
          }
          is(3.U) {
            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
          }
        }

        // for vector store instructions, do not set vecWen
        when(isVstore) {
          for (i <- 0 until MAX_VLMUL) {
            csBundle(i + 1).vecWen := false.B
          }
        }
      }
      csBundle.head.waitForward := isIxSegment
      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
    }
  }

  // readyFromRename counter: all-or-nothing, RenameWidth slots when the first output
  // port is ready, otherwise none
  val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U)

  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
  val thisAllOut = uopRes <= readyCounter

  // Number of full RenameWidth-sized groups already sent for the current instruction.
  val count = RegInit(0.U(log2Up(maxUopSize/RenameWidth + 1).W))
  val countNext = WireInit(count)

  // s_idle:   a valid input starts a new instruction (load its uop count, reset count).
  // s_active: once the remaining uops fit into this cycle, either start the next
  //           instruction immediately or fall back to s_idle; otherwise keep draining.
  switch(state) {
    is(s_idle) {
      when (inValid) {
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
        countNext := 0.U
      }
    }
    is(s_active) {
      when (thisAllOut) {
        when (inValid) {
          stateNext := s_active
          uopResNext := inUopInfo.numOfUop
        }.otherwise {
          stateNext := s_idle
          uopResNext := 0.U
        }
        countNext := 0.U
      }.otherwise {
stateNext := s_active
        uopResNext := uopRes - readyCounter
        countNext := count + outReadys.head.asUInt
      }
    }
  }

  // A redirect overrides the FSM and clears all sequencing state.
  state := Mux(io.redirect, s_idle, stateNext)
  uopRes := Mux(io.redirect, 0.U, uopResNext)
  count := Mux(io.redirect, 0.U, countNext)

  // Number of uops sent this cycle: min(uopRes, readyCounter).
  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)

  fixedDecodedInst := csBundle

  // when vstart is not zero, the last uop will modify vstart to zero
  // therefore, block backward and flush the pipe
  fixedDecodedInst(numOfUop - 1.U).flushPipe := (vstartReg =/= 0.U) || latchedInst.flushPipe
  // uopsSeq(i) collects the uops whose slot index is congruent to i modulo RenameWidth,
  // so uopsSeq(i)(count) is slot count * RenameWidth + i.
  val uopsSeq = (0 until RenameWidth).map(i => VecInit(fixedDecodedInst.zipWithIndex.filter(_._2 % RenameWidth == i).map(_._1)))
  for(i <- 0 until RenameWidth) {
    outValids(i) := complexNum > i.U
    outDecodedInsts(i) := uopsSeq(i)(count)
  }

  outComplexNum := Mux(state === s_active, complexNum, 0.U)
  // A new instruction is accepted when idle, or in the cycle the current one finishes.
  inReady := state === s_idle || state === s_active && thisAllOut


  XSError(inValid && inUopInfo.numOfUop === 0.U,
    p"uop number ${inUopInfo.numOfUop} is illegal, cannot be zero")
//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
//  val notInf = Wire(Vec(DecodeWidth, Bool()))
//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
//
//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
//    0.U)
//  validToRename.zipWithIndex.foreach{
//    case(dst, i) =>
//      val validFix =
//        Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
//      dst := MuxCase(false.B, Seq(
//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
//      ).toSeq)
//  }
//
//  readyToIBuf.zipWithIndex.foreach {
//    case (dst, i) =>
//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
//      dst := MuxCase(true.B, Seq(
//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
//      ).toSeq)
//  }
//
//  io.deq.decodedInsts := decodedInsts
//  io.deq.complexNum := complexNum
//  io.deq.validToRename := validToRename
//  io.deq.readyToIBuf := readyToIBuf
}