/***************************************************************************************
 * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
 * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul, Vl}
import yunsuan.VpermType
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Combinational lookup table for one uop of a (non-segment) indexed vector load/store.
  *
  * The 4-bit input `src` is matched against `{emul[1:0], lmul[1:0]}` (emul in the upper
  * two bits). For every one of the 16 combinations the table yields the register offsets
  * that the uop with index `uopIdx` applies to vs2 and to vd.
  *
  * @param uopIdx elaboration-time index of the uop this table instance serves
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair for a single uop.
    *
    * `lmul` and `emul` are used as exponents (the code evaluates `1 << lmul` and
    * `1 << emul`). The larger of the two determines how many uops exist; the register
    * group belonging to the smaller one advances once every `1 << |emul - lmul|` uops.
    *
    * @param lmul   exponent of the data register group size
    * @param emul   exponent of the index register group size
    * @param uopIdx index of the uop being generated
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` is outside the valid uop range
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    if (lmul < emul) {
      // lmul < emul: the uop count depends on emul * nf, vd advances more slowly than vs2
      val ratio = 1 << (emul - lmul)
      val inRange = 0 <= uopIdx && uopIdx < (1 << emul)
      if (inRange) (uopIdx, uopIdx / ratio) else (0, 0)
    } else {
      // lmul >= emul: the uop count depends on lmul * nf, vs2 advances more slowly than vd
      val ratio = 1 << (lmul - emul)
      val inRange = 0 <= uopIdx && uopIdx < (1 << lmul)
      if (inRange) (uopIdx / ratio, uopIdx) else (0, 0)
    }
  }

  // Indexed load/store: one (emul, lmul, offsetVs2, offsetVd) row per emul/lmul pair,
  // enumerated with emul as the outer and lmul as the inner index.
  // Left as a `var` so the member signature of this class is unchanged.
  var combVemulNf : Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Input pattern: {emul, lmul}; output pattern: {offsetVs2, offsetVd}, 3 bits each.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}

/** Elaboration-time constants shared by the vector decode logic. */
trait VectorConstants {
  val MAX_VLMUL = 8
  val VECTOR_TMP_REG_LMUL = 32 // 32~46  ->  15
  val VECTOR_COMPRESS = 1 // in v0 regfile
  val MAX_INDEXED_LS_UOPNUM = 64
}

/** IO bundle of [[DecodeUnitComp]]: one complex instruction in, up to `RenameWidth` decoded uops out. */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst =
WireInit(io.in.bits.simpleDecodedInst) 111 private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 112 private val inUopInfo = io.in.bits.uopInfo 113 private val outValids = io.out.complexDecodedInsts.map(_.valid) 114 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 115 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 116 private val outComplexNum = io.complexNum 117 118 val maxUopSize = MaxUopSize 119 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 120 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 121 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 122 }.elsewhen(inInstFields.RS1 === 0.U) { 123 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 124 } 125 } 126 127 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 128 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 129 //input bits 130 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 131 132 val src1 = Cat(0.U(1.W), instFields.RS1) 133 val src2 = Cat(0.U(1.W), instFields.RS2) 134 val dest = Cat(0.U(1.W), instFields.RD) 135 136 val nf = instFields.NF 137 val width = instFields.WIDTH(1, 0) 138 139 //output of DecodeUnit 140 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 141 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 142 val lmul = Wire(UInt(4.W)) 143 val isVsetSimple = Wire(Bool()) 144 145 val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i))) 146 indexedLSRegOffset.map(_.src := 0.U) 147 148 //pre decode 149 lmul := latchedUopInfo.lmul 150 isVsetSimple := latchedInst.isVset 151 val vlmulReg = latchedInst.vpu.vlmul 152 val vsewReg = latchedInst.vpu.vsew 153 val vstartReg = latchedInst.vpu.vstart 154 155 //Type of uop Div 156 val typeOfSplit = latchedInst.uopSplitType 157 val src1Type = latchedInst.srcType(0) 158 val src1IsImm = src1Type === SrcType.imm 
159 val src1IsFp = src1Type === SrcType.fp 160 161 val isVstore = FuType.isVStore(latchedInst.fuType) 162 163 // exception generator 164 val vecException = Module(new VecExceptionGen) 165 vecException.io.inst := latchedInst.instr 166 vecException.io.decodedInst := latchedInst 167 vecException.io.vtype := latchedInst.vpu.vtype 168 vecException.io.vstart := latchedInst.vpu.vstart 169 val illegalInst = vecException.io.illegalInst 170 171 numOfUop := latchedUopInfo.numOfUop 172 numOfWB := latchedUopInfo.numOfWB 173 174 //uops dispatch 175 val s_idle :: s_active :: Nil = Enum(2) 176 val state = RegInit(s_idle) 177 val stateNext = WireDefault(state) 178 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 179 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 180 val uopResNext = WireInit(uopRes) 181 val e64 = 3.U(2.W) 182 val isUsSegment = instFields.MOP === 0.U && ((nf =/= 0.U && instFields.LUMOP === 0.U) || instFields.LUMOP === "b10000".U) 183 val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U 184 val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U 185 186 //uop div up to maxUopSize 187 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 188 val fixedDecodedInst = Wire(Vec(maxUopSize, new DecodedInst)) 189 190 csBundle.foreach { case dst => 191 dst := latchedInst 192 dst.numUops := latchedUopInfo.numOfUop 193 dst.numWB := latchedUopInfo.numOfWB 194 dst.exceptionVec(ExceptionNO.EX_II) := latchedInst.exceptionVec(ExceptionNO.EX_II) || illegalInst 195 dst.firstUop := false.B 196 dst.lastUop := false.B 197 dst.vlsInstr := false.B 198 } 199 200 csBundle(0).firstUop := true.B 201 csBundle(numOfUop - 1.U).lastUop := true.B 202 203 // when vstart is not zero, the last uop will modify vstart to zero 204 // therefore, blockback and flush pipe 205 csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U 206 csBundle(0.U).flushPipe := vstartReg =/= 0.U 207 208 switch(typeOfSplit) { 209 is(UopSplitType.VSET) { 210 // In simple decoder, rfWen and vecWen are not 
set 211 when(isVsetSimple) { 212 // Default 213 // uop0 set rd, never flushPipe 214 csBundle(0).fuType := FuType.vsetiwi.U 215 csBundle(0).flushPipe := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U) 216 csBundle(0).blockBackward := false.B 217 csBundle(0).rfWen := true.B 218 // uop1 set vl, vsetvl will flushPipe 219 csBundle(1).ldest := Vl_IDX.U 220 csBundle(1).vecWen := false.B 221 csBundle(1).vlWen := true.B 222 csBundle(1).flushPipe := false.B 223 csBundle(1).blockBackward := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U) 224 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 225 // write nothing, uop0 is a nop instruction 226 csBundle(0).rfWen := false.B 227 csBundle(0).fpWen := false.B 228 csBundle(0).vecWen := false.B 229 csBundle(0).vlWen := false.B 230 csBundle(1).fuType := FuType.vsetfwf.U 231 csBundle(1).srcType(0) := SrcType.no 232 csBundle(1).srcType(2) := SrcType.no 233 csBundle(1).srcType(3) := SrcType.no 234 csBundle(1).srcType(4) := SrcType.vp 235 csBundle(1).lsrc(4) := Vl_IDX.U 236 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 237 // uop0: mv vtype gpr to vector region 238 csBundle(0).srcType(0) := SrcType.xp 239 csBundle(0).srcType(1) := SrcType.no 240 csBundle(0).lsrc(0) := src2 241 csBundle(0).lsrc(1) := 0.U 242 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 243 csBundle(0).fuType := FuType.i2v.U 244 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 245 csBundle(0).rfWen := false.B 246 csBundle(0).fpWen := false.B 247 csBundle(0).vecWen := true.B 248 csBundle(0).vlWen := false.B 249 // uop1: uvsetvcfg_vv 250 csBundle(1).fuType := FuType.vsetfwf.U 251 // vl 252 csBundle(1).srcType(0) := SrcType.no 253 csBundle(1).srcType(2) := SrcType.no 254 csBundle(1).srcType(3) := SrcType.no 255 csBundle(1).srcType(4) := SrcType.vp 256 csBundle(1).lsrc(4) := Vl_IDX.U 257 // vtype 258 csBundle(1).srcType(1) := SrcType.vp 259 
csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U 260 csBundle(1).vecWen := false.B 261 csBundle(1).vlWen := true.B 262 csBundle(1).ldest := Vl_IDX.U 263 }.elsewhen(dest === 0.U) { 264 // write nothing, uop0 is a nop instruction 265 csBundle(0).rfWen := false.B 266 csBundle(0).fpWen := false.B 267 csBundle(0).vecWen := false.B 268 csBundle(0).vlWen := false.B 269 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) { 270 // because vsetvl may modified src2 when src2 == rd, 271 // we need to modify vd in second uop to avoid dependency 272 // uop0 set vl 273 csBundle(0).fuType := FuType.vsetiwf.U 274 csBundle(0).ldest := Vl_IDX.U 275 csBundle(0).rfWen := false.B 276 csBundle(0).vlWen := true.B 277 // uop1 set rd 278 csBundle(1).fuType := FuType.vsetiwi.U 279 csBundle(1).ldest := dest 280 csBundle(1).rfWen := true.B 281 csBundle(1).vlWen := false.B 282 } 283 // use bypass vtype from vtypeGen 284 csBundle(0).vpu.connectVType(io.vtypeBypass) 285 csBundle(1).vpu.connectVType(io.vtypeBypass) 286 } 287 } 288 is(UopSplitType.VEC_VVV) { 289 for (i <- 0 until MAX_VLMUL) { 290 csBundle(i).lsrc(0) := src1 + i.U 291 csBundle(i).lsrc(1) := src2 + i.U 292 csBundle(i).lsrc(2) := dest + i.U 293 csBundle(i).ldest := dest + i.U 294 csBundle(i).uopIdx := i.U 295 } 296 } 297 is(UopSplitType.VEC_VFV) { 298 /* 299 f to vector move 300 */ 301 csBundle(0).srcType(0) := SrcType.fp 302 csBundle(0).srcType(1) := SrcType.imm 303 csBundle(0).srcType(2) := SrcType.imm 304 csBundle(0).lsrc(1) := 0.U 305 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 306 csBundle(0).fuType := FuType.f2v.U 307 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 308 csBundle(0).vecWen := true.B 309 csBundle(0).vpu.isReverse := false.B 310 /* 311 LMUL 312 */ 313 for (i <- 0 until MAX_VLMUL) { 314 csBundle(i + 1).srcType(0) := SrcType.vp 315 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 316 csBundle(i + 1).lsrc(1) := src2 + i.U 317 csBundle(i + 1).lsrc(2) := dest + i.U 318 csBundle(i + 1).ldest := dest + i.U 
319 csBundle(i + 1).uopIdx := i.U 320 } 321 } 322 is(UopSplitType.VEC_EXT2) { 323 for (i <- 0 until MAX_VLMUL / 2) { 324 csBundle(2 * i).lsrc(1) := src2 + i.U 325 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 326 csBundle(2 * i).ldest := dest + (2 * i).U 327 csBundle(2 * i).uopIdx := (2 * i).U 328 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 329 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 330 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 331 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 332 } 333 } 334 is(UopSplitType.VEC_EXT4) { 335 for (i <- 0 until MAX_VLMUL / 4) { 336 csBundle(4 * i).lsrc(1) := src2 + i.U 337 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 338 csBundle(4 * i).ldest := dest + (4 * i).U 339 csBundle(4 * i).uopIdx := (4 * i).U 340 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 341 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 342 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 343 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 344 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 345 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 346 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 347 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 348 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 349 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 350 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 351 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 352 } 353 } 354 is(UopSplitType.VEC_EXT8) { 355 for (i <- 0 until MAX_VLMUL) { 356 csBundle(i).lsrc(1) := src2 357 csBundle(i).lsrc(2) := dest + i.U 358 csBundle(i).ldest := dest + i.U 359 csBundle(i).uopIdx := i.U 360 } 361 } 362 is(UopSplitType.VEC_0XV) { 363 /* 364 i/f to vector move 365 */ 366 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 367 csBundle(0).srcType(1) := SrcType.imm 368 csBundle(0).srcType(2) := SrcType.imm 369 csBundle(0).lsrc(1) := 0.U 370 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 371 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 372 csBundle(0).fuOpType := 
Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 373 csBundle(0).rfWen := false.B 374 csBundle(0).fpWen := false.B 375 csBundle(0).vecWen := true.B 376 /* 377 vmv.s.x 378 */ 379 csBundle(1).srcType(0) := SrcType.vp 380 csBundle(1).srcType(1) := SrcType.imm 381 csBundle(1).srcType(2) := SrcType.vp 382 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 383 csBundle(1).lsrc(1) := 0.U 384 csBundle(1).lsrc(2) := dest 385 csBundle(1).ldest := dest 386 csBundle(1).rfWen := false.B 387 csBundle(1).fpWen := false.B 388 csBundle(1).vecWen := true.B 389 csBundle(1).uopIdx := 0.U 390 } 391 is(UopSplitType.VEC_VXV) { 392 /* 393 i to vector move 394 */ 395 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 396 csBundle(0).srcType(1) := SrcType.imm 397 csBundle(0).srcType(2) := SrcType.imm 398 csBundle(0).lsrc(1) := 0.U 399 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 400 csBundle(0).fuType := FuType.i2v.U 401 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 402 csBundle(0).vecWen := true.B 403 csBundle(0).vpu.isReverse := false.B 404 /* 405 LMUL 406 */ 407 for (i <- 0 until MAX_VLMUL) { 408 csBundle(i + 1).srcType(0) := SrcType.vp 409 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 410 csBundle(i + 1).lsrc(1) := src2 + i.U 411 csBundle(i + 1).lsrc(2) := dest + i.U 412 csBundle(i + 1).ldest := dest + i.U 413 csBundle(i + 1).uopIdx := i.U 414 } 415 } 416 is(UopSplitType.VEC_VVW) { 417 for (i <- 0 until MAX_VLMUL / 2) { 418 csBundle(2 * i).lsrc(0) := src1 + i.U 419 csBundle(2 * i).lsrc(1) := src2 + i.U 420 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 421 csBundle(2 * i).ldest := dest + (2 * i).U 422 csBundle(2 * i).uopIdx := (2 * i).U 423 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 424 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 425 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 426 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 427 csBundle(2 * i + 1).uopIdx := (2 * i 
+ 1).U 428 } 429 } 430 is(UopSplitType.VEC_VFW) { 431 /* 432 f to vector move 433 */ 434 csBundle(0).srcType(0) := SrcType.fp 435 csBundle(0).srcType(1) := SrcType.imm 436 csBundle(0).srcType(2) := SrcType.imm 437 csBundle(0).lsrc(1) := 0.U 438 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 439 csBundle(0).fuType := FuType.f2v.U 440 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 441 csBundle(0).rfWen := false.B 442 csBundle(0).fpWen := false.B 443 csBundle(0).vecWen := true.B 444 445 for (i <- 0 until MAX_VLMUL / 2) { 446 csBundle(2 * i + 1).srcType(0) := SrcType.vp 447 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 448 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 449 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 450 csBundle(2 * i + 1).ldest := dest + (2 * i).U 451 csBundle(2 * i + 1).uopIdx := (2 * i).U 452 csBundle(2 * i + 2).srcType(0) := SrcType.vp 453 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 454 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 455 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 456 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 457 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 458 } 459 } 460 is(UopSplitType.VEC_WVW) { 461 for (i <- 0 until MAX_VLMUL / 2) { 462 csBundle(2 * i).lsrc(0) := src1 + i.U 463 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 464 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 465 csBundle(2 * i).ldest := dest + (2 * i).U 466 csBundle(2 * i).uopIdx := (2 * i).U 467 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 468 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 469 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 470 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 471 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 472 } 473 } 474 is(UopSplitType.VEC_VXW) { 475 /* 476 i to vector move 477 */ 478 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 479 csBundle(0).srcType(1) := SrcType.imm 480 csBundle(0).srcType(2) := SrcType.imm 481 csBundle(0).lsrc(1) := 0.U 482 csBundle(0).ldest 
:= VECTOR_TMP_REG_LMUL.U 483 csBundle(0).fuType := FuType.i2v.U 484 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 485 csBundle(0).vecWen := true.B 486 487 for (i <- 0 until MAX_VLMUL / 2) { 488 csBundle(2 * i + 1).srcType(0) := SrcType.vp 489 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 490 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 491 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 492 csBundle(2 * i + 1).ldest := dest + (2 * i).U 493 csBundle(2 * i + 1).uopIdx := (2 * i).U 494 csBundle(2 * i + 2).srcType(0) := SrcType.vp 495 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 496 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 497 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 498 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 499 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 500 } 501 } 502 is(UopSplitType.VEC_WXW) { 503 /* 504 i to vector move 505 */ 506 csBundle(0).srcType(0) := SrcType.reg 507 csBundle(0).srcType(1) := SrcType.imm 508 csBundle(0).srcType(2) := SrcType.imm 509 csBundle(0).lsrc(1) := 0.U 510 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 511 csBundle(0).fuType := FuType.i2v.U 512 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 513 csBundle(0).vecWen := true.B 514 515 for (i <- 0 until MAX_VLMUL / 2) { 516 csBundle(2 * i + 1).srcType(0) := SrcType.vp 517 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 518 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 519 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 520 csBundle(2 * i + 1).ldest := dest + (2 * i).U 521 csBundle(2 * i + 1).uopIdx := (2 * i).U 522 csBundle(2 * i + 2).srcType(0) := SrcType.vp 523 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 524 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 525 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 526 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 527 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 528 } 529 } 530 is(UopSplitType.VEC_WVV) 
{ 531 for (i <- 0 until MAX_VLMUL / 2) { 532 533 csBundle(2 * i).lsrc(0) := src1 + i.U 534 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 535 csBundle(2 * i).lsrc(2) := dest + i.U 536 csBundle(2 * i).ldest := dest + i.U 537 csBundle(2 * i).uopIdx := (2 * i).U 538 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 539 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 540 csBundle(2 * i + 1).lsrc(2) := dest + i.U 541 csBundle(2 * i + 1).ldest := dest + i.U 542 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 543 } 544 } 545 is(UopSplitType.VEC_WFW) { 546 /* 547 f to vector move 548 */ 549 csBundle(0).srcType(0) := SrcType.fp 550 csBundle(0).srcType(1) := SrcType.imm 551 csBundle(0).srcType(2) := SrcType.imm 552 csBundle(0).lsrc(1) := 0.U 553 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 554 csBundle(0).fuType := FuType.f2v.U 555 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 556 csBundle(0).rfWen := false.B 557 csBundle(0).fpWen := false.B 558 csBundle(0).vecWen := true.B 559 560 for (i <- 0 until MAX_VLMUL / 2) { 561 csBundle(2 * i + 1).srcType(0) := SrcType.vp 562 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 563 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 564 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 565 csBundle(2 * i + 1).ldest := dest + (2 * i).U 566 csBundle(2 * i + 1).uopIdx := (2 * i).U 567 csBundle(2 * i + 2).srcType(0) := SrcType.vp 568 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 569 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 570 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 571 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 572 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 573 } 574 } 575 is(UopSplitType.VEC_WXV) { 576 /* 577 i to vector move 578 */ 579 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 580 csBundle(0).srcType(1) := SrcType.imm 581 csBundle(0).srcType(2) := SrcType.imm 582 csBundle(0).lsrc(1) := 0.U 583 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 584 csBundle(0).fuType := 
FuType.i2v.U 585 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 586 csBundle(0).vecWen := true.B 587 588 for (i <- 0 until MAX_VLMUL / 2) { 589 csBundle(2 * i + 1).srcType(0) := SrcType.vp 590 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 591 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 592 csBundle(2 * i + 1).lsrc(2) := dest + i.U 593 csBundle(2 * i + 1).ldest := dest + i.U 594 csBundle(2 * i + 1).uopIdx := (2 * i).U 595 csBundle(2 * i + 2).srcType(0) := SrcType.vp 596 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 597 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 598 csBundle(2 * i + 2).lsrc(2) := dest + i.U 599 csBundle(2 * i + 2).ldest := dest + i.U 600 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 601 } 602 } 603 is(UopSplitType.VEC_VVM) { 604 csBundle(0).lsrc(2) := dest 605 csBundle(0).ldest := dest 606 csBundle(0).uopIdx := 0.U 607 for (i <- 1 until MAX_VLMUL) { 608 csBundle(i).lsrc(0) := src1 + i.U 609 csBundle(i).lsrc(1) := src2 + i.U 610 csBundle(i).lsrc(2) := dest 611 csBundle(i).ldest := dest 612 csBundle(i).uopIdx := i.U 613 } 614 } 615 is(UopSplitType.VEC_VFM) { 616 /* 617 f to vector move 618 */ 619 csBundle(0).srcType(0) := SrcType.fp 620 csBundle(0).srcType(1) := SrcType.imm 621 csBundle(0).srcType(2) := SrcType.imm 622 csBundle(0).lsrc(1) := 0.U 623 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 624 csBundle(0).fuType := FuType.f2v.U 625 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 626 csBundle(0).rfWen := false.B 627 csBundle(0).fpWen := false.B 628 csBundle(0).vecWen := true.B 629 //LMUL 630 csBundle(1).srcType(0) := SrcType.vp 631 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 632 csBundle(1).lsrc(2) := dest 633 csBundle(1).ldest := dest 634 csBundle(1).uopIdx := 0.U 635 for (i <- 1 until MAX_VLMUL) { 636 csBundle(i + 1).srcType(0) := SrcType.vp 637 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 638 csBundle(i + 1).lsrc(1) := src2 + i.U 639 
csBundle(i + 1).lsrc(2) := dest 640 csBundle(i + 1).ldest := dest 641 csBundle(i + 1).uopIdx := i.U 642 } 643 csBundle(numOfUop - 1.U).ldest := dest 644 } 645 is(UopSplitType.VEC_VXM) { 646 /* 647 i to vector move 648 */ 649 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 650 csBundle(0).srcType(1) := SrcType.imm 651 csBundle(0).srcType(2) := SrcType.imm 652 csBundle(0).lsrc(1) := 0.U 653 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 654 csBundle(0).fuType := FuType.i2v.U 655 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 656 csBundle(0).vecWen := true.B 657 //LMUL 658 csBundle(1).srcType(0) := SrcType.vp 659 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 660 csBundle(1).lsrc(2) := dest 661 csBundle(1).ldest := dest 662 csBundle(1).uopIdx := 0.U 663 for (i <- 1 until MAX_VLMUL) { 664 csBundle(i + 1).srcType(0) := SrcType.vp 665 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 666 csBundle(i + 1).lsrc(1) := src2 + i.U 667 csBundle(i + 1).lsrc(2) := dest 668 csBundle(i + 1).ldest := dest 669 csBundle(i + 1).uopIdx := i.U 670 } 671 csBundle(numOfUop - 1.U).ldest := dest 672 } 673 is(UopSplitType.VEC_SLIDE1UP) { 674 /* 675 i to vector move 676 */ 677 csBundle(0).srcType(0) := SrcType.reg 678 csBundle(0).srcType(1) := SrcType.imm 679 csBundle(0).srcType(2) := SrcType.imm 680 csBundle(0).lsrc(1) := 0.U 681 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 682 csBundle(0).fuType := FuType.i2v.U 683 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 684 csBundle(0).vecWen := true.B 685 //LMUL 686 csBundle(1).srcType(0) := SrcType.vp 687 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 688 csBundle(1).lsrc(2) := dest 689 csBundle(1).ldest := dest 690 csBundle(1).uopIdx := 0.U 691 for (i <- 1 until MAX_VLMUL) { 692 csBundle(i + 1).srcType(0) := SrcType.vp 693 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 694 csBundle(i + 1).lsrc(1) := src2 + i.U 695 csBundle(i + 1).lsrc(2) := dest + i.U 696 
csBundle(i + 1).ldest := dest + i.U 697 csBundle(i + 1).uopIdx := i.U 698 } 699 } 700 is(UopSplitType.VEC_FSLIDE1UP) { 701 /* 702 f to vector move 703 */ 704 csBundle(0).srcType(0) := SrcType.fp 705 csBundle(0).srcType(1) := SrcType.imm 706 csBundle(0).srcType(2) := SrcType.imm 707 csBundle(0).lsrc(1) := 0.U 708 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 709 csBundle(0).fuType := FuType.f2v.U 710 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 711 csBundle(0).rfWen := false.B 712 csBundle(0).fpWen := false.B 713 csBundle(0).vecWen := true.B 714 //LMUL 715 csBundle(1).srcType(0) := SrcType.vp 716 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 717 csBundle(1).lsrc(1) := src2 718 csBundle(1).lsrc(2) := dest 719 csBundle(1).ldest := dest 720 csBundle(1).uopIdx := 0.U 721 for (i <- 1 until MAX_VLMUL) { 722 csBundle(i + 1).srcType(0) := SrcType.vp 723 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 724 csBundle(i + 1).lsrc(1) := src2 + i.U 725 csBundle(i + 1).lsrc(2) := dest + i.U 726 csBundle(i + 1).ldest := dest + i.U 727 csBundle(i + 1).uopIdx := i.U 728 } 729 } 730 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 731 /* 732 i to vector move 733 */ 734 csBundle(0).srcType(0) := SrcType.reg 735 csBundle(0).srcType(1) := SrcType.imm 736 csBundle(0).srcType(2) := SrcType.imm 737 csBundle(0).lsrc(1) := 0.U 738 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 739 csBundle(0).fuType := FuType.i2v.U 740 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 741 csBundle(0).vecWen := true.B 742 //LMUL 743 for (i <- 0 until MAX_VLMUL) { 744 csBundle(2 * i + 1).srcType(0) := SrcType.vp 745 csBundle(2 * i + 1).srcType(1) := SrcType.vp 746 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 747 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 748 csBundle(2 * i + 1).lsrc(2) := dest + i.U 749 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 750 csBundle(2 * i + 1).uopIdx := (2 * i).U 751 if (2 * i + 2 < MAX_VLMUL * 2) { 752 csBundle(2 * i + 2).srcType(0) := 
SrcType.vp 753 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 754 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 755 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 756 csBundle(2 * i + 2).ldest := dest + i.U 757 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 758 } 759 } 760 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 761 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 762 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 763 } 764 is(UopSplitType.VEC_FSLIDE1DOWN) { 765 /* 766 f to vector move 767 */ 768 csBundle(0).srcType(0) := SrcType.fp 769 csBundle(0).srcType(1) := SrcType.imm 770 csBundle(0).srcType(2) := SrcType.imm 771 csBundle(0).lsrc(1) := 0.U 772 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 773 csBundle(0).fuType := FuType.f2v.U 774 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 775 csBundle(0).rfWen := false.B 776 csBundle(0).fpWen := false.B 777 csBundle(0).vecWen := true.B 778 //LMUL 779 for (i <- 0 until MAX_VLMUL) { 780 csBundle(2 * i + 1).srcType(0) := SrcType.vp 781 csBundle(2 * i + 1).srcType(1) := SrcType.vp 782 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 783 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 784 csBundle(2 * i + 1).lsrc(2) := dest + i.U 785 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 786 csBundle(2 * i + 1).uopIdx := (2 * i).U 787 if (2 * i + 2 < MAX_VLMUL * 2) { 788 csBundle(2 * i + 2).srcType(0) := SrcType.vp 789 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 790 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 791 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 792 csBundle(2 * i + 2).ldest := dest + i.U 793 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 794 } 795 } 796 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 797 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 798 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 799 } 800 is(UopSplitType.VEC_VRED) { 801 when(vlmulReg === "b001".U) { 802 csBundle(0).srcType(2) := 
SrcType.DC 803 csBundle(0).lsrc(0) := src2 + 1.U 804 csBundle(0).lsrc(1) := src2 805 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 806 csBundle(0).uopIdx := 0.U 807 } 808 when(vlmulReg === "b010".U) { 809 csBundle(0).srcType(2) := SrcType.DC 810 csBundle(0).lsrc(0) := src2 + 1.U 811 csBundle(0).lsrc(1) := src2 812 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 813 csBundle(0).uopIdx := 0.U 814 815 csBundle(1).srcType(2) := SrcType.DC 816 csBundle(1).lsrc(0) := src2 + 3.U 817 csBundle(1).lsrc(1) := src2 + 2.U 818 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 819 csBundle(1).uopIdx := 1.U 820 821 csBundle(2).srcType(2) := SrcType.DC 822 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 823 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 824 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 825 csBundle(2).uopIdx := 2.U 826 } 827 when(vlmulReg === "b011".U) { 828 for (i <- 0 until MAX_VLMUL) { 829 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 830 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 831 csBundle(i).lsrc(1) := src2 + (i * 2).U 832 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 833 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 834 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 835 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 836 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 837 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 838 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 839 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 840 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 841 } 842 csBundle(i).srcType(2) := SrcType.DC 843 csBundle(i).uopIdx := i.U 844 } 845 } 846 when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) { 847 /* 848 * 2 <= vlmul <= 8 849 */ 850 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 851 csBundle(numOfUop - 1.U).lsrc(0) := src1 852 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 853 csBundle(numOfUop - 1.U).lsrc(2) := dest 854 csBundle(numOfUop - 1.U).ldest := dest 
csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
      }
    }
    // VEC_VFRED: reduce a register group down to one value in three phases.
    //   1. tree  - for 2/4/8 source registers, combine them pair-wise into
    //              temporaries (VECTOR_TMP_REG_LMUL + k) until one register is left;
    //   2. fold  - fold that register onto itself (isFoldTo1_2 / 1_4 / 1_8), the
    //              number of steps depends on SEW;
    //   3. final - combine the folded temporary with src1 and write dest.
    // NOTE(review): presumably the unordered FP reductions - confirm against the decode table.
    is(UopSplitType.VEC_VFRED) {
      val vlmul = vlmulReg
      val vsew = vsewReg

      // Raise exactly one fold flag on a uop; the other flags keep their defaults.
      def markFold(uop: Int, ways: Int): Unit = ways match {
        case 2 => csBundle(uop).vpu.fpu.isFoldTo1_2 := true.B
        case 4 => csBundle(uop).vpu.fpu.isFoldTo1_4 := true.B
        case 8 => csBundle(uop).vpu.fpu.isFoldTo1_8 := true.B
        case other => throw new IllegalArgumentException(s"unsupported fold 1/$other")
      }

      // numRegs    : number of source registers combined by the tree phase (1 = no tree)
      // foldsBySew : for every handled SEW, the fold flags to apply in order
      def genReduction(numRegs: Int, foldsBySew: Seq[(Bool, Seq[Int])]): Unit = {
        val treeUops = numRegs - 1
        val firstLevel = numRegs / 2
        // Tree phase is independent of SEW.
        for (u <- 0 until treeUops) {
          if (u < firstLevel) {
            // first level reads the architectural source registers
            csBundle(u).lsrc(0) := src2 + (u * 2 + 1).U
            csBundle(u).lsrc(1) := src2 + (u * 2).U
          } else {
            // later levels read the temporaries produced by earlier uops
            csBundle(u).lsrc(0) := (VECTOR_TMP_REG_LMUL + (u - firstLevel) * 2 + 1).U
            csBundle(u).lsrc(1) := (VECTOR_TMP_REG_LMUL + (u - firstLevel) * 2).U
          }
          csBundle(u).ldest := (VECTOR_TMP_REG_LMUL + u).U
          csBundle(u).uopIdx := u.U
        }
        for ((sewMatches, folds) <- foldsBySew) {
          when(sewMatches) {
            for ((ways, step) <- folds.zipWithIndex) {
              val uop = treeUops + step
              // Without a tree phase the very first fold reads the source register itself.
              def partial = if (uop == 0) src2 else (VECTOR_TMP_REG_LMUL + uop - 1).U
              csBundle(uop).lsrc(0) := partial
              csBundle(uop).lsrc(1) := partial
              csBundle(uop).ldest := (VECTOR_TMP_REG_LMUL + uop).U
              markFold(uop, ways)
              csBundle(uop).uopIdx := uop.U
            }
            // Final uop: src1 combined with the last temporary, result goes to dest.
            val last = treeUops + folds.length
            csBundle(last).lsrc(0) := src1
            csBundle(last).lsrc(1) := (VECTOR_TMP_REG_LMUL + last - 1).U
            csBundle(last).ldest := dest
            csBundle(last).uopIdx := last.U
          }
        }
      }

      val isE64 = vsew === VSew.e64
      val isE32 = vsew === VSew.e32
      val isE16 = vsew === VSew.e16
      // Fold steps for a full register, a half register (mf2) and a quarter register (mf4).
      // SEW values that are not listed generate no fold/final uops, as before.
      val wholeRegFolds = Seq(isE64 -> Seq(2), isE32 -> Seq(2, 4), isE16 -> Seq(2, 4, 8))
      val halfRegFolds = Seq(isE32 -> Seq(4), isE16 -> Seq(4, 8))
      val quarterRegFolds = Seq(isE16 -> Seq(8))

      when(vlmul === VLmul.m8) { genReduction(8, wholeRegFolds) }
      when(vlmul === VLmul.m4) { genReduction(4, wholeRegFolds) }
      when(vlmul === VLmul.m2) { genReduction(2, wholeRegFolds) }
      when(vlmul === VLmul.m1) { genReduction(1, wholeRegFolds) }
      when(vlmul === VLmul.mf2) { genReduction(1, halfRegFolds) }
      when(vlmul === VLmul.mf4) { genReduction(1, quarterRegFolds) }
    }

    // VEC_VFREDOSUM: every source register is expanded into `group` uops that all
    // accumulate through the single temporary VECTOR_TMP_REG_LMUL. The first uop of a
    // group reads the source register, the remaining uops of the group set a fold flag.
    // The very first uop takes src1, the very last one writes dest.
    is(UopSplitType.VEC_VFREDOSUM) {
      import yunsuan.VfaluType
      val vlmul = vlmulReg
      val vsew = vsewReg
      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

      // vlmax : number of uops generated
      // group : uops per source register (2 for e64, 4 for e32, 8 for e16)
      def genOrderedSum(vlmax: Int, group: Int): Unit = {
        for (i <- 0 until vlmax) {
          val startsGroup = i % group == 0
          val isLast = i == vlmax - 1
          val folding = (!startsGroup).B
          csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
          csBundle(i).lsrc(1) := (if (startsGroup) src2 + (i / group).U else VECTOR_TMP_REG_LMUL.U)
          csBundle(i).lsrc(2) := (
            if (startsGroup) src2 + (i / group).U
            else if (isLast) dest
            // e32/e16 only: the second uop of a group re-reads the source when widening
            else if (group != 2 && i % group == 1) Mux(isWiden, src2 + (i / group).U, VECTOR_TMP_REG_LMUL.U)
            else VECTOR_TMP_REG_LMUL.U
          )
          csBundle(i).ldest := (if (isLast) dest else VECTOR_TMP_REG_LMUL.U)
          group match {
            case 2 =>
              csBundle(i).vpu.fpu.isFoldTo1_2 := folding
            case 4 =>
              csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && folding
              csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && folding
            case 8 =>
              csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && folding
              csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && folding
            case other =>
              throw new IllegalArgumentException(s"unsupported group size $other")
          }
          csBundle(i).uopIdx := i.U
        }
      }

      // Uop count of each LMUL at e16; the count for a SEW with group g is count * g / 8.
      val uopsAtE16 = Seq(
        VLmul.m8 -> 64, VLmul.m4 -> 32, VLmul.m2 -> 16,
        VLmul.m1 -> 8, VLmul.mf2 -> 4, VLmul.mf4 -> 2
      )
      val groupOfSew = Seq(VSew.e64 -> 2, VSew.e32 -> 4, VSew.e16 -> 8)
      for ((lmulEnc, e16Uops) <- uopsAtE16) {
        when(vlmul === lmulEnc) {
          for ((sewEnc, group) <- groupOfSew) {
            val vlmax = e16Uops * group / 8
            // Combinations with fewer than two uops (mf2/e64, mf4/e32, mf4/e64) generate nothing.
            if (vlmax >= 2) {
              when(vsew === sewEnc) {
                genOrderedSum(vlmax, group)
              }
            }
          }
        }
      }
    }

    // VEC_SLIDEUP: uop 0 moves the scalar/immediate operand into VECTOR_TMP_REG_LMUL,
    // then destination register i takes i + 1 uops (one per source register 0..i).
    is(UopSplitType.VEC_SLIDEUP) {
      // i to vector move
      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).srcType(2) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      csBundle(0).vecWen := true.B
      // LMUL
      for (i <- 0 until MAX_VLMUL)
        for (j <- 0 to i) {
          // slot 0 is the move above, so the vector uops start at slot 1
          val slot = i * (i + 1) / 2 + j + 1
          val uop = csBundle(slot)
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := src2 + j.U
          // the chain for register i starts from and ends in dest + i
          uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
          uop.ldest := (if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
          uop.uopIdx := (slot - 1).U
        }
    }

    // VEC_SLIDEDOWN: same scalar move as slide-up; the vector uops are placed
    // counting back from numOfUop and only for registers below the runtime lmul.
    is(UopSplitType.VEC_SLIDEDOWN) {
      // i to vector move
      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).srcType(2) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      csBundle(0).vecWen := true.B
      // LMUL
      for (i <- 0 until MAX_VLMUL)
        for (j <- (0 to i).reverse) {
          when(i.U < lmul) {
            // distance of this uop from numOfUop
            val fromEnd = i * (i + 1) / 2 + i - j + 1
            val old_vd = if (j == 0) {
              dest + lmul - 1.U - i.U
            } else (VECTOR_TMP_REG_LMUL + j).U
            val vd = if (j == i) {
              dest + lmul - 1.U - i.U
            } else (VECTOR_TMP_REG_LMUL + j + 1).U
            csBundle(numOfUop - fromEnd.U).srcType(0) := SrcType.vp
            csBundle(numOfUop - fromEnd.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
            csBundle(numOfUop - fromEnd.U).lsrc(1) := src2 + lmul - 1.U - j.U
            csBundle(numOfUop - fromEnd.U).lsrc(2) := old_vd
            csBundle(numOfUop - fromEnd.U).ldest := vd
            csBundle(numOfUop - fromEnd.U).uopIdx := numOfUop - (fromEnd + 1).U
          }
        }
    }

    // VEC_M0X: a chain of uops through the temporaries; the last uop (numOfUop - 1)
    // is redirected to the integer destination instead of a vector register.
    is(UopSplitType.VEC_M0X) {
      // LMUL
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i)
        // the first uop has no predecessor, so its first source is don't-care
        uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
        uop.srcType(1) := SrcType.vp
        uop.rfWen := false.B
        uop.fpWen := false.B
        uop.vecWen := true.B
        uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        uop.lsrc(1) := src2
        // csBundle(i).lsrc(2) := dest + i.U  DontCare
        uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
        uop.uopIdx := i.U
      }
      csBundle(numOfUop - 1.U).rfWen := Mux(dest === 0.U, false.B, true.B)
      csBundle(numOfUop - 1.U).fpWen := false.B
      csBundle(numOfUop - 1.U).vecWen := false.B
      csBundle(numOfUop - 1.U).ldest := dest
    }

    // VEC_MVV: two uops per destination register; the even uop writes dest + i,
    // the odd uop writes the temporary that the next pair reads as lsrc(0).
    is(UopSplitType.VEC_MVV) {
      // LMUL
      for (i <- 0 until MAX_VLMUL) {
        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
        val toDest = csBundle(i * 2 + 0)
        val toTmp = csBundle(i * 2 + 1)

        toDest.srcType(0) := srcType0
        toDest.srcType(1) := SrcType.vp
        toDest.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        toDest.lsrc(1) := src2
        toDest.lsrc(2) := dest + i.U
        toDest.ldest := dest + i.U
        toDest.uopIdx := (i * 2 + 0).U

        toTmp.srcType(0) := srcType0
        toTmp.srcType(1) := SrcType.vp
        toTmp.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        toTmp.lsrc(1) := src2
        // csBundle(i).lsrc(2) := dest + i.U  DontCare
        toTmp.ldest := (VECTOR_TMP_REG_LMUL + i).U
        toTmp.uopIdx := (i * 2 + 1).U
      }
    }
    is(UopSplitType.VEC_VWW) {
      for (i <- 0 until MAX_VLMUL*2) {
when(i.U < lmul){
          csBundle(i).srcType(2) := SrcType.DC
          csBundle(i).lsrc(0) := src2 + i.U
          csBundle(i).lsrc(1) := src2 + i.U
          // csBundle(i).lsrc(2) := dest + (2 * i).U
          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
          csBundle(i).uopIdx :=  i.U
        } otherwise {
          csBundle(i).srcType(2) := SrcType.DC
          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
          // csBundle(i).lsrc(2) := dest + (2 * i).U
          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
          csBundle(i).uopIdx := i.U
        }
        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
        csBundle(numOfUop-1.U).lsrc(0) := src1
        csBundle(numOfUop-1.U).lsrc(2) := dest
        csBundle(numOfUop-1.U).ldest := dest
      }
    }
    // VEC_RGATHER: len x len uops. Destination register i is built by chaining
    // over all source registers j; intermediate results live in the temporaries.
    is(UopSplitType.VEC_RGATHER) {
      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
        for (i <- 0 until len)
          for (j <- 0 until len) {
            val uop = csBundle(i * len + j)
            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
            // csBundle(i * len + j).srcType(1) := SrcType.vp
            // csBundle(i * len + j).srcType(2) := SrcType.vp
            uop.lsrc(0) := src1 + i.U
            uop.lsrc(1) := src2 + j.U
            // chain for register i: starts from dest + i, ends in dest + i
            uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
            uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
            uop.uopIdx := (i * len + j).U
          }
      }
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_RGATHER(2)
        }
        is("b010".U ){
          genCsBundle_VEC_RGATHER(4)
        }
        is("b011".U ){
          genCsBundle_VEC_RGATHER(8)
        }
      }
    }
    // VEC_RGATHER_VX: like VEC_RGATHER, but uop 0 first moves the scalar/immediate
    // index into VECTOR_TMP_REG_LMUL, so every vector uop sits one slot later.
    is(UopSplitType.VEC_RGATHER_VX) {
      def genCsBundle_RGATHER_VX(len:Int): Unit ={
        for (i <- 0 until len)
          for (j <- 0 until len) {
            val uop = csBundle(i * len + j + 1)
            uop.srcType(0) := SrcType.vp
            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
            uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
            uop.lsrc(1) := src2 + j.U
            uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
            uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
            uop.uopIdx := (i * len + j).U
          }
      }
      // i to vector move
      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).srcType(2) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      // default (single register); overridden below for LMUL 2/4/8
      genCsBundle_RGATHER_VX(1)
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_RGATHER_VX(2)
        }
        is("b010".U ){
          genCsBundle_RGATHER_VX(4)
        }
        is("b011".U ){
          genCsBundle_RGATHER_VX(8)
        }
      }
    }
    // VEC_RGATHEREI16: same len x len chaining as VEC_RGATHER, but which index
    // register (src1 + k) a uop reads depends on SEW.
    is(UopSplitType.VEC_RGATHEREI16) {
      // SEW = 8: every (i, j) pair takes two uops reading index registers
      // src1 + 2i and src1 + 2i + 1, chained through an extra temporary.
      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
        for (i <- 0 until len)
          for (j <- 0 until len) {
            val first = (i * len + j) * 2
            val second = first + 1
            csBundle(first).lsrc(0) := src1 + (i * 2 + 0).U
            csBundle(first).lsrc(1) := src2 + j.U
            csBundle(first).lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
            csBundle(first).ldest := (VECTOR_TMP_REG_LMUL + j * 2).U
            csBundle(first).uopIdx := first.U
            csBundle(second).lsrc(0) := src1 + (i * 2 + 1).U
            csBundle(second).lsrc(1) := src2 + j.U
            csBundle(second).lsrc(2) := (VECTOR_TMP_REG_LMUL + j * 2).U
            csBundle(second).ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
            csBundle(second).uopIdx := second.U
          }
      }
      // SEW >= 16: one uop per (i, j). `destRegsPerIndexReg` consecutive destination
      // registers read the same index register: 1 for e16, 2 for e32, 4 for e64.
      def genCsBundle_VEC_RGATHEREI16(len:Int, destRegsPerIndexReg:Int): Unit ={
        for (i <- 0 until len)
          for (j <- 0 until len) {
            val uop = csBundle(i * len + j)
            uop.lsrc(0) := src1 + (i / destRegsPerIndexReg).U
            uop.lsrc(1) := src2 + j.U
            uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
            uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
            uop.uopIdx := (i * len + j).U
          }
      }
      // Select the generator for the current SEW. `hasSew8Case = false` drops the
      // SEW8 branch, so SEW8 falls through to the plain generator (LMUL = 8 case).
      def genForSew(len:Int, hasSew8Case:Boolean): Unit ={
        def sew16AndUp(): Unit = {
          when(vsewReg === VSew.e32){
            genCsBundle_VEC_RGATHEREI16(len, 2)
          }.elsewhen(vsewReg === VSew.e64){
            genCsBundle_VEC_RGATHEREI16(len, 4)
          }.otherwise{
            genCsBundle_VEC_RGATHEREI16(len, 1)
          }
        }
        if (hasSew8Case) {
          when(!vsewReg.orR){
            genCsBundle_VEC_RGATHEREI16_SEW8(len)
          }.otherwise{
            sew16AndUp()
          }
        } else {
          sew16AndUp()
        }
      }
      // default (single register); overridden below for LMUL 2/4/8
      genForSew(1, hasSew8Case = true)
      switch(vlmulReg) {
        is("b001".U) {
          genForSew(2, hasSew8Case = true)
        }
        is("b010".U) {
          genForSew(4, hasSew8Case = true)
        }
        is("b011".U) {
          genForSew(8, hasSew8Case = false)
        }
      }
    }
    is(UopSplitType.VEC_COMPRESS) {
      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
        for (i <- 0 until len) {
          val jlen = if (i == len-1) i+1 else i+2
          for (j <- 0 until jlen) {
            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
            val vd = if(i==len-1) (dest + j.U) else {
              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
            }
            csBundle(i*(i+3)/2 + j).vecWen := true.B
            csBundle(i*(i+3)/2 + j).v0Wen := false.B
            val src13Type = if (j == i+1) DontCare else SrcType.vp
            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
            if (i == 0) {
              csBundle(i*(i+3)/2 + j).lsrc(0) := src1
            } else {
              csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U
            }
            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
csBundle(i*(i+3)/2 + j).ldest := vd
            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
          }
        }
      }
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_COMPRESS(2)
        }
        is("b010".U ){
          genCsBundle_VEC_COMPRESS(4)
        }
        is("b011".U ){
          genCsBundle_VEC_COMPRESS(8)
        }
      }
    }
    // VEC_MVNR: one uop per register, sources and destination advance together.
    is(UopSplitType.VEC_MVNR) {
      (0 until MAX_VLMUL).foreach { reg =>
        val uop = csBundle(reg)
        uop.lsrc(0) := src1 + reg.U
        uop.lsrc(1) := src2 + reg.U
        uop.lsrc(2) := dest + reg.U
        uop.ldest := dest + reg.U
        uop.uopIdx := reg.U
      }
    }
    // VEC_US_LDST: uop 0 moves the scalar operand into VECTOR_TMP_REG_LMUL, followed
    // by one memory uop per destination register that reads that temporary.
    is(UopSplitType.VEC_US_LDST) {
      /*
      FMV.D.X
       */
      val scalarMove = csBundle(0)
      scalarMove.srcType(0) := SrcType.reg
      scalarMove.srcType(1) := SrcType.imm
      scalarMove.lsrc(1) := 0.U
      scalarMove.ldest := VECTOR_TMP_REG_LMUL.U
      scalarMove.fuType := FuType.i2v.U
      scalarMove.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      scalarMove.rfWen := false.B
      scalarMove.fpWen := false.B
      scalarMove.vecWen := true.B
      scalarMove.vlsInstr := true.B
      //LMUL
      for (reg <- 0 until MAX_VLMUL) {
        val memUop = csBundle(reg + 1)
        memUop.srcType(0) := SrcType.vp
        memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        memUop.lsrc(2) := dest + reg.U // old vd
        memUop.ldest := dest + reg.U
        memUop.uopIdx := reg.U
        memUop.vlsInstr := true.B
      }
      // for segment accesses the first uop waits forward and the last one blocks backward
      csBundle.head.waitForward := isUsSegment
      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
    }
    // VEC_US_FF_LD: same layout as VEC_US_LDST, then the last uop is turned into
    // a uop that reads and writes vl instead of a vector register.
    is(UopSplitType.VEC_US_FF_LD) {
      val scalarMove = csBundle(0)
      scalarMove.srcType(0) := SrcType.reg
      scalarMove.srcType(1) := SrcType.imm
      scalarMove.lsrc(1) := 0.U
      scalarMove.ldest := VECTOR_TMP_REG_LMUL.U
      scalarMove.fuType := FuType.i2v.U
      scalarMove.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      scalarMove.rfWen := false.B
      scalarMove.fpWen := false.B
      scalarMove.vecWen := true.B
      scalarMove.vlsInstr := true.B
      //LMUL
      for (reg <- 0 until MAX_VLMUL) {
        val memUop = csBundle(reg + 1)
        memUop.srcType(0) := SrcType.vp
        memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        memUop.lsrc(2) := dest + reg.U // old vd
        memUop.ldest := dest + reg.U
        memUop.uopIdx := reg.U
        memUop.vlsInstr := true.B
      }
      csBundle.head.waitForward := isUsSegment
      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
      // last uop read vl and write vl
      // (these connections must stay after the loop so that they override it)
      csBundle(numOfUop - 1.U).srcType(0) := SrcType.no
      csBundle(numOfUop - 1.U).srcType(1) := SrcType.no
      csBundle(numOfUop - 1.U).srcType(2) := SrcType.no
      csBundle(numOfUop - 1.U).srcType(3) := SrcType.no
      csBundle(numOfUop - 1.U).srcType(4) := SrcType.vp
      csBundle(numOfUop - 1.U).lsrc(4) := Vl_IDX.U
      // vtype
      csBundle(numOfUop - 1.U).vecWen := false.B
      csBundle(numOfUop - 1.U).vlWen := true.B
      csBundle(numOfUop - 1.U).ldest := Vl_IDX.U
    }
    // VEC_S_LDST: two scalar moves (uop 0 into VECTOR_TMP_REG_LMUL, uop 1 moving
    // latchedInst.lsrc(1) into VECTOR_TMP_REG_LMUL + 1), then one memory uop per
    // destination register reading both temporaries.
    is(UopSplitType.VEC_S_LDST) {
      /*
      FMV.D.X
       */
      val firstMove = csBundle(0)
      firstMove.srcType(0) := SrcType.reg
      firstMove.srcType(1) := SrcType.imm
      firstMove.lsrc(1) := 0.U
      firstMove.ldest := VECTOR_TMP_REG_LMUL.U
      firstMove.fuType := FuType.i2v.U
      firstMove.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      firstMove.rfWen := false.B
      firstMove.fpWen := false.B
      firstMove.vecWen := true.B
      firstMove.vlsInstr := true.B

      val secondMove = csBundle(1)
      secondMove.srcType(0) := SrcType.reg
      secondMove.srcType(1) := SrcType.imm
      secondMove.lsrc(0) := latchedInst.lsrc(1)
      secondMove.lsrc(1) := 0.U
      secondMove.ldest := (VECTOR_TMP_REG_LMUL + 1).U
      secondMove.fuType := FuType.i2v.U
      secondMove.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      secondMove.rfWen := false.B
      secondMove.fpWen := false.B
      secondMove.vecWen := true.B
      secondMove.vlsInstr := true.B

      //LMUL
      for (reg <- 0 until MAX_VLMUL) {
        val memUop = csBundle(reg + 2)
        memUop.srcType(0) := SrcType.vp
        memUop.srcType(1) := SrcType.vp
        memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        memUop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
        memUop.lsrc(2) := dest + reg.U // old vd
        memUop.ldest := dest + reg.U
        memUop.uopIdx := reg.U
        memUop.vlsInstr := true.B
      }
      csBundle.head.waitForward := isSdSegment
      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
    }
    is(UopSplitType.VEC_I_LDST) {
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
        for (i <- 0 until MAX_VLMUL) {
          val vecWen = if (i < lmul * nf) true.B else false.B
          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
          csBundle(i + 1).srcType(0) := SrcType.vp
          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
          csBundle(i + 1).srcType(1) := SrcType.no
          csBundle(i + 1).lsrc(1) := src2 + i.U
          csBundle(i + 1).srcType(2) := src2Type
          csBundle(i + 1).lsrc(2) := dest + i.U
          csBundle(i + 1).ldest := dest + i.U
          csBundle(i + 1).rfWen := false.B
          csBundle(i + 1).fpWen := false.B
          csBundle(i + 1).vecWen := vecWen
          csBundle(i + 1).uopIdx := i.U
          csBundle(i + 1).vlsInstr := true.B
        }
      }
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
        for (i <- 0 until MAX_VLMUL) {
          val src1Type = if (i < emul) SrcType.vp else SrcType.no
          csBundle(i + 1).srcType(1) := src1Type
          csBundle(i + 1).lsrc(1) := src2 + i.U
        }
      }

      val vlmul = vlmulReg
      val vsew = Cat(0.U(1.W), vsewReg)
      val veew = Cat(0.U(1.W), width)
      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Seq(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Seq(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
csBundle(0).lsrc(1) := 0.U 1814 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1815 csBundle(0).fuType := FuType.i2v.U 1816 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 1817 csBundle(0).rfWen := false.B 1818 csBundle(0).fpWen := false.B 1819 csBundle(0).vecWen := true.B 1820 csBundle(0).vlsInstr := true.B 1821 1822 //LMUL 1823 when(nf === 0.U) { 1824 for (i <- 0 until MAX_VLMUL) { 1825 indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul) 1826 val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2 1827 val offsetVd = indexedLSRegOffset(i).outOffsetVd 1828 csBundle(i + 1).srcType(0) := SrcType.vp 1829 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1830 csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U)) 1831 csBundle(i + 1).srcType(2) := SrcType.vp 1832 // lsrc2 is old vd 1833 csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)) 1834 csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)) 1835 csBundle(i + 1).uopIdx := i.U 1836 csBundle(i + 1).vlsInstr := true.B 1837 } 1838 }.otherwise{ 1839 // nf > 1, is segment indexed load/store 1840 // gen src0, vd 1841 switch(simple_lmul) { 1842 is(0.U) { 1843 switch(nf) { 1844 is(1.U) { 1845 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2) 1846 } 1847 is(2.U) { 1848 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3) 1849 } 1850 is(3.U) { 1851 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4) 1852 } 1853 is(4.U) { 1854 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5) 1855 } 1856 is(5.U) { 1857 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6) 1858 } 1859 is(6.U) { 1860 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7) 1861 } 1862 is(7.U) { 1863 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8) 1864 } 1865 } 1866 } 1867 is(1.U) { 1868 switch(nf) { 1869 is(1.U) { 1870 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2) 1871 } 1872 is(2.U) { 1873 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3) 1874 } 1875 is(3.U) { 
1876 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4) 1877 } 1878 } 1879 } 1880 is(2.U) { 1881 switch(nf) { 1882 is(1.U) { 1883 genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2) 1884 } 1885 } 1886 } 1887 } 1888 1889 // gen src1 1890 switch(simple_emul) { 1891 is(0.U) { 1892 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1) 1893 } 1894 is(1.U) { 1895 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2) 1896 } 1897 is(2.U) { 1898 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4) 1899 } 1900 is(3.U) { 1901 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8) 1902 } 1903 } 1904 1905 // when is vstore instructions, not set vecwen 1906 when(isVstore) { 1907 for (i <- 0 until MAX_VLMUL) { 1908 csBundle(i + 1).vecWen := false.B 1909 } 1910 } 1911 } 1912 csBundle.head.waitForward := isIxSegment 1913 csBundle(numOfUop - 1.U).blockBackward := isIxSegment 1914 } 1915 } 1916 1917 //readyFromRename Counter 1918 val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U) 1919 1920 // The left uops of the complex inst in ComplexDecoder can be send out this cycle 1921 val thisAllOut = uopRes <= readyCounter 1922 1923 switch(state) { 1924 is(s_idle) { 1925 when (inValid) { 1926 stateNext := s_active 1927 uopResNext := inUopInfo.numOfUop 1928 } 1929 } 1930 is(s_active) { 1931 when (thisAllOut) { 1932 when (inValid) { 1933 stateNext := s_active 1934 uopResNext := inUopInfo.numOfUop 1935 }.otherwise { 1936 stateNext := s_idle 1937 uopResNext := 0.U 1938 } 1939 }.otherwise { 1940 stateNext := s_active 1941 uopResNext := uopRes - readyCounter 1942 } 1943 } 1944 } 1945 1946 state := Mux(io.redirect, s_idle, stateNext) 1947 uopRes := Mux(io.redirect, 0.U, uopResNext) 1948 1949 val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes) 1950 1951 fixedDecodedInst := csBundle 1952 1953 // when vstart is not zero, the last uop will modify vstart to zero 1954 // therefore, blockback and flush pipe 1955 fixedDecodedInst(numOfUop - 1.U).flushPipe := (vstartReg =/= 0.U) || latchedInst.flushPipe 1956 1957 for(i <- 0 until 
RenameWidth) { 1958 outValids(i) := complexNum > i.U 1959 outDecodedInsts(i) := fixedDecodedInst(i.U + numOfUop - uopRes) 1960 } 1961 1962 outComplexNum := Mux(state === s_active, complexNum, 0.U) 1963 inReady := state === s_idle || state === s_active && thisAllOut 1964 1965 1966 XSError(inValid && inUopInfo.numOfUop === 0.U, 1967 p"uop number ${inUopInfo.numOfUop} is illegal, cannot be zero") 1968// val validSimple = Wire(Vec(DecodeWidth, Bool())) 1969// validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 } 1970// val notInf = Wire(Vec(DecodeWidth, Bool())) 1971// notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 } 1972// notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc) 1973// val notInfVec = Wire(Vec(DecodeWidth, Bool())) 1974// notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR} 1975// 1976// complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR , 1977// Mux(uopRes0 > readyCounter, readyCounter, uopRes0), 1978// 0.U) 1979// validToRename.zipWithIndex.foreach{ 1980// case(dst, i) => 1981// val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i)) 1982// dst := MuxCase(false.B, Seq( 1983// (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B), 1984// (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)), 1985// ).toSeq) 1986// } 1987// 1988// readyToIBuf.zipWithIndex.foreach { 1989// case (dst, i) => 1990// val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B) 1991// dst := MuxCase(true.B, Seq( 1992// (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B, 1993// (io.validFromIBuf(0) && 
!(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B)) 1994// ).toSeq) 1995// } 1996// 1997// io.deq.decodedInsts := decodedInsts 1998// io.deq.complexNum := complexNum 1999// io.deq.validToRename := validToRename 2000// io.deq.readyToIBuf := readyToIBuf 2001} 2002