/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Combinational lookup table giving, for one fixed uop index, the register
  * offsets of vs2 and vd for every (emul, lmul) combination.
  *
  * The table is computed at elaboration time and turned into logic with the
  * `decoder` utility. `src` is the table key `emul << 2 | lmul` (4 bits); the two
  * outputs are the upper and lower 3 bits of the 6-bit table entry
  * `offsetVs2 << 3 | offsetVd`.
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair of one uop.
    *
    * `lmul` and `emul` are used as shift amounts, i.e. the uop count is
    * `1 << emul` or `1 << lmul` (whichever is larger). The operand with the
    * smaller multiplier advances once every `1 << |emul - lmul|` uops.
    *
    * @param lmul   shift amount giving the number of vd registers
    * @param emul   shift amount giving the number of vs2 registers
    * @param uopIdx index of the uop
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is outside the
    *         uop range of this (lmul, emul) combination
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    if (lmul < emul) { // lmul < emul, uop num depends on emul * nf
      val ratio = 1 << (emul - lmul)
      val inRange = uopIdx >= 0 && uopIdx < (1 << emul)
      if (inRange) (uopIdx, uopIdx / ratio) else (0, 0)
    } else { // lmul >= emul, uop num depends on lmul * nf
      val ratio = 1 << (lmul - emul)
      val inRange = uopIdx >= 0 && uopIdx < (1 << lmul)
      if (inRange) (uopIdx / ratio, uopIdx) else (0, 0)
    }
  }

  // indexed load/store: one (emul, lmul, offsetVs2, offsetVd) row per combination,
  // enumerated with emul as the outer and lmul as the inner index
  val combVemulNf : Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}

trait VectorConstants {
  val MAX_VLMUL = 8
  val VECTOR_TMP_REG_LMUL = 33 // 33~47 -> 15
  val VECTOR_COMPRESS = 1 // in v0 regfile
  val MAX_INDEXED_LS_UOPNUM = 64
}

class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 112 private val inUopInfo = io.in.bits.uopInfo 113 private val outValids = io.out.complexDecodedInsts.map(_.valid) 114 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 115 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 116 private val outComplexNum = io.complexNum 117 118 val maxUopSize = MaxUopSize 119 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 120 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 121 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 122 }.elsewhen(inInstFields.RS1 === 0.U) { 123 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 124 } 125 } 126 127 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 128 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 129 //input bits 130 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 131 132 val src1 = Cat(0.U(1.W), instFields.RS1) 133 val src2 = Cat(0.U(1.W), instFields.RS2) 134 val dest = Cat(0.U(1.W), instFields.RD) 135 136 val nf = instFields.NF 137 val width = instFields.WIDTH(1, 0) 138 139 //output of DecodeUnit 140 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 141 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 142 val lmul = Wire(UInt(4.W)) 143 val isVsetSimple = Wire(Bool()) 144 145 val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i))) 146 indexedLSRegOffset.map(_.src := 0.U) 147 148 //pre decode 149 lmul := latchedUopInfo.lmul 150 isVsetSimple := latchedInst.isVset 151 val vlmulReg = latchedInst.vpu.vlmul 152 val vsewReg = latchedInst.vpu.vsew 153 val vstartReg = latchedInst.vpu.vstart 154 155 //Type of uop Div 156 val typeOfSplit = latchedInst.uopSplitType 157 val src1Type = latchedInst.srcType(0) 158 val src1IsImm = src1Type === SrcType.imm 159 val src1IsFp = src1Type === SrcType.fp 
160 161 val isVstore = FuType.isVStore(latchedInst.fuType) 162 163 numOfUop := latchedUopInfo.numOfUop 164 numOfWB := latchedUopInfo.numOfWB 165 166 //uops dispatch 167 val s_idle :: s_active :: Nil = Enum(2) 168 val state = RegInit(s_idle) 169 val stateNext = WireDefault(state) 170 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 171 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 172 val uopResNext = WireInit(uopRes) 173 val e64 = 3.U(2.W) 174 val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U) 175 val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U 176 val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U 177 178 //uop div up to maxUopSize 179 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 180 csBundle.foreach { case dst => 181 dst := latchedInst 182 dst.numUops := latchedUopInfo.numOfUop 183 dst.numWB := latchedUopInfo.numOfWB 184 dst.firstUop := false.B 185 dst.lastUop := false.B 186 dst.vlsInstr := false.B 187 } 188 189 csBundle(0).firstUop := true.B 190 csBundle(numOfUop - 1.U).lastUop := true.B 191 192 // when vstart is not zero, the last uop will modify vstart to zero 193 // therefore, blockback and flush pipe 194 csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U 195 csBundle(numOfUop - 1.U).flushPipe := vstartReg =/= 0.U 196 197 switch(typeOfSplit) { 198 is(UopSplitType.VSET) { 199 // In simple decoder, rfWen and vecWen are not set 200 when(isVsetSimple) { 201 // Default 202 // uop0 set rd, never flushPipe 203 csBundle(0).fuType := FuType.vsetiwi.U 204 csBundle(0).flushPipe := false.B 205 csBundle(0).blockBackward := false.B 206 csBundle(0).rfWen := true.B 207 // uop1 set vl, vsetvl will flushPipe 208 csBundle(1).ldest := Vl_IDX.U 209 csBundle(1).vecWen := false.B 210 csBundle(1).vlWen := true.B 211 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 212 // write nothing, uop0 is a nop instruction 213 csBundle(0).rfWen := 
false.B 214 csBundle(0).fpWen := false.B 215 csBundle(0).vecWen := false.B 216 csBundle(0).vlWen := false.B 217 csBundle(1).fuType := FuType.vsetfwf.U 218 csBundle(1).srcType(0) := SrcType.no 219 csBundle(1).srcType(2) := SrcType.no 220 csBundle(1).srcType(3) := SrcType.no 221 csBundle(1).srcType(4) := SrcType.vp 222 csBundle(1).lsrc(4) := Vl_IDX.U 223 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 224 // uop0: mv vtype gpr to vector region 225 csBundle(0).srcType(0) := SrcType.xp 226 csBundle(0).srcType(1) := SrcType.no 227 csBundle(0).lsrc(0) := src2 228 csBundle(0).lsrc(1) := 0.U 229 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 230 csBundle(0).fuType := FuType.i2v.U 231 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 232 csBundle(0).rfWen := false.B 233 csBundle(0).fpWen := false.B 234 csBundle(0).vecWen := true.B 235 csBundle(0).vlWen := false.B 236 // uop1: uvsetvcfg_vv 237 csBundle(1).fuType := FuType.vsetfwf.U 238 // vl 239 csBundle(1).srcType(0) := SrcType.no 240 csBundle(1).srcType(2) := SrcType.no 241 csBundle(1).srcType(3) := SrcType.no 242 csBundle(1).srcType(4) := SrcType.vp 243 csBundle(1).lsrc(4) := Vl_IDX.U 244 // vtype 245 csBundle(1).srcType(1) := SrcType.vp 246 csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U 247 csBundle(1).vecWen := false.B 248 csBundle(1).vlWen := true.B 249 csBundle(1).ldest := Vl_IDX.U 250 }.elsewhen(dest === 0.U) { 251 // write nothing, uop0 is a nop instruction 252 csBundle(0).rfWen := false.B 253 csBundle(0).fpWen := false.B 254 csBundle(0).vecWen := false.B 255 csBundle(0).vlWen := false.B 256 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) { 257 // because vsetvl may modified src2 when src2 == rd, 258 // we need to modify vd in second uop to avoid dependency 259 // uop0 set vl 260 csBundle(0).fuType := FuType.vsetiwf.U 261 csBundle(0).ldest := Vl_IDX.U 262 csBundle(0).rfWen := false.B 263 csBundle(0).vlWen := true.B 264 // uop1 set rd 265 csBundle(1).fuType := 
FuType.vsetiwi.U 266 csBundle(1).ldest := dest 267 csBundle(1).rfWen := true.B 268 csBundle(1).vlWen := false.B 269 } 270 // use bypass vtype from vtypeGen 271 csBundle(0).vpu.connectVType(io.vtypeBypass) 272 csBundle(1).vpu.connectVType(io.vtypeBypass) 273 } 274 } 275 is(UopSplitType.VEC_VVV) { 276 for (i <- 0 until MAX_VLMUL) { 277 csBundle(i).lsrc(0) := src1 + i.U 278 csBundle(i).lsrc(1) := src2 + i.U 279 csBundle(i).lsrc(2) := dest + i.U 280 csBundle(i).ldest := dest + i.U 281 csBundle(i).uopIdx := i.U 282 } 283 } 284 is(UopSplitType.VEC_VFV) { 285 /* 286 f to vector move 287 */ 288 csBundle(0).srcType(0) := SrcType.fp 289 csBundle(0).srcType(1) := SrcType.imm 290 csBundle(0).srcType(2) := SrcType.imm 291 csBundle(0).lsrc(1) := 0.U 292 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 293 csBundle(0).fuType := FuType.f2v.U 294 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 295 csBundle(0).vecWen := true.B 296 csBundle(0).vpu.isReverse := false.B 297 /* 298 LMUL 299 */ 300 for (i <- 0 until MAX_VLMUL) { 301 csBundle(i + 1).srcType(0) := SrcType.vp 302 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 303 csBundle(i + 1).lsrc(1) := src2 + i.U 304 csBundle(i + 1).lsrc(2) := dest + i.U 305 csBundle(i + 1).ldest := dest + i.U 306 csBundle(i + 1).uopIdx := i.U 307 } 308 } 309 is(UopSplitType.VEC_EXT2) { 310 for (i <- 0 until MAX_VLMUL / 2) { 311 csBundle(2 * i).lsrc(1) := src2 + i.U 312 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 313 csBundle(2 * i).ldest := dest + (2 * i).U 314 csBundle(2 * i).uopIdx := (2 * i).U 315 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 316 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 317 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 318 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 319 } 320 } 321 is(UopSplitType.VEC_EXT4) { 322 for (i <- 0 until MAX_VLMUL / 4) { 323 csBundle(4 * i).lsrc(1) := src2 + i.U 324 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 325 csBundle(4 * i).ldest := dest + (4 * i).U 326 csBundle(4 * i).uopIdx 
:= (4 * i).U 327 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 328 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 329 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 330 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 331 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 332 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 333 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 334 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 335 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 336 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 337 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 338 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 339 } 340 } 341 is(UopSplitType.VEC_EXT8) { 342 for (i <- 0 until MAX_VLMUL) { 343 csBundle(i).lsrc(1) := src2 344 csBundle(i).lsrc(2) := dest + i.U 345 csBundle(i).ldest := dest + i.U 346 csBundle(i).uopIdx := i.U 347 } 348 } 349 is(UopSplitType.VEC_0XV) { 350 /* 351 i/f to vector move 352 */ 353 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 354 csBundle(0).srcType(1) := SrcType.imm 355 csBundle(0).srcType(2) := SrcType.imm 356 csBundle(0).lsrc(1) := 0.U 357 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 358 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 359 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 360 csBundle(0).rfWen := false.B 361 csBundle(0).fpWen := false.B 362 csBundle(0).vecWen := true.B 363 /* 364 vmv.s.x 365 */ 366 csBundle(1).srcType(0) := SrcType.vp 367 csBundle(1).srcType(1) := SrcType.imm 368 csBundle(1).srcType(2) := SrcType.vp 369 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 370 csBundle(1).lsrc(1) := 0.U 371 csBundle(1).lsrc(2) := dest 372 csBundle(1).ldest := dest 373 csBundle(1).rfWen := false.B 374 csBundle(1).fpWen := false.B 375 csBundle(1).vecWen := true.B 376 csBundle(1).uopIdx := 0.U 377 } 378 is(UopSplitType.VEC_VXV) { 379 /* 380 i to vector move 381 */ 382 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 383 
csBundle(0).srcType(1) := SrcType.imm 384 csBundle(0).srcType(2) := SrcType.imm 385 csBundle(0).lsrc(1) := 0.U 386 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 387 csBundle(0).fuType := FuType.i2v.U 388 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 389 csBundle(0).vecWen := true.B 390 csBundle(0).vpu.isReverse := false.B 391 /* 392 LMUL 393 */ 394 for (i <- 0 until MAX_VLMUL) { 395 csBundle(i + 1).srcType(0) := SrcType.vp 396 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 397 csBundle(i + 1).lsrc(1) := src2 + i.U 398 csBundle(i + 1).lsrc(2) := dest + i.U 399 csBundle(i + 1).ldest := dest + i.U 400 csBundle(i + 1).uopIdx := i.U 401 } 402 } 403 is(UopSplitType.VEC_VVW) { 404 for (i <- 0 until MAX_VLMUL / 2) { 405 csBundle(2 * i).lsrc(0) := src1 + i.U 406 csBundle(2 * i).lsrc(1) := src2 + i.U 407 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 408 csBundle(2 * i).ldest := dest + (2 * i).U 409 csBundle(2 * i).uopIdx := (2 * i).U 410 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 411 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 412 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 413 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 414 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 415 } 416 } 417 is(UopSplitType.VEC_VFW) { 418 /* 419 f to vector move 420 */ 421 csBundle(0).srcType(0) := SrcType.fp 422 csBundle(0).srcType(1) := SrcType.imm 423 csBundle(0).srcType(2) := SrcType.imm 424 csBundle(0).lsrc(1) := 0.U 425 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 426 csBundle(0).fuType := FuType.f2v.U 427 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 428 csBundle(0).rfWen := false.B 429 csBundle(0).fpWen := false.B 430 csBundle(0).vecWen := true.B 431 432 for (i <- 0 until MAX_VLMUL / 2) { 433 csBundle(2 * i + 1).srcType(0) := SrcType.vp 434 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 435 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 436 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 437 
csBundle(2 * i + 1).ldest := dest + (2 * i).U 438 csBundle(2 * i + 1).uopIdx := (2 * i).U 439 csBundle(2 * i + 2).srcType(0) := SrcType.vp 440 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 441 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 442 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 443 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 444 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 445 } 446 } 447 is(UopSplitType.VEC_WVW) { 448 for (i <- 0 until MAX_VLMUL / 2) { 449 csBundle(2 * i).lsrc(0) := src1 + i.U 450 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 451 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 452 csBundle(2 * i).ldest := dest + (2 * i).U 453 csBundle(2 * i).uopIdx := (2 * i).U 454 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 455 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 456 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 457 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 458 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 459 } 460 } 461 is(UopSplitType.VEC_VXW) { 462 /* 463 i to vector move 464 */ 465 csBundle(0).srcType(0) := SrcType.reg 466 csBundle(0).srcType(1) := SrcType.imm 467 csBundle(0).srcType(2) := SrcType.imm 468 csBundle(0).lsrc(1) := 0.U 469 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 470 csBundle(0).fuType := FuType.i2v.U 471 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 472 csBundle(0).vecWen := true.B 473 474 for (i <- 0 until MAX_VLMUL / 2) { 475 csBundle(2 * i + 1).srcType(0) := SrcType.vp 476 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 477 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 478 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 479 csBundle(2 * i + 1).ldest := dest + (2 * i).U 480 csBundle(2 * i + 1).uopIdx := (2 * i).U 481 csBundle(2 * i + 2).srcType(0) := SrcType.vp 482 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 483 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 484 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 485 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 486 
csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 487 } 488 } 489 is(UopSplitType.VEC_WXW) { 490 /* 491 i to vector move 492 */ 493 csBundle(0).srcType(0) := SrcType.reg 494 csBundle(0).srcType(1) := SrcType.imm 495 csBundle(0).srcType(2) := SrcType.imm 496 csBundle(0).lsrc(1) := 0.U 497 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 498 csBundle(0).fuType := FuType.i2v.U 499 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 500 csBundle(0).vecWen := true.B 501 502 for (i <- 0 until MAX_VLMUL / 2) { 503 csBundle(2 * i + 1).srcType(0) := SrcType.vp 504 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 505 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 506 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 507 csBundle(2 * i + 1).ldest := dest + (2 * i).U 508 csBundle(2 * i + 1).uopIdx := (2 * i).U 509 csBundle(2 * i + 2).srcType(0) := SrcType.vp 510 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 511 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 512 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 513 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 514 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 515 } 516 } 517 is(UopSplitType.VEC_WVV) { 518 for (i <- 0 until MAX_VLMUL / 2) { 519 520 csBundle(2 * i).lsrc(0) := src1 + i.U 521 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 522 csBundle(2 * i).lsrc(2) := dest + i.U 523 csBundle(2 * i).ldest := dest + i.U 524 csBundle(2 * i).uopIdx := (2 * i).U 525 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 526 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 527 csBundle(2 * i + 1).lsrc(2) := dest + i.U 528 csBundle(2 * i + 1).ldest := dest + i.U 529 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 530 } 531 } 532 is(UopSplitType.VEC_WFW) { 533 /* 534 f to vector move 535 */ 536 csBundle(0).srcType(0) := SrcType.fp 537 csBundle(0).srcType(1) := SrcType.imm 538 csBundle(0).srcType(2) := SrcType.imm 539 csBundle(0).lsrc(1) := 0.U 540 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 541 csBundle(0).fuType := FuType.f2v.U 542 
csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 543 csBundle(0).rfWen := false.B 544 csBundle(0).fpWen := false.B 545 csBundle(0).vecWen := true.B 546 547 for (i <- 0 until MAX_VLMUL / 2) { 548 csBundle(2 * i + 1).srcType(0) := SrcType.vp 549 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 550 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 551 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 552 csBundle(2 * i + 1).ldest := dest + (2 * i).U 553 csBundle(2 * i + 1).uopIdx := (2 * i).U 554 csBundle(2 * i + 2).srcType(0) := SrcType.vp 555 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 556 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 557 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 558 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 559 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 560 } 561 } 562 is(UopSplitType.VEC_WXV) { 563 /* 564 i to vector move 565 */ 566 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 567 csBundle(0).srcType(1) := SrcType.imm 568 csBundle(0).srcType(2) := SrcType.imm 569 csBundle(0).lsrc(1) := 0.U 570 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 571 csBundle(0).fuType := FuType.i2v.U 572 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 573 csBundle(0).vecWen := true.B 574 575 for (i <- 0 until MAX_VLMUL / 2) { 576 csBundle(2 * i + 1).srcType(0) := SrcType.vp 577 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 578 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 579 csBundle(2 * i + 1).lsrc(2) := dest + i.U 580 csBundle(2 * i + 1).ldest := dest + i.U 581 csBundle(2 * i + 1).uopIdx := (2 * i).U 582 csBundle(2 * i + 2).srcType(0) := SrcType.vp 583 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 584 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 585 csBundle(2 * i + 2).lsrc(2) := dest + i.U 586 csBundle(2 * i + 2).ldest := dest + i.U 587 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 588 } 589 } 590 
is(UopSplitType.VEC_VVM) { 591 csBundle(0).lsrc(2) := dest 592 csBundle(0).ldest := dest 593 csBundle(0).uopIdx := 0.U 594 for (i <- 1 until MAX_VLMUL) { 595 csBundle(i).lsrc(0) := src1 + i.U 596 csBundle(i).lsrc(1) := src2 + i.U 597 csBundle(i).lsrc(2) := dest 598 csBundle(i).ldest := dest 599 csBundle(i).uopIdx := i.U 600 } 601 } 602 is(UopSplitType.VEC_VFM) { 603 /* 604 f to vector move 605 */ 606 csBundle(0).srcType(0) := SrcType.fp 607 csBundle(0).srcType(1) := SrcType.imm 608 csBundle(0).srcType(2) := SrcType.imm 609 csBundle(0).lsrc(1) := 0.U 610 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 611 csBundle(0).fuType := FuType.f2v.U 612 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 613 csBundle(0).rfWen := false.B 614 csBundle(0).fpWen := false.B 615 csBundle(0).vecWen := true.B 616 //LMUL 617 csBundle(1).srcType(0) := SrcType.vp 618 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 619 csBundle(1).lsrc(2) := dest 620 csBundle(1).ldest := dest 621 csBundle(1).uopIdx := 0.U 622 for (i <- 1 until MAX_VLMUL) { 623 csBundle(i + 1).srcType(0) := SrcType.vp 624 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 625 csBundle(i + 1).lsrc(1) := src2 + i.U 626 csBundle(i + 1).lsrc(2) := dest 627 csBundle(i + 1).ldest := dest 628 csBundle(i + 1).uopIdx := i.U 629 } 630 csBundle(numOfUop - 1.U).ldest := dest 631 } 632 is(UopSplitType.VEC_VXM) { 633 /* 634 i to vector move 635 */ 636 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 637 csBundle(0).srcType(1) := SrcType.imm 638 csBundle(0).srcType(2) := SrcType.imm 639 csBundle(0).lsrc(1) := 0.U 640 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 641 csBundle(0).fuType := FuType.i2v.U 642 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 643 csBundle(0).vecWen := true.B 644 //LMUL 645 csBundle(1).srcType(0) := SrcType.vp 646 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 647 csBundle(1).lsrc(2) := dest 648 csBundle(1).ldest := dest 649 
csBundle(1).uopIdx := 0.U 650 for (i <- 1 until MAX_VLMUL) { 651 csBundle(i + 1).srcType(0) := SrcType.vp 652 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 653 csBundle(i + 1).lsrc(1) := src2 + i.U 654 csBundle(i + 1).lsrc(2) := dest 655 csBundle(i + 1).ldest := dest 656 csBundle(i + 1).uopIdx := i.U 657 } 658 csBundle(numOfUop - 1.U).ldest := dest 659 } 660 is(UopSplitType.VEC_SLIDE1UP) { 661 /* 662 i to vector move 663 */ 664 csBundle(0).srcType(0) := SrcType.reg 665 csBundle(0).srcType(1) := SrcType.imm 666 csBundle(0).srcType(2) := SrcType.imm 667 csBundle(0).lsrc(1) := 0.U 668 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 669 csBundle(0).fuType := FuType.i2v.U 670 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 671 csBundle(0).vecWen := true.B 672 //LMUL 673 csBundle(1).srcType(0) := SrcType.vp 674 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 675 csBundle(1).lsrc(2) := dest 676 csBundle(1).ldest := dest 677 csBundle(1).uopIdx := 0.U 678 for (i <- 1 until MAX_VLMUL) { 679 csBundle(i + 1).srcType(0) := SrcType.vp 680 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 681 csBundle(i + 1).lsrc(1) := src2 + i.U 682 csBundle(i + 1).lsrc(2) := dest + i.U 683 csBundle(i + 1).ldest := dest + i.U 684 csBundle(i + 1).uopIdx := i.U 685 } 686 } 687 is(UopSplitType.VEC_FSLIDE1UP) { 688 /* 689 f to vector move 690 */ 691 csBundle(0).srcType(0) := SrcType.fp 692 csBundle(0).srcType(1) := SrcType.imm 693 csBundle(0).srcType(2) := SrcType.imm 694 csBundle(0).lsrc(1) := 0.U 695 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 696 csBundle(0).fuType := FuType.f2v.U 697 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 698 csBundle(0).rfWen := false.B 699 csBundle(0).fpWen := false.B 700 csBundle(0).vecWen := true.B 701 //LMUL 702 csBundle(1).srcType(0) := SrcType.vp 703 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 704 csBundle(1).lsrc(1) := src2 705 csBundle(1).lsrc(2) := dest 706 csBundle(1).ldest := dest 707 csBundle(1).uopIdx := 0.U 708 for (i <- 1 until 
MAX_VLMUL) { 709 csBundle(i + 1).srcType(0) := SrcType.vp 710 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 711 csBundle(i + 1).lsrc(1) := src2 + i.U 712 csBundle(i + 1).lsrc(2) := dest + i.U 713 csBundle(i + 1).ldest := dest + i.U 714 csBundle(i + 1).uopIdx := i.U 715 } 716 } 717 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 718 /* 719 i to vector move 720 */ 721 csBundle(0).srcType(0) := SrcType.reg 722 csBundle(0).srcType(1) := SrcType.imm 723 csBundle(0).srcType(2) := SrcType.imm 724 csBundle(0).lsrc(1) := 0.U 725 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 726 csBundle(0).fuType := FuType.i2v.U 727 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 728 csBundle(0).vecWen := true.B 729 //LMUL 730 for (i <- 0 until MAX_VLMUL) { 731 csBundle(2 * i + 1).srcType(0) := SrcType.vp 732 csBundle(2 * i + 1).srcType(1) := SrcType.vp 733 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 734 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 735 csBundle(2 * i + 1).lsrc(2) := dest + i.U 736 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 737 csBundle(2 * i + 1).uopIdx := (2 * i).U 738 if (2 * i + 2 < MAX_VLMUL * 2) { 739 csBundle(2 * i + 2).srcType(0) := SrcType.vp 740 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 741 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 742 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 743 csBundle(2 * i + 2).ldest := dest + i.U 744 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 745 } 746 } 747 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 748 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 749 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 750 } 751 is(UopSplitType.VEC_FSLIDE1DOWN) { 752 /* 753 f to vector move 754 */ 755 csBundle(0).srcType(0) := SrcType.fp 756 csBundle(0).srcType(1) := SrcType.imm 757 csBundle(0).srcType(2) := SrcType.imm 758 csBundle(0).lsrc(1) := 0.U 759 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 760 csBundle(0).fuType := FuType.f2v.U 761 csBundle(0).fuOpType := 
Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 762 csBundle(0).rfWen := false.B 763 csBundle(0).fpWen := false.B 764 csBundle(0).vecWen := true.B 765 //LMUL 766 for (i <- 0 until MAX_VLMUL) { 767 csBundle(2 * i + 1).srcType(0) := SrcType.vp 768 csBundle(2 * i + 1).srcType(1) := SrcType.vp 769 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 770 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 771 csBundle(2 * i + 1).lsrc(2) := dest + i.U 772 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 773 csBundle(2 * i + 1).uopIdx := (2 * i).U 774 if (2 * i + 2 < MAX_VLMUL * 2) { 775 csBundle(2 * i + 2).srcType(0) := SrcType.vp 776 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 777 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 778 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 779 csBundle(2 * i + 2).ldest := dest + i.U 780 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 781 } 782 } 783 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 784 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 785 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 786 } 787 is(UopSplitType.VEC_VRED) { 788 when(vlmulReg === "b001".U) { 789 csBundle(0).srcType(2) := SrcType.DC 790 csBundle(0).lsrc(0) := src2 + 1.U 791 csBundle(0).lsrc(1) := src2 792 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 793 csBundle(0).uopIdx := 0.U 794 } 795 when(vlmulReg === "b010".U) { 796 csBundle(0).srcType(2) := SrcType.DC 797 csBundle(0).lsrc(0) := src2 + 1.U 798 csBundle(0).lsrc(1) := src2 799 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 800 csBundle(0).uopIdx := 0.U 801 802 csBundle(1).srcType(2) := SrcType.DC 803 csBundle(1).lsrc(0) := src2 + 3.U 804 csBundle(1).lsrc(1) := src2 + 2.U 805 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 806 csBundle(1).uopIdx := 1.U 807 808 csBundle(2).srcType(2) := SrcType.DC 809 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 810 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 811 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 812 csBundle(2).uopIdx := 
2.U 813 } 814 when(vlmulReg === "b011".U) { 815 for (i <- 0 until MAX_VLMUL) { 816 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 817 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 818 csBundle(i).lsrc(1) := src2 + (i * 2).U 819 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 820 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 821 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 822 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 823 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 824 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 825 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 826 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 827 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 828 } 829 csBundle(i).srcType(2) := SrcType.DC 830 csBundle(i).uopIdx := i.U 831 } 832 } 833 when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) { 834 /* 835 * 2 <= vlmul <= 8 836 */ 837 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 838 csBundle(numOfUop - 1.U).lsrc(0) := src1 839 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 840 csBundle(numOfUop - 1.U).lsrc(2) := dest 841 csBundle(numOfUop - 1.U).ldest := dest 842 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 843 } 844 } 845 is(UopSplitType.VEC_VFRED) { 846 val vlmul = vlmulReg 847 val vsew = vsewReg 848 when(vlmul === VLmul.m8){ 849 for (i <- 0 until 4) { 850 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 851 csBundle(i).lsrc(1) := src2 + (i * 2).U 852 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 853 csBundle(i).uopIdx := i.U 854 } 855 for (i <- 4 until 6) { 856 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 857 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 858 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 859 csBundle(i).uopIdx := i.U 860 } 861 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 862 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 863 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 864 
csBundle(6).uopIdx := 6.U 865 when(vsew === VSew.e64) { 866 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 867 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 868 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 869 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 870 csBundle(7).uopIdx := 7.U 871 csBundle(8).lsrc(0) := src1 872 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 873 csBundle(8).ldest := dest 874 csBundle(8).uopIdx := 8.U 875 } 876 when(vsew === VSew.e32) { 877 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 878 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 879 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 880 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 881 csBundle(7).uopIdx := 7.U 882 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 883 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 884 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 885 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 886 csBundle(8).uopIdx := 8.U 887 csBundle(9).lsrc(0) := src1 888 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 889 csBundle(9).ldest := dest 890 csBundle(9).uopIdx := 9.U 891 } 892 when(vsew === VSew.e16) { 893 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 894 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 895 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 896 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 897 csBundle(7).uopIdx := 7.U 898 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 899 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 900 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 901 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 902 csBundle(8).uopIdx := 8.U 903 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 904 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 905 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 906 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 907 csBundle(9).uopIdx := 9.U 908 csBundle(10).lsrc(0) := src1 909 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 910 csBundle(10).ldest := dest 911 csBundle(10).uopIdx 
:= 10.U 912 } 913 } 914 when(vlmul === VLmul.m4) { 915 for (i <- 0 until 2) { 916 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 917 csBundle(i).lsrc(1) := src2 + (i * 2).U 918 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 919 csBundle(i).uopIdx := i.U 920 } 921 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 922 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 923 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 924 csBundle(2).uopIdx := 2.U 925 when(vsew === VSew.e64) { 926 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 927 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 928 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 929 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 930 csBundle(3).uopIdx := 3.U 931 csBundle(4).lsrc(0) := src1 932 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 933 csBundle(4).ldest := dest 934 csBundle(4).uopIdx := 4.U 935 } 936 when(vsew === VSew.e32) { 937 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 938 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 939 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 940 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 941 csBundle(3).uopIdx := 3.U 942 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 943 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 944 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 945 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 946 csBundle(4).uopIdx := 4.U 947 csBundle(5).lsrc(0) := src1 948 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 949 csBundle(5).ldest := dest 950 csBundle(5).uopIdx := 5.U 951 } 952 when(vsew === VSew.e16) { 953 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 954 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 955 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 956 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 957 csBundle(3).uopIdx := 3.U 958 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 959 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 960 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 961 
csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 962 csBundle(4).uopIdx := 4.U 963 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 964 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 965 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 966 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 967 csBundle(5).uopIdx := 5.U 968 csBundle(6).lsrc(0) := src1 969 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 970 csBundle(6).ldest := dest 971 csBundle(6).uopIdx := 6.U 972 } 973 } 974 when(vlmul === VLmul.m2) { 975 csBundle(0).lsrc(0) := src2 + 1.U 976 csBundle(0).lsrc(1) := src2 + 0.U 977 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 978 csBundle(0).uopIdx := 0.U 979 when(vsew === VSew.e64) { 980 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 981 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 982 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 983 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 984 csBundle(1).uopIdx := 1.U 985 csBundle(2).lsrc(0) := src1 986 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 987 csBundle(2).ldest := dest 988 csBundle(2).uopIdx := 2.U 989 } 990 when(vsew === VSew.e32) { 991 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 992 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 993 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 994 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 995 csBundle(1).uopIdx := 1.U 996 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 997 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 998 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 999 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 1000 csBundle(2).uopIdx := 2.U 1001 csBundle(3).lsrc(0) := src1 1002 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1003 csBundle(3).ldest := dest 1004 csBundle(3).uopIdx := 3.U 1005 } 1006 when(vsew === VSew.e16) { 1007 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1008 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1009 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1010 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 1011 
csBundle(1).uopIdx := 1.U 1012 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1013 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1014 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1015 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 1016 csBundle(2).uopIdx := 2.U 1017 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 1018 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1019 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 1020 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 1021 csBundle(3).uopIdx := 3.U 1022 csBundle(4).lsrc(0) := src1 1023 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 1024 csBundle(4).ldest := dest 1025 csBundle(4).uopIdx := 4.U 1026 } 1027 } 1028 when(vlmul === VLmul.m1) { 1029 when(vsew === VSew.e64) { 1030 csBundle(0).lsrc(0) := src2 1031 csBundle(0).lsrc(1) := src2 1032 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1033 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1034 csBundle(0).uopIdx := 0.U 1035 csBundle(1).lsrc(0) := src1 1036 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1037 csBundle(1).ldest := dest 1038 csBundle(1).uopIdx := 1.U 1039 } 1040 when(vsew === VSew.e32) { 1041 csBundle(0).lsrc(0) := src2 1042 csBundle(0).lsrc(1) := src2 1043 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1044 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1045 csBundle(0).uopIdx := 0.U 1046 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1047 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1048 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1049 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1050 csBundle(1).uopIdx := 1.U 1051 csBundle(2).lsrc(0) := src1 1052 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1053 csBundle(2).ldest := dest 1054 csBundle(2).uopIdx := 2.U 1055 } 1056 when(vsew === VSew.e16) { 1057 csBundle(0).lsrc(0) := src2 1058 csBundle(0).lsrc(1) := src2 1059 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1060 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1061 csBundle(0).uopIdx := 0.U 1062 csBundle(1).lsrc(0) := 
(VECTOR_TMP_REG_LMUL + 0).U 1063 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1064 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1065 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1066 csBundle(1).uopIdx := 1.U 1067 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1068 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1069 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1070 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 1071 csBundle(2).uopIdx := 2.U 1072 csBundle(3).lsrc(0) := src1 1073 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1074 csBundle(3).ldest := dest 1075 csBundle(3).uopIdx := 3.U 1076 } 1077 } 1078 when(vlmul === VLmul.mf2) { 1079 when(vsew === VSew.e32) { 1080 csBundle(0).lsrc(0) := src2 1081 csBundle(0).lsrc(1) := src2 1082 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1083 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1084 csBundle(0).uopIdx := 0.U 1085 csBundle(1).lsrc(0) := src1 1086 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1087 csBundle(1).ldest := dest 1088 csBundle(1).uopIdx := 1.U 1089 } 1090 when(vsew === VSew.e16) { 1091 csBundle(0).lsrc(0) := src2 1092 csBundle(0).lsrc(1) := src2 1093 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1094 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1095 csBundle(0).uopIdx := 0.U 1096 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1097 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1098 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1099 csBundle(1).vpu.fpu.isFoldTo1_8 := true.B 1100 csBundle(1).uopIdx := 1.U 1101 csBundle(2).lsrc(0) := src1 1102 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1103 csBundle(2).ldest := dest 1104 csBundle(2).uopIdx := 2.U 1105 } 1106 } 1107 when(vlmul === VLmul.mf4) { 1108 when(vsew === VSew.e16) { 1109 csBundle(0).lsrc(0) := src2 1110 csBundle(0).lsrc(1) := src2 1111 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1112 csBundle(0).vpu.fpu.isFoldTo1_8 := true.B 1113 csBundle(0).uopIdx := 0.U 1114 csBundle(1).lsrc(0) := src1 1115 
csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
            csBundle(1).ldest := dest
            csBundle(1).uopIdx := 1.U
          }
        }
      }

      /*
       * VEC_VFREDOSUM: builds one serial chain of uops per (LMUL, SEW) pair.
       *
       * For a chain of `uopCount` uops with `group` uops per vs2 register:
       *  - uop 0 reads src1 in lsrc(0); every later uop reads VECTOR_TMP_REG_LMUL;
       *  - the first uop of each group reads vs2 register `src2 + k / group` in
       *    lsrc(1) and lsrc(2) and has its fold flag cleared;
       *  - every other uop reads the temp register and has a fold flag set;
       *  - only the last uop writes `dest`; all others write the temp register.
       *
       * "Widen-aware" entries additionally consult `isWiden`
       * (fuOpType === VfaluType.vfwredosum): the fold flag one step narrower
       * (group / 2) is used instead of `group`, and the second uop of each group
       * reads the vs2 register in lsrc(2).
       */
      is(UopSplitType.VEC_VFREDOSUM) {
        import yunsuan.VfaluType
        val vlmul = vlmulReg
        val vsew = vsewReg
        val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

        // Selects the isFoldTo1_<width> control bit of uop `uopIndex`.
        def foldFlagOf(uopIndex: Int, width: Int) = width match {
          case 2 => csBundle(uopIndex).vpu.fpu.isFoldTo1_2
          case 4 => csBundle(uopIndex).vpu.fpu.isFoldTo1_4
          case 8 => csBundle(uopIndex).vpu.fpu.isFoldTo1_8
        }

        // Emits the connections for one chain; must be called inside the
        // `when` scope that selects the matching LMUL / SEW.
        def emitChain(uopCount: Int, group: Int, widenAware: Boolean): Unit =
          (0 until uopCount).foreach { k =>
            val uop = csBundle(k)
            val startsGroup = k % group == 0
            val closesChain = k == uopCount - 1
            def accTmp = VECTOR_TMP_REG_LMUL.U
            def vs2Reg = src2 + (k / group).U

            uop.lsrc(0) := (if (k == 0) src1 else accTmp)
            uop.lsrc(1) := (if (startsGroup) vs2Reg else accTmp)
            // Branch order matters: "last uop" wins over "second uop of a group"
            // (they coincide when uopCount == 2).
            uop.lsrc(2) := (
              if (startsGroup) vs2Reg
              else if (closesChain) dest
              else if (widenAware && k % group == 1) Mux(isWiden, vs2Reg, accTmp)
              else accTmp
            )
            uop.ldest := (if (closesChain) dest else accTmp)

            val folds = (!startsGroup).B
            if (widenAware) {
              foldFlagOf(k, group / 2) := isWiden && folds
              foldFlagOf(k, group) := !isWiden && folds
            } else {
              foldFlagOf(k, group) := folds
            }
            uop.uopIdx := k.U
          }

        // LMUL -> list of (SEW, uop count, uops per vs2 register, widen-aware)
        val schedule = Seq(
          VLmul.m8 -> Seq((VSew.e64, 16, 2, false), (VSew.e32, 32, 4, false), (VSew.e16, 64, 8, false)),
          VLmul.m4 -> Seq((VSew.e64, 8, 2, false), (VSew.e32, 16, 4, true), (VSew.e16, 32, 8, true)),
          VLmul.m2 -> Seq((VSew.e64, 4, 2, false), (VSew.e32, 8, 4, true), (VSew.e16, 16, 8, true)),
          VLmul.m1 -> Seq((VSew.e64, 2, 2, false), (VSew.e32, 4, 4, true), (VSew.e16, 8, 8, true)),
          VLmul.mf2 -> Seq((VSew.e32, 2, 4, true), (VSew.e16, 4, 8, true)),
          VLmul.mf4 -> Seq((VSew.e16, 2, 8, true))
        )
        schedule.foreach { case (lmulCode, perSew) =>
          when(vlmul === lmulCode) {
            perSew.foreach { case (sewCode, uopCount, group, widenAware) =>
              when(vsew === sewCode) {
                emitChain(uopCount, group, widenAware)
              }
            }
          }
        }
      }

      /*
       * VEC_SLIDEUP: uop 0 moves the scalar/immediate operand into the vector
       * temp register; the remaining uops form a triangle (destination register i
       * takes i + 1 uops, j = 0..i), chained through temp registers.
       */
      is(UopSplitType.VEC_SLIDEUP) {
        // i to vector move
        val moveUop = csBundle(0)
        val moveOp = Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0))
        moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
        moveUop.srcType(1) := SrcType.imm
        moveUop.srcType(2) := SrcType.imm
        moveUop.lsrc(1) := 0.U
        moveUop.ldest := VECTOR_TMP_REG_LMUL.U
        moveUop.fuType := FuType.i2v.U
        moveUop.fuOpType := Cat(moveOp, vsewReg)
        moveUop.vecWen := true.B
        // LMUL
        for (i <- 0 until MAX_VLMUL; j <- 0 to i) {
          // Position inside the triangle; the csBundle slot is shifted by one
          // because slot 0 holds the move uop above.
          val slot = i * (i + 1) / 2 + j
          val uop = csBundle(slot + 1)
          def archVd = dest + i.U
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := src2 + j.U
          // first uop of a row reads the architectural vd as old value
          uop.lsrc(2) := (if (j == 0) archVd else (VECTOR_TMP_REG_LMUL + j).U)
          // last uop of a row writes the architectural vd
          uop.ldest := (if (j == i) archVd else (VECTOR_TMP_REG_LMUL + j + 1).U)
          uop.uopIdx := slot.U
        }
      }

      is(UopSplitType.VEC_SLIDEDOWN) {
        // i to vector move
        csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
        csBundle(0).srcType(1) := SrcType.imm
        csBundle(0).srcType(2) :=
SrcType.imm
        csBundle(0).lsrc(1) := 0.U
        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
        csBundle(0).fuType := FuType.i2v.U
        csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
        csBundle(0).vecWen := true.B
        // LMUL
        for (i <- 0 until MAX_VLMUL)
          for (j <- (0 to i).reverse) {
            when(i.U < lmul) {
              val old_vd = if (j == 0) {
                dest + lmul - 1.U - i.U
              } else (VECTOR_TMP_REG_LMUL + j).U
              val vd = if (j == i) {
                dest + lmul - 1.U - i.U
              } else (VECTOR_TMP_REG_LMUL + j + 1).U
              csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
              csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
              csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
              csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
              csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
              csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
            }
          }
      }

      /*
       * VEC_M0X: a chain of vector uops through temp registers (uop i reads
       * temp i - 1 and writes temp i; uop 0 ignores its first source).  The uop
       * at index numOfUop - 1 is then re-targeted to write `dest` through the
       * integer register file instead of the vector one.
       */
      is(UopSplitType.VEC_M0X) {
        // LMUL
        (0 until MAX_VLMUL).foreach { k =>
          val uop = csBundle(k)
          uop.srcType(0) := (if (k == 0) SrcType.DC else SrcType.vp)
          uop.srcType(1) := SrcType.vp
          uop.rfWen := false.B
          uop.fpWen := false.B
          uop.vecWen := true.B
          uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + k - 1).U
          uop.lsrc(1) := src2
          // lsrc(2) is intentionally not assigned here (don't care)
          uop.ldest := (VECTOR_TMP_REG_LMUL + k).U
          uop.uopIdx := k.U
        }
        // Must stay after the loop: these connections override the loop's
        // values for the final uop (last connect wins).
        val finalUop = csBundle(numOfUop - 1.U)
        finalUop.rfWen := Mux(dest === 0.U, false.B, true.B)
        finalUop.fpWen := false.B
        finalUop.vecWen := false.B
        finalUop.ldest := dest
      }

      /*
       * VEC_MVV: two uops per register index i.  Both read temp i - 1 and src2;
       * the even uop updates dest + i (reading it as old value), the odd uop
       * writes temp i.
       */
      is(UopSplitType.VEC_MVV) {
        // LMUL
        for (i <- 0 until MAX_VLMUL) {
          val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
          val toVd = csBundle(i * 2)
          val toTmp = csBundle(i * 2 + 1)
          // connections shared by both uops of the pair
          Seq(toVd, toTmp).foreach { uop =>
            uop.srcType(0) := srcType0
            uop.srcType(1) := SrcType.vp
            uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
            uop.lsrc(1) := src2
          }
          toVd.lsrc(2) := dest + i.U
          toVd.ldest := dest + i.U
          toVd.uopIdx := (i * 2).U
          // lsrc(2) of the temp-writing uop is intentionally not assigned (don't care)
          toTmp.ldest := (VECTOR_TMP_REG_LMUL + i).U
          toTmp.uopIdx := (i * 2 + 1).U
        }
      }

      /*
       * VEC_VWW: the first `lmul` uops combine vs2 register i with itself into
       * temp i; the following uops combine adjacent pairs of earlier temps
       * (temp 2k + 1 with temp 2k, k = i - lmul) into temp i.  The uop at index
       * numOfUop - 1 finally takes src1 and reads/writes `dest`.
       */
      is(UopSplitType.VEC_VWW) {
        for (i <- 0 until MAX_VLMUL * 2) {
          val uop = csBundle(i)
          // identical in both branches of the original when/otherwise
          uop.srcType(2) := SrcType.DC
          uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
          uop.uopIdx := i.U
          when(i.U < lmul) {
            uop.lsrc(0) := src2 + i.U
            uop.lsrc(1) := src2 + i.U
          }.otherwise {
            val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
            uop.lsrc(0) := pairBase + 1.U
            uop.lsrc(1) := pairBase
          }
        }
        // Final-uop override.  These connections do not depend on the loop
        // index, so they are emitted once after the loop instead of once per
        // iteration; with last-connect semantics the resulting hardware is the
        // same because the last copy always followed every per-index connection.
        val finalUop = csBundle(numOfUop - 1.U)
        finalUop.srcType(2) := SrcType.vp
        finalUop.lsrc(0) := src1
        finalUop.lsrc(2) := dest
        finalUop.ldest := dest
      }
      is(UopSplitType.VEC_RGATHER) {
        def genCsBundle_VEC_RGATHER(len:Int): Unit ={
          for (i <- 0 until len)
            for (j <- 0 until len) {
              // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
              // csBundle(i * len + j).srcType(1) := SrcType.vp
// csBundle(i * len + j).srcType(2) := SrcType.vp
              csBundle(i * len + j).lsrc(0) := src1 + i.U
              csBundle(i * len + j).lsrc(1) := src2 + j.U
              val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
              csBundle(i * len + j).lsrc(2) := vd_old
              val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
              csBundle(i * len + j).ldest := vd
              csBundle(i * len + j).uopIdx := (i * len + j).U
            }
        }
        switch(vlmulReg) {
          is("b001".U ){
            genCsBundle_VEC_RGATHER(2)
          }
          is("b010".U ){
            genCsBundle_VEC_RGATHER(4)
          }
          is("b011".U ){
            genCsBundle_VEC_RGATHER(8)
          }
        }
      }

      /*
       * VEC_RGATHER_VX: uop 0 moves the scalar/immediate index into the vector
       * temp register; then, for `regs` destination registers, `regs` uops each
       * are chained through temp registers (first reads dest + i as old value,
       * last writes dest + i).
       */
      is(UopSplitType.VEC_RGATHER_VX) {
        def emitGather(regs: Int): Unit =
          for (i <- 0 until regs; j <- 0 until regs) {
            val seqNo = i * regs + j
            // slot 0 is the move uop, so gather uops start at slot 1
            val uop = csBundle(seqNo + 1)
            uop.srcType(0) := SrcType.vp
            // srcType(1) / srcType(2) keep their previously assigned values
            uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
            uop.lsrc(1) := src2 + j.U
            uop.lsrc(2) := (if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U)
            uop.ldest := (if (j == regs - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U)
            uop.uopIdx := seqNo.U
          }

        // i to vector move
        val moveUop = csBundle(0)
        val moveOp = Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0))
        moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
        moveUop.srcType(1) := SrcType.imm
        moveUop.srcType(2) := SrcType.imm
        moveUop.lsrc(1) := 0.U
        moveUop.ldest := VECTOR_TMP_REG_LMUL.U
        moveUop.fuType := FuType.i2v.U
        moveUop.fuOpType := Cat(moveOp, vsewReg)
        moveUop.rfWen := false.B
        moveUop.fpWen := false.B
        moveUop.vecWen := true.B

        // Single-register layout is the default; the switch below overrides it
        // for the three listed vlmul encodings, so it must come first.
        emitGather(1)
        switch(vlmulReg) {
          is("b001".U) {
            emitGather(2)
          }
          is("b010".U) {
            emitGather(4)
          }
          is("b011".U) {
            emitGather(8)
          }
        }
      }

      /*
       * VEC_RGATHEREI16: like the gather above, but the index register used by
       * destination register i depends on vsewReg:
       *  - vsewReg == 0: two uops per (i, j), using index registers 2i and 2i + 1;
       *  - VSew.e32:     index register i / 2;
       *  - VSew.e64:     index register i / 4;
       *  - otherwise:    index register i.
       */
      is(UopSplitType.VEC_RGATHEREI16) {
        // vsewReg == 0 layout: each (i, j) step is split in two uops that pass
        // the partial result through temp register 2j.
        def emitSew8(regs: Int): Unit =
          for (i <- 0 until regs; j <- 0 until regs) {
            val base = (i * regs + j) * 2
            val midTmp = VECTOR_TMP_REG_LMUL + j * 2
            val first = csBundle(base)
            val second = csBundle(base + 1)

            first.lsrc(0) := src1 + (i * 2).U
            first.lsrc(1) := src2 + j.U
            first.lsrc(2) := (if (j == 0) (dest + i.U) else (midTmp - 1).U)
            first.ldest := midTmp.U
            first.uopIdx := base.U

            second.lsrc(0) := src1 + (i * 2 + 1).U
            second.lsrc(1) := src2 + j.U
            second.lsrc(2) := midTmp.U
            second.ldest := (if (j == regs - 1) (dest + i.U) else (midTmp + 1).U)
            second.uopIdx := (base + 1).U
          }

        // One uop per (i, j); `destRegsPerIndexReg` destination registers share
        // one index register (1, 2 or 4).
        def emitShared(regs: Int, destRegsPerIndexReg: Int): Unit =
          for (i <- 0 until regs; j <- 0 until regs) {
            val seqNo = i * regs + j
            val uop = csBundle(seqNo)
            uop.lsrc(0) := src1 + (i / destRegsPerIndexReg).U
            uop.lsrc(1) := src2 + j.U
            uop.lsrc(2) := (if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U)
            uop.ldest := (if (j == regs - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U)
            uop.uopIdx := seqNo.U
          }

        // Chooses the layout for `regs` registers from vsewReg.  The split
        // (vsewReg == 0) layout is only offered when `withSew8` is set.
        def emitBySew(regs: Int, withSew8: Boolean = true): Unit =
          if (withSew8) {
            when(!vsewReg.orR) {
              emitSew8(regs)
            }.elsewhen(vsewReg === VSew.e32) {
              emitShared(regs, 2)
            }.elsewhen(vsewReg === VSew.e64) {
              emitShared(regs, 4)
            }.otherwise {
              emitShared(regs, 1)
            }
          } else {
            when(vsewReg === VSew.e32) {
              emitShared(regs, 2)
            }.elsewhen(vsewReg === VSew.e64) {
              emitShared(regs, 4)
            }.otherwise {
              emitShared(regs, 1)
            }
          }

        // Single-register layout first; overridden below for larger vlmul.
        emitBySew(1)
        switch(vlmulReg) {
          is("b001".U) {
            emitBySew(2)
          }
          is("b010".U) {
            emitBySew(4)
          }
          is("b011".U) {
            // the original has no vsewReg == 0 branch for this encoding
            emitBySew(8, withSew8 = false)
          }
        }
      }
      is(UopSplitType.VEC_COMPRESS) {
        def
genCsBundle_VEC_COMPRESS(len:Int): Unit = { 1608 for (i <- 0 until len) { 1609 val jlen = if (i == len-1) i+1 else i+2 1610 for (j <- 0 until jlen) { 1611 val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U 1612 val vd = if(i==len-1) (dest + j.U) else { 1613 if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U 1614 } 1615 csBundle(i*(i+3)/2 + j).vecWen := true.B 1616 csBundle(i*(i+3)/2 + j).v0Wen := false.B 1617 val src13Type = if (j == i+1) DontCare else SrcType.vp 1618 csBundle(i*(i+3)/2 + j).srcType(0) := src13Type 1619 csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp 1620 csBundle(i*(i+3)/2 + j).srcType(2) := src13Type 1621 if (i == 0) { 1622 csBundle(i*(i+3)/2 + j).lsrc(0) := src1 1623 } else { 1624 csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1625 } 1626 csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U 1627 csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old 1628 csBundle(i*(i+3)/2 + j).ldest := vd 1629 csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U 1630 } 1631 } 1632 } 1633 switch(vlmulReg) { 1634 is("b001".U ){ 1635 genCsBundle_VEC_COMPRESS(2) 1636 } 1637 is("b010".U ){ 1638 genCsBundle_VEC_COMPRESS(4) 1639 } 1640 is("b011".U ){ 1641 genCsBundle_VEC_COMPRESS(8) 1642 } 1643 } 1644 } 1645 is(UopSplitType.VEC_MVNR) { 1646 for (i <- 0 until MAX_VLMUL) { 1647 csBundle(i).lsrc(0) := src1 + i.U 1648 csBundle(i).lsrc(1) := src2 + i.U 1649 csBundle(i).lsrc(2) := dest + i.U 1650 csBundle(i).ldest := dest + i.U 1651 csBundle(i).uopIdx := i.U 1652 } 1653 } 1654 is(UopSplitType.VEC_US_LDST) { 1655 /* 1656 FMV.D.X 1657 */ 1658 csBundle(0).srcType(0) := SrcType.reg 1659 csBundle(0).srcType(1) := SrcType.imm 1660 csBundle(0).lsrc(1) := 0.U 1661 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1662 csBundle(0).fuType := FuType.i2v.U 1663 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 1664 csBundle(0).rfWen := false.B 1665 csBundle(0).fpWen := false.B 1666 csBundle(0).vecWen := true.B 1667 csBundle(0).vlsInstr := true.B 1668 
// VEC_US_LDST, remaining uops: uop (i + 1) reads VECTOR_TMP_REG_LMUL as source 0 and
      // uses dest + i both as old vd (lsrc(2)) and as its destination.
      (0 until MAX_VLMUL).foreach { regIdx =>
        val uop = csBundle(regIdx + 1)
        val vdReg = dest + regIdx.U
        uop.srcType(0) := SrcType.vp
        uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        uop.lsrc(2) := vdReg // old vd
        uop.ldest := vdReg
        uop.uopIdx := regIdx.U
        uop.vlsInstr := true.B
      }
      // First uop gets waitForward and uop (numOfUop - 1) gets blockBackward when isUsSegment.
      csBundle.head.waitForward := isUsSegment
      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
    }
    is(UopSplitType.VEC_S_LDST) {
      /*
       * uop 0 (FMV.D.X-like): i2v uop with a SrcType.reg source 0 and an immediate source 1,
       * writing the temporary vector register VECTOR_TMP_REG_LMUL.
       */
      val moveUop0 = csBundle(0)
      moveUop0.srcType(0) := SrcType.reg
      moveUop0.srcType(1) := SrcType.imm
      moveUop0.lsrc(1) := 0.U
      moveUop0.ldest := VECTOR_TMP_REG_LMUL.U
      moveUop0.fuType := FuType.i2v.U
      moveUop0.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      moveUop0.rfWen := false.B
      moveUop0.fpWen := false.B
      moveUop0.vecWen := true.B
      moveUop0.vlsInstr := true.B

      // uop 1: same kind of i2v uop, but source 0 is latchedInst.lsrc(1) and the result
      // goes to temporary register VECTOR_TMP_REG_LMUL + 1.
      val moveUop1 = csBundle(1)
      moveUop1.srcType(0) := SrcType.reg
      moveUop1.srcType(1) := SrcType.imm
      moveUop1.lsrc(0) := latchedInst.lsrc(1)
      moveUop1.lsrc(1) := 0.U
      moveUop1.ldest := (VECTOR_TMP_REG_LMUL + 1).U
      moveUop1.fuType := FuType.i2v.U
      moveUop1.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      moveUop1.rfWen := false.B
      moveUop1.fpWen := false.B
      moveUop1.vecWen := true.B
      moveUop1.vlsInstr := true.B

      // Remaining uops: uop (i + 2) reads both temporaries and works on register dest + i.
      (0 until MAX_VLMUL).foreach { regIdx =>
        val uop = csBundle(regIdx + 2)
        val vdReg = dest + regIdx.U
        uop.srcType(0) := SrcType.vp
        uop.srcType(1) := SrcType.vp
        uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        uop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
        uop.lsrc(2) := vdReg // old vd
        uop.ldest := vdReg
        uop.uopIdx := regIdx.U
        uop.vlsInstr := true.B
      }
      csBundle.head.waitForward := isSdSegment
      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
    }
    is(UopSplitType.VEC_I_LDST) {
      // Fills uops 1 .. MAX_VLMUL for a segment indexed access. Uops with index below
      // lmul * nf get a vector old-vd source and vecWen; the others get SrcType.no and no
      // vecWen. Source 1 is set to SrcType.no here; the SRC1 helper below may override it.
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul: Int, nf: Int): Unit = {
        val activeUops = lmul * nf
        (0 until MAX_VLMUL).foreach { i =>
          val isActive = i < activeUops
          val uop = csBundle(i + 1)
          val vdReg = dest + i.U
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.srcType(1) := SrcType.no
          uop.lsrc(1) := src2 + i.U
          uop.srcType(2) := (if (isActive) SrcType.vp else SrcType.no)
          uop.lsrc(2) := vdReg
          uop.ldest := vdReg
          uop.rfWen := false.B
          uop.fpWen := false.B
          uop.vecWen := isActive.B
          uop.uopIdx := i.U
          uop.vlsInstr := true.B
        }
      }
      // Marks source 1 (src2 + i) as a vector source for the first `emul` uops after uop 0
      // and as SrcType.no for the rest.
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul: Int): Unit = {
        (0 until MAX_VLMUL).foreach { i =>
          val uop = csBundle(i + 1)
          uop.srcType(1) := (if (i < emul) SrcType.vp else SrcType.no)
          uop.lsrc(1) := src2 + i.U
        }
      }

      val vlmul = vlmulReg
      val vsew = Cat(0.U(1.W), vsewReg)
      val veew = Cat(0.U(1.W), width)
      // ~vsew + 1 is the two's-complement negation, so this is veew + vlmul - vsew
      // modulo the operand width.
      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
      // Encodings b001 / b010 / b011 map to 1 / 2 / 3; every other encoding maps to 0.
      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))

      // uop 0: i2v uop writing the temporary vector register VECTOR_TMP_REG_LMUL.
      val ixMoveUop = csBundle(0)
      ixMoveUop.srcType(0) := SrcType.reg
      ixMoveUop.srcType(1) := SrcType.imm
      ixMoveUop.lsrc(1) := 0.U
      ixMoveUop.ldest := VECTOR_TMP_REG_LMUL.U
      ixMoveUop.fuType := FuType.i2v.U
      ixMoveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      ixMoveUop.rfWen := false.B
      ixMoveUop.fpWen := false.B
      ixMoveUop.vecWen := true.B
      ixMoveUop.vlsInstr := true.B

      when(nf === 0.U) {
        // nf field is zero: register offsets for uop i come from the i-th lookup table,
        // which is indexed by {simple_emul, simple_lmul}.
        for (i <- 0 until MAX_VLMUL) {
          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
          val offsetVd = indexedLSRegOffset(i).outOffsetVd
          val uop = csBundle(i + 1)
          // Select src2 + offsetVs2 and dest + offsetVd through one-hot muxes.
          val vs2Reg = Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
          val vdReg = Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := vs2Reg
          uop.srcType(2) := SrcType.vp
          // lsrc(2) is the old vd, i.e. the same register as the destination
          uop.lsrc(2) := vdReg
          uop.ldest := vdReg
          uop.uopIdx := i.U
          uop.vlsInstr := true.B
        }
      }.otherwise {
        // nf field is non-zero: segment indexed load/store.
        // Step 1: source 0, old vd and destination, chosen by (simple_lmul, nf).
        switch(simple_lmul) {
          is(0.U) {
            switch(nf) {
              is(1.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2) }
              is(2.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3) }
              is(3.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4) }
              is(4.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5) }
              is(5.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6) }
              is(6.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7) }
              is(7.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8) }
            }
          }
          is(1.U) {
            switch(nf) {
              is(1.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2) }
              is(2.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3) }
              is(3.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4) }
            }
          }
          is(2.U) {
            switch(nf) {
              is(1.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2) }
            }
          }
        }

        // Step 2: source 1, chosen by simple_emul. These connections come after step 1
        // and therefore override its srcType(1) assignment.
        switch(simple_emul) {
          is(0.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1) }
          is(1.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2) }
          is(2.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4) }
          is(3.U) { genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8) }
        }

        // Step 3: for vector store instructions, clear vecWen on every uop after uop 0.
        when(isVstore) {
          (0 until MAX_VLMUL).foreach { i =>
            csBundle(i + 1).vecWen := false.B
          }
        }
      }
      csBundle.head.waitForward := isIxSegment
      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
    }
  }

  // Counter derived from the rename-side ready signals: a priority mux over the inverted
  // outReadys that yields the matching port index, or RenameWidth as the default.
  // NOTE(review): presumably the number of leading ready ports; confirm against
  // PriorityMuxDefault's definition.
  val readyCounter = PriorityMuxDefault(
    outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)),
    RenameWidth.U
  )

  // True when all remaining uops of the complex instruction can be sent out this cycle.
  val thisAllOut = uopRes <= readyCounter

  switch(state) {
    is(s_idle) {
      // Idle: start splitting as soon as a valid instruction arrives.
      when(inValid) {
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }
    }
    is(s_active) {
      when(!thisAllOut) {
        // Some uops stay behind: keep going and subtract what was accepted this cycle.
        stateNext := s_active
        uopResNext := uopRes - readyCounter
      }.elsewhen(inValid) {
        // Everything went out and a new instruction is waiting: reload the counter.
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }.otherwise {
        // Everything went out and nothing is waiting: return to idle.
        stateNext := s_idle
        uopResNext := 0.U
      }
    }
  }

  // A redirect forces the state machine back to idle with no uops remaining.
  state := Mux(io.redirect, s_idle, stateNext)
  uopRes := Mux(io.redirect, 0.U, uopResNext)

  // Number of uops output this cycle: min(uopRes, readyCounter).
  val complexNum = Mux(thisAllOut, uopRes, readyCounter)

  (0 until RenameWidth).foreach { port =>
    // Output port `port` carries uop (port + numOfUop - uopRes); indices at or beyond
    // maxUopSize fall back to the last csBundle entry.
    val uopSel = port.U + numOfUop - uopRes
    outValids(port) := complexNum > port.U
    outDecodedInsts(port) := Mux(uopSel < maxUopSize.U, csBundle(uopSel), csBundle(maxUopSize - 1))
  }

  outComplexNum := Mux(state === s_active, complexNum, 0.U)
  // Ready for a new instruction when idle, or when active and all remaining uops leave now.
  inReady := (state === s_idle) || ((state === s_active) && thisAllOut)

//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
//  val notInf = Wire(Vec(DecodeWidth, Bool()))
//
notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 } 1914// notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc) 1915// val notInfVec = Wire(Vec(DecodeWidth, Bool())) 1916// notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR} 1917// 1918// complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR , 1919// Mux(uopRes0 > readyCounter, readyCounter, uopRes0), 1920// 0.U) 1921// validToRename.zipWithIndex.foreach{ 1922// case(dst, i) => 1923// val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i)) 1924// dst := MuxCase(false.B, Seq( 1925// (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B), 1926// (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)), 1927// ).toSeq) 1928// } 1929// 1930// readyToIBuf.zipWithIndex.foreach { 1931// case (dst, i) => 1932// val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B) 1933// dst := MuxCase(true.B, Seq( 1934// (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B, 1935// (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B)) 1936// ).toSeq) 1937// } 1938// 1939// io.deq.decodedInsts := decodedInsts 1940// io.deq.complexNum := complexNum 1941// io.deq.validToRename := validToRename 1942// io.deq.readyToIBuf := readyToIBuf 1943} 1944