/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
 * Elaboration-time lookup table for a single uop slot of an indexed vector load/store.
 *
 * One instance is built per uop position. For that fixed `uopIdx` the module enumerates
 * every (emul, lmul, nf) combination in software, computes the register offsets of the
 * uop, and lets `decoder`/`QMCMinimizer` turn the result into combinational logic.
 *
 * Input key `src` (7 bits):   `[6:5] = emul`, `[4:3] = lmul`, `[2:0] = nf`.
 * `emul` and `lmul` are treated as log2 exponents by the generator (group size is
 * `1 << emul` / `1 << lmul`) and `nf` is "number of fields - 1" (the generator is
 * called with `nf + 1`).
 *
 * Output word (7 bits):       `[6] = isFirstUopInVd`, `[5:3] = offsetVs2`, `[2:0] = offsetVd`.
 *
 * @param uopIdx position of the uop described by this table instance
 */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
   * Computes the register offsets of uop number `uopIdx` of an indexed load/store.
   *
   * The uops of one instruction are laid out field by field; each field owns
   * `1 << max(emul, lmul)` consecutive uops (when `lmul < emul` the uop count follows
   * emul * nf, otherwise lmul * nf).
   *
   * @param lmul    log2 of the group size used for the `Vd` offset
   * @param emul    log2 of the group size used for the `Vs2` offset
   * @param nfields number of fields (`nf + 1`)
   * @param uopIdx  index of the uop inside the instruction
   * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the combination is
   *         rejected by the `lmul * nfields` guard or `uopIdx` lies outside the instruction
   */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val fallback = (0, 0, 1)
    // NOTE(review): `lmul` is used as an exponent everywhere else (`1 << lmul`), so this guard
    // multiplies a log2 value by the field count. It may have been meant as
    // `(1 << lmul) * nfields <= 8` -- confirm before changing; kept as-is to preserve the table.
    if (lmul * nfields > 8) {
      fallback
    } else {
      val emulDominates = lmul < emul
      val uopsPerField = 1 << (if (emulDominates) emul else lmul)
      if (uopIdx < 0 || uopsPerField <= 0) {
        // Nothing can match: negative index, or a shift amount that wrapped to a non-positive size.
        fallback
      } else {
        // uopIdx == field * uopsPerField + i has exactly one solution with 0 <= i < uopsPerField.
        val field = uopIdx / uopsPerField
        val i = uopIdx % uopsPerField
        if (field >= nfields) {
          fallback
        } else if (emulDominates) {
          // lmul < emul: several consecutive uops share one Vd register.
          val uopsPerVd = 1 << (emul - lmul)
          (i, i / uopsPerVd + field * (1 << lmul), if (i % uopsPerVd == 0) 1 else 0)
        } else {
          // lmul >= emul: several consecutive uops share one Vs2 register, each has its own Vd.
          val uopsPerVs2 = 1 << (lmul - emul)
          (i / uopsPerVs2, i + field * (1 << lmul), 1)
        }
      }
    }
  }

  // indexed load/store: one row per (emul, lmul, nf) key, in ascending key order.
  // Row layout: (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd).
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf   <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  // All 4 * 4 * 8 = 128 keys are listed above, so the all-zero default is never selected.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}

/**
 * Numeric constants mixed into the complex (multi-uop) vector decoder.
 */
trait VectorConstants {
  // Upper bound of the per-register unrolling loops in the uop splitter.
  val MAX_VLMUL = 8
  // Logical register number used as the temporary destination of the vsetvl "move to vector" uop.
  val FP_TMP_REG_MV = 32
  // First logical register number of the temporary vector registers used between split uops.
  val VECTOR_TMP_REG_LMUL = 33 // 33~47  ->  15
  // Number of indexedLSUopTable instances (one per uop slot) built by the decoder.
  val MAX_INDEXED_LS_UOPNUM = 64
}

/**
 * IO bundle of [[DecodeUnitComp]].
 */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): presumably a pipeline flush request -- its use is not visible in this part of the file.
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // vtype value that the VSET split connects into its uops instead of the latched one.
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // Up to RenameWidth split uops, each with its own ready/valid handshake.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // NOTE(review): looks like the number of uops presented on `out` -- confirm against the driver logic.
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
  private val inUopInfo = io.in.bits.uopInfo
  private val outValids = io.out.complexDecodedInsts.map(_.valid)
  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
  private val outComplexNum = io.complexNum

  val maxUopSize = MaxUopSize
  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
    }.elsewhen(inInstFields.RS1 === 0.U) {
      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
    }
  }

  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
  //input bits
  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)

  val src1 = Cat(0.U(1.W), instFields.RS1)
  val src2 = Cat(0.U(1.W), instFields.RS2)
  val dest = Cat(0.U(1.W), instFields.RD)

  val nf    = instFields.NF
  val width = instFields.WIDTH(1, 0)

  //output of DecodeUnit
  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
  val lmul = Wire(UInt(4.W))
  val isVsetSimple = Wire(Bool())

  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
  indexedLSRegOffset.map(_.src := 0.U)

  //pre decode
lmul := latchedUopInfo.lmul 158 isVsetSimple := latchedInst.isVset 159 val vlmulReg = latchedInst.vpu.vlmul 160 val vsewReg = latchedInst.vpu.vsew 161 162 //Type of uop Div 163 val typeOfSplit = latchedInst.uopSplitType 164 val src1Type = latchedInst.srcType(0) 165 val src1IsImm = src1Type === SrcType.imm 166 val src1IsFp = src1Type === SrcType.fp 167 168 numOfUop := latchedUopInfo.numOfUop 169 numOfWB := latchedUopInfo.numOfWB 170 171 //uops dispatch 172 val s_idle :: s_active :: Nil = Enum(2) 173 val state = RegInit(s_idle) 174 val stateNext = WireDefault(state) 175 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 176 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 177 val uopResNext = WireInit(uopRes) 178 val e64 = 3.U(2.W) 179 val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U) 180 val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U 181 val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U 182 183 //uop div up to maxUopSize 184 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 185 csBundle.foreach { case dst => 186 dst := latchedInst 187 dst.numUops := latchedUopInfo.numOfUop 188 dst.numWB := latchedUopInfo.numOfWB 189 dst.firstUop := false.B 190 dst.lastUop := false.B 191 dst.vlsInstr := false.B 192 } 193 194 csBundle(0).firstUop := true.B 195 csBundle(numOfUop - 1.U).lastUop := true.B 196 197 switch(typeOfSplit) { 198 is(UopSplitType.VSET) { 199 // In simple decoder, rfWen and vecWen are not set 200 when(isVsetSimple) { 201 // Default 202 // uop0 set rd, never flushPipe 203 csBundle(0).fuType := FuType.vsetiwi.U 204 csBundle(0).flushPipe := false.B 205 csBundle(0).rfWen := true.B 206 // uop1 set vl, vsetvl will flushPipe 207 csBundle(1).ldest := VCONFIG_IDX.U 208 csBundle(1).vecWen := true.B 209 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 210 // write nothing, uop0 is a nop instruction 211 csBundle(0).rfWen := false.B 212 
csBundle(0).fpWen := false.B 213 csBundle(0).vecWen := false.B 214 csBundle(1).fuType := FuType.vsetfwf.U 215 csBundle(1).srcType(0) := SrcType.vp 216 csBundle(1).lsrc(0) := VCONFIG_IDX.U 217 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 218 // uop0: mv vtype gpr to vector region 219 csBundle(0).srcType(0) := SrcType.xp 220 csBundle(0).srcType(1) := SrcType.no 221 csBundle(0).lsrc(1) := 0.U 222 csBundle(0).ldest := FP_TMP_REG_MV.U 223 csBundle(0).fuType := FuType.i2v.U 224 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 225 csBundle(0).rfWen := false.B 226 csBundle(0).fpWen := true.B 227 csBundle(0).vecWen := false.B 228 csBundle(0).flushPipe := false.B 229 // uop1: uvsetvcfg_vv 230 csBundle(1).fuType := FuType.vsetfwf.U 231 // vl 232 csBundle(1).srcType(0) := SrcType.vp 233 csBundle(1).lsrc(0) := VCONFIG_IDX.U 234 // vtype 235 csBundle(1).srcType(1) := SrcType.fp 236 csBundle(1).lsrc(1) := FP_TMP_REG_MV.U 237 csBundle(1).vecWen := true.B 238 csBundle(1).ldest := VCONFIG_IDX.U 239 } 240 // use bypass vtype from vtypeGen 241 csBundle(0).vpu.connectVType(io.vtypeBypass) 242 csBundle(1).vpu.connectVType(io.vtypeBypass) 243 } 244 } 245 is(UopSplitType.VEC_VVV) { 246 for (i <- 0 until MAX_VLMUL) { 247 csBundle(i).lsrc(0) := src1 + i.U 248 csBundle(i).lsrc(1) := src2 + i.U 249 csBundle(i).lsrc(2) := dest + i.U 250 csBundle(i).ldest := dest + i.U 251 csBundle(i).uopIdx := i.U 252 } 253 } 254 is(UopSplitType.VEC_VFV) { 255 /* 256 i to vector move 257 */ 258 csBundle(0).srcType(0) := SrcType.fp 259 csBundle(0).srcType(1) := SrcType.imm 260 csBundle(0).lsrc(1) := 0.U 261 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 262 csBundle(0).fuType := FuType.f2v.U 263 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 264 csBundle(0).vecWen := true.B 265 csBundle(0).vpu.isReverse := false.B 266 /* 267 LMUL 268 */ 269 for (i <- 0 until MAX_VLMUL) { 270 csBundle(i + 1).srcType(0) := SrcType.vp 271 csBundle(i + 1).lsrc(0) 
:= VECTOR_TMP_REG_LMUL.U 272 csBundle(i + 1).lsrc(1) := src2 + i.U 273 csBundle(i + 1).lsrc(2) := dest + i.U 274 csBundle(i + 1).ldest := dest + i.U 275 csBundle(i + 1).uopIdx := i.U 276 } 277 } 278 is(UopSplitType.VEC_EXT2) { 279 for (i <- 0 until MAX_VLMUL / 2) { 280 csBundle(2 * i).lsrc(1) := src2 + i.U 281 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 282 csBundle(2 * i).ldest := dest + (2 * i).U 283 csBundle(2 * i).uopIdx := (2 * i).U 284 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 285 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 286 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 287 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 288 } 289 } 290 is(UopSplitType.VEC_EXT4) { 291 for (i <- 0 until MAX_VLMUL / 4) { 292 csBundle(4 * i).lsrc(1) := src2 + i.U 293 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 294 csBundle(4 * i).ldest := dest + (4 * i).U 295 csBundle(4 * i).uopIdx := (4 * i).U 296 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 297 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 298 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 299 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 300 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 301 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 302 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 303 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 304 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 305 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 306 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 307 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 308 } 309 } 310 is(UopSplitType.VEC_EXT8) { 311 for (i <- 0 until MAX_VLMUL) { 312 csBundle(i).lsrc(1) := src2 313 csBundle(i).lsrc(2) := dest + i.U 314 csBundle(i).ldest := dest + i.U 315 csBundle(i).uopIdx := i.U 316 } 317 } 318 is(UopSplitType.VEC_0XV) { 319 /* 320 i/f to vector move 321 */ 322 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 323 csBundle(0).srcType(1) := SrcType.imm 324 csBundle(0).lsrc(1) := 0.U 325 csBundle(0).ldest := 
VECTOR_TMP_REG_LMUL.U 326 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 327 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 328 csBundle(0).rfWen := false.B 329 csBundle(0).fpWen := false.B 330 csBundle(0).vecWen := true.B 331 /* 332 vmv.s.x 333 */ 334 csBundle(1).srcType(0) := SrcType.vp 335 csBundle(1).srcType(1) := SrcType.imm 336 csBundle(1).srcType(2) := SrcType.vp 337 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 338 csBundle(1).lsrc(1) := 0.U 339 csBundle(1).lsrc(2) := dest 340 csBundle(1).ldest := dest 341 csBundle(1).rfWen := false.B 342 csBundle(1).fpWen := false.B 343 csBundle(1).vecWen := true.B 344 csBundle(1).uopIdx := 0.U 345 } 346 is(UopSplitType.VEC_VXV) { 347 /* 348 i to vector move 349 */ 350 csBundle(0).srcType(0) := SrcType.reg 351 csBundle(0).srcType(1) := SrcType.imm 352 csBundle(0).lsrc(1) := 0.U 353 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 354 csBundle(0).fuType := FuType.i2v.U 355 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 356 csBundle(0).vecWen := true.B 357 csBundle(0).vpu.isReverse := false.B 358 /* 359 LMUL 360 */ 361 for (i <- 0 until MAX_VLMUL) { 362 csBundle(i + 1).srcType(0) := SrcType.vp 363 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 364 csBundle(i + 1).lsrc(1) := src2 + i.U 365 csBundle(i + 1).lsrc(2) := dest + i.U 366 csBundle(i + 1).ldest := dest + i.U 367 csBundle(i + 1).uopIdx := i.U 368 } 369 } 370 is(UopSplitType.VEC_VVW) { 371 for (i <- 0 until MAX_VLMUL / 2) { 372 csBundle(2 * i).lsrc(0) := src1 + i.U 373 csBundle(2 * i).lsrc(1) := src2 + i.U 374 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 375 csBundle(2 * i).ldest := dest + (2 * i).U 376 csBundle(2 * i).uopIdx := (2 * i).U 377 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 378 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 379 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 380 csBundle(2 * i + 1).ldest := dest + (2 * i + 
1).U 381 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 382 } 383 } 384 is(UopSplitType.VEC_VFW) { 385 /* 386 f to vector move 387 */ 388 csBundle(0).srcType(0) := SrcType.fp 389 csBundle(0).srcType(1) := SrcType.imm 390 csBundle(0).lsrc(1) := 0.U 391 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 392 csBundle(0).fuType := FuType.f2v.U 393 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 394 csBundle(0).rfWen := false.B 395 csBundle(0).fpWen := false.B 396 csBundle(0).vecWen := true.B 397 398 for (i <- 0 until MAX_VLMUL / 2) { 399 csBundle(2 * i + 1).srcType(0) := SrcType.vp 400 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 401 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 402 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 403 csBundle(2 * i + 1).ldest := dest + (2 * i).U 404 csBundle(2 * i + 1).uopIdx := (2 * i).U 405 csBundle(2 * i + 2).srcType(0) := SrcType.vp 406 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 407 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 408 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 409 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 410 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 411 } 412 } 413 is(UopSplitType.VEC_WVW) { 414 for (i <- 0 until MAX_VLMUL / 2) { 415 csBundle(2 * i).lsrc(0) := src1 + i.U 416 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 417 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 418 csBundle(2 * i).ldest := dest + (2 * i).U 419 csBundle(2 * i).uopIdx := (2 * i).U 420 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 421 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 422 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 423 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 424 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 425 } 426 } 427 is(UopSplitType.VEC_VXW) { 428 /* 429 i to vector move 430 */ 431 csBundle(0).srcType(0) := SrcType.reg 432 csBundle(0).srcType(1) := SrcType.imm 433 csBundle(0).lsrc(1) := 0.U 434 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 435 csBundle(0).fuType := FuType.i2v.U 
436 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 437 csBundle(0).vecWen := true.B 438 439 for (i <- 0 until MAX_VLMUL / 2) { 440 csBundle(2 * i + 1).srcType(0) := SrcType.vp 441 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 442 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 443 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 444 csBundle(2 * i + 1).ldest := dest + (2 * i).U 445 csBundle(2 * i + 1).uopIdx := (2 * i).U 446 csBundle(2 * i + 2).srcType(0) := SrcType.vp 447 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 448 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 449 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 450 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 451 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 452 } 453 } 454 is(UopSplitType.VEC_WXW) { 455 /* 456 i to vector move 457 */ 458 csBundle(0).srcType(0) := SrcType.reg 459 csBundle(0).srcType(1) := SrcType.imm 460 csBundle(0).lsrc(1) := 0.U 461 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 462 csBundle(0).fuType := FuType.i2v.U 463 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 464 csBundle(0).vecWen := true.B 465 466 for (i <- 0 until MAX_VLMUL / 2) { 467 csBundle(2 * i + 1).srcType(0) := SrcType.vp 468 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 469 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 470 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 471 csBundle(2 * i + 1).ldest := dest + (2 * i).U 472 csBundle(2 * i + 1).uopIdx := (2 * i).U 473 csBundle(2 * i + 2).srcType(0) := SrcType.vp 474 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 475 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 476 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 477 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 478 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 479 } 480 } 481 is(UopSplitType.VEC_WVV) { 482 for (i <- 0 until MAX_VLMUL / 2) { 483 484 csBundle(2 * i).lsrc(0) := src1 + i.U 485 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 486 csBundle(2 * 
i).lsrc(2) := dest + i.U 487 csBundle(2 * i).ldest := dest + i.U 488 csBundle(2 * i).uopIdx := (2 * i).U 489 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 490 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 491 csBundle(2 * i + 1).lsrc(2) := dest + i.U 492 csBundle(2 * i + 1).ldest := dest + i.U 493 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 494 } 495 } 496 is(UopSplitType.VEC_WFW) { 497 /* 498 f to vector move 499 */ 500 csBundle(0).srcType(0) := SrcType.fp 501 csBundle(0).srcType(1) := SrcType.imm 502 csBundle(0).lsrc(1) := 0.U 503 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 504 csBundle(0).fuType := FuType.f2v.U 505 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 506 csBundle(0).rfWen := false.B 507 csBundle(0).fpWen := false.B 508 csBundle(0).vecWen := true.B 509 510 for (i <- 0 until MAX_VLMUL / 2) { 511 csBundle(2 * i + 1).srcType(0) := SrcType.vp 512 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 513 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 514 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 515 csBundle(2 * i + 1).ldest := dest + (2 * i).U 516 csBundle(2 * i + 1).uopIdx := (2 * i).U 517 csBundle(2 * i + 2).srcType(0) := SrcType.vp 518 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 519 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 520 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 521 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 522 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 523 } 524 } 525 is(UopSplitType.VEC_WXV) { 526 /* 527 i to vector move 528 */ 529 csBundle(0).srcType(0) := SrcType.reg 530 csBundle(0).srcType(1) := SrcType.imm 531 csBundle(0).lsrc(1) := 0.U 532 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 533 csBundle(0).fuType := FuType.i2v.U 534 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 535 csBundle(0).vecWen := true.B 536 537 for (i <- 0 until MAX_VLMUL / 2) { 538 csBundle(2 * i + 1).srcType(0) := SrcType.vp 539 csBundle(2 * 
i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 540 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 541 csBundle(2 * i + 1).lsrc(2) := dest + i.U 542 csBundle(2 * i + 1).ldest := dest + i.U 543 csBundle(2 * i + 1).uopIdx := (2 * i).U 544 csBundle(2 * i + 2).srcType(0) := SrcType.vp 545 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 546 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 547 csBundle(2 * i + 2).lsrc(2) := dest + i.U 548 csBundle(2 * i + 2).ldest := dest + i.U 549 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 550 } 551 } 552 is(UopSplitType.VEC_VVM) { 553 csBundle(0).lsrc(2) := dest 554 csBundle(0).ldest := dest 555 csBundle(0).uopIdx := 0.U 556 for (i <- 1 until MAX_VLMUL) { 557 csBundle(i).lsrc(0) := src1 + i.U 558 csBundle(i).lsrc(1) := src2 + i.U 559 csBundle(i).lsrc(2) := dest 560 csBundle(i).ldest := dest 561 csBundle(i).uopIdx := i.U 562 } 563 } 564 is(UopSplitType.VEC_VFM) { 565 /* 566 f to vector move 567 */ 568 csBundle(0).srcType(0) := SrcType.fp 569 csBundle(0).srcType(1) := SrcType.imm 570 csBundle(0).lsrc(1) := 0.U 571 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 572 csBundle(0).fuType := FuType.f2v.U 573 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 574 csBundle(0).rfWen := false.B 575 csBundle(0).fpWen := false.B 576 csBundle(0).vecWen := true.B 577 //LMUL 578 csBundle(1).srcType(0) := SrcType.vp 579 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 580 csBundle(1).lsrc(2) := dest 581 csBundle(1).ldest := dest 582 csBundle(1).uopIdx := 0.U 583 for (i <- 1 until MAX_VLMUL) { 584 csBundle(i + 1).srcType(0) := SrcType.vp 585 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 586 csBundle(i + 1).lsrc(1) := src2 + i.U 587 csBundle(i + 1).lsrc(2) := dest 588 csBundle(i + 1).ldest := dest 589 csBundle(i + 1).uopIdx := i.U 590 } 591 csBundle(numOfUop - 1.U).ldest := dest 592 } 593 is(UopSplitType.VEC_VXM) { 594 /* 595 i to vector move 596 */ 597 csBundle(0).srcType(0) := SrcType.reg 598 csBundle(0).srcType(1) := SrcType.imm 599 
csBundle(0).lsrc(1) := 0.U 600 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 601 csBundle(0).fuType := FuType.i2v.U 602 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 603 csBundle(0).vecWen := true.B 604 //LMUL 605 csBundle(1).srcType(0) := SrcType.vp 606 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 607 csBundle(1).lsrc(2) := dest 608 csBundle(1).ldest := dest 609 csBundle(1).uopIdx := 0.U 610 for (i <- 1 until MAX_VLMUL) { 611 csBundle(i + 1).srcType(0) := SrcType.vp 612 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 613 csBundle(i + 1).lsrc(1) := src2 + i.U 614 csBundle(i + 1).lsrc(2) := dest 615 csBundle(i + 1).ldest := dest 616 csBundle(i + 1).uopIdx := i.U 617 } 618 csBundle(numOfUop - 1.U).ldest := dest 619 } 620 is(UopSplitType.VEC_SLIDE1UP) { 621 /* 622 i to vector move 623 */ 624 csBundle(0).srcType(0) := SrcType.reg 625 csBundle(0).srcType(1) := SrcType.imm 626 csBundle(0).lsrc(1) := 0.U 627 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 628 csBundle(0).fuType := FuType.i2v.U 629 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 630 csBundle(0).vecWen := true.B 631 //LMUL 632 csBundle(1).srcType(0) := SrcType.vp 633 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 634 csBundle(1).lsrc(2) := dest 635 csBundle(1).ldest := dest 636 csBundle(1).uopIdx := 0.U 637 for (i <- 1 until MAX_VLMUL) { 638 csBundle(i + 1).srcType(0) := SrcType.vp 639 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 640 csBundle(i + 1).lsrc(1) := src2 + i.U 641 csBundle(i + 1).lsrc(2) := dest + i.U 642 csBundle(i + 1).ldest := dest + i.U 643 csBundle(i + 1).uopIdx := i.U 644 } 645 } 646 is(UopSplitType.VEC_FSLIDE1UP) { 647 /* 648 i to vector move 649 */ 650 csBundle(0).srcType(0) := SrcType.fp 651 csBundle(0).srcType(1) := SrcType.imm 652 csBundle(0).lsrc(1) := 0.U 653 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 654 csBundle(0).fuType := FuType.f2v.U 655 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), 
vsewReg) 656 csBundle(0).rfWen := false.B 657 csBundle(0).fpWen := false.B 658 csBundle(0).vecWen := true.B 659 //LMUL 660 csBundle(1).srcType(0) := SrcType.vp 661 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 662 csBundle(1).lsrc(1) := src2 663 csBundle(1).lsrc(2) := dest 664 csBundle(1).ldest := dest 665 csBundle(1).uopIdx := 0.U 666 for (i <- 1 until MAX_VLMUL) { 667 csBundle(i + 1).srcType(0) := SrcType.vp 668 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 669 csBundle(i + 1).lsrc(1) := src2 + i.U 670 csBundle(i + 1).lsrc(2) := dest + i.U 671 csBundle(i + 1).ldest := dest + i.U 672 csBundle(i + 1).uopIdx := i.U 673 } 674 } 675 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 676 /* 677 i to vector move 678 */ 679 csBundle(0).srcType(0) := SrcType.reg 680 csBundle(0).srcType(1) := SrcType.imm 681 csBundle(0).lsrc(1) := 0.U 682 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 683 csBundle(0).fuType := FuType.i2v.U 684 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 685 csBundle(0).vecWen := true.B 686 //LMUL 687 for (i <- 0 until MAX_VLMUL) { 688 csBundle(2 * i + 1).srcType(0) := SrcType.vp 689 csBundle(2 * i + 1).srcType(1) := SrcType.vp 690 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 691 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 692 csBundle(2 * i + 1).lsrc(2) := dest + i.U 693 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 694 csBundle(2 * i + 1).uopIdx := (2 * i).U 695 if (2 * i + 2 < MAX_VLMUL * 2) { 696 csBundle(2 * i + 2).srcType(0) := SrcType.vp 697 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 698 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 699 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 700 csBundle(2 * i + 2).ldest := dest + i.U 701 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 702 } 703 } 704 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 705 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 706 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 707 } 708 
is(UopSplitType.VEC_FSLIDE1DOWN) { 709 /* 710 i to vector move 711 */ 712 csBundle(0).srcType(0) := SrcType.fp 713 csBundle(0).srcType(1) := SrcType.imm 714 csBundle(0).lsrc(1) := 0.U 715 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 716 csBundle(0).fuType := FuType.f2v.U 717 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 718 csBundle(0).rfWen := false.B 719 csBundle(0).fpWen := false.B 720 csBundle(0).vecWen := true.B 721 //LMUL 722 for (i <- 0 until MAX_VLMUL) { 723 csBundle(2 * i + 1).srcType(0) := SrcType.vp 724 csBundle(2 * i + 1).srcType(1) := SrcType.vp 725 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 726 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 727 csBundle(2 * i + 1).lsrc(2) := dest + i.U 728 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 729 csBundle(2 * i + 1).uopIdx := (2 * i).U 730 if (2 * i + 2 < MAX_VLMUL * 2) { 731 csBundle(2 * i + 2).srcType(0) := SrcType.vp 732 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 733 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 734 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 735 csBundle(2 * i + 2).ldest := dest + i.U 736 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 737 } 738 } 739 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 740 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 741 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 742 } 743 is(UopSplitType.VEC_VRED) { 744 when(vlmulReg === "b001".U) { 745 csBundle(0).srcType(2) := SrcType.DC 746 csBundle(0).lsrc(0) := src2 + 1.U 747 csBundle(0).lsrc(1) := src2 748 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 749 csBundle(0).uopIdx := 0.U 750 } 751 when(vlmulReg === "b010".U) { 752 csBundle(0).srcType(2) := SrcType.DC 753 csBundle(0).lsrc(0) := src2 + 1.U 754 csBundle(0).lsrc(1) := src2 755 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 756 csBundle(0).uopIdx := 0.U 757 758 csBundle(1).srcType(2) := SrcType.DC 759 csBundle(1).lsrc(0) := src2 + 3.U 760 csBundle(1).lsrc(1) := src2 + 2.U 761 
csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 762 csBundle(1).uopIdx := 1.U 763 764 csBundle(2).srcType(2) := SrcType.DC 765 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 766 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 767 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 768 csBundle(2).uopIdx := 2.U 769 } 770 when(vlmulReg === "b011".U) { 771 for (i <- 0 until MAX_VLMUL) { 772 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 773 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 774 csBundle(i).lsrc(1) := src2 + (i * 2).U 775 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 776 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 777 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 778 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 779 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 780 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 781 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 782 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 783 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 784 } 785 csBundle(i).srcType(2) := SrcType.DC 786 csBundle(i).uopIdx := i.U 787 } 788 } 789 when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) { 790 /* 791 * 2 <= vlmul <= 8 792 */ 793 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 794 csBundle(numOfUop - 1.U).lsrc(0) := src1 795 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 796 csBundle(numOfUop - 1.U).lsrc(2) := dest 797 csBundle(numOfUop - 1.U).ldest := dest 798 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 799 } 800 } 801 is(UopSplitType.VEC_VFRED) { 802 val vlmul = vlmulReg 803 val vsew = vsewReg 804 when(vlmul === VLmul.m8){ 805 for (i <- 0 until 4) { 806 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 807 csBundle(i).lsrc(1) := src2 + (i * 2).U 808 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 809 csBundle(i).uopIdx := i.U 810 } 811 for (i <- 4 until 6) { 812 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 813 csBundle(i).lsrc(1) := 
(VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 814 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 815 csBundle(i).uopIdx := i.U 816 } 817 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 818 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 819 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 820 csBundle(6).uopIdx := 6.U 821 when(vsew === VSew.e64) { 822 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 823 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 824 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 825 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 826 csBundle(7).uopIdx := 7.U 827 csBundle(8).lsrc(0) := src1 828 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 829 csBundle(8).ldest := dest 830 csBundle(8).uopIdx := 8.U 831 } 832 when(vsew === VSew.e32) { 833 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 834 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 835 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 836 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 837 csBundle(7).uopIdx := 7.U 838 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 839 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 840 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 841 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 842 csBundle(8).uopIdx := 8.U 843 csBundle(9).lsrc(0) := src1 844 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 845 csBundle(9).ldest := dest 846 csBundle(9).uopIdx := 9.U 847 } 848 when(vsew === VSew.e16) { 849 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 850 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 851 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 852 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 853 csBundle(7).uopIdx := 7.U 854 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 855 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 856 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 857 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 858 csBundle(8).uopIdx := 8.U 859 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 860 csBundle(9).lsrc(1) := 
(VECTOR_TMP_REG_LMUL + 8).U 861 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 862 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 863 csBundle(9).uopIdx := 9.U 864 csBundle(10).lsrc(0) := src1 865 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 866 csBundle(10).ldest := dest 867 csBundle(10).uopIdx := 10.U 868 } 869 } 870 when(vlmul === VLmul.m4) { 871 for (i <- 0 until 2) { 872 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 873 csBundle(i).lsrc(1) := src2 + (i * 2).U 874 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 875 csBundle(i).uopIdx := i.U 876 } 877 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 878 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 879 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 880 csBundle(2).uopIdx := 2.U 881 when(vsew === VSew.e64) { 882 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 883 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 884 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 885 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 886 csBundle(3).uopIdx := 3.U 887 csBundle(4).lsrc(0) := src1 888 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 889 csBundle(4).ldest := dest 890 csBundle(4).uopIdx := 4.U 891 } 892 when(vsew === VSew.e32) { 893 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 894 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 895 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 896 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 897 csBundle(3).uopIdx := 3.U 898 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 899 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 900 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 901 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 902 csBundle(4).uopIdx := 4.U 903 csBundle(5).lsrc(0) := src1 904 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 905 csBundle(5).ldest := dest 906 csBundle(5).uopIdx := 5.U 907 } 908 when(vsew === VSew.e16) { 909 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 910 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 911 csBundle(3).ldest 
:= (VECTOR_TMP_REG_LMUL + 3).U 912 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 913 csBundle(3).uopIdx := 3.U 914 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 915 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 916 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 917 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 918 csBundle(4).uopIdx := 4.U 919 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 920 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 921 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 922 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 923 csBundle(5).uopIdx := 5.U 924 csBundle(6).lsrc(0) := src1 925 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 926 csBundle(6).ldest := dest 927 csBundle(6).uopIdx := 6.U 928 } 929 } 930 when(vlmul === VLmul.m2) { 931 csBundle(0).lsrc(0) := src2 + 1.U 932 csBundle(0).lsrc(1) := src2 + 0.U 933 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 934 csBundle(0).uopIdx := 0.U 935 when(vsew === VSew.e64) { 936 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 937 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 938 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 939 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 940 csBundle(1).uopIdx := 1.U 941 csBundle(2).lsrc(0) := src1 942 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 943 csBundle(2).ldest := dest 944 csBundle(2).uopIdx := 2.U 945 } 946 when(vsew === VSew.e32) { 947 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 948 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 949 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 950 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 951 csBundle(1).uopIdx := 1.U 952 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 953 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 954 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 955 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 956 csBundle(2).uopIdx := 2.U 957 csBundle(3).lsrc(0) := src1 958 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 959 csBundle(3).ldest := dest 960 
csBundle(3).uopIdx := 3.U 961 } 962 when(vsew === VSew.e16) { 963 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 964 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 965 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 966 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 967 csBundle(1).uopIdx := 1.U 968 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 969 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 970 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 971 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 972 csBundle(2).uopIdx := 2.U 973 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 974 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 975 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 976 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 977 csBundle(3).uopIdx := 3.U 978 csBundle(4).lsrc(0) := src1 979 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 980 csBundle(4).ldest := dest 981 csBundle(4).uopIdx := 4.U 982 } 983 } 984 when(vlmul === VLmul.m1) { 985 when(vsew === VSew.e64) { 986 csBundle(0).lsrc(0) := src2 987 csBundle(0).lsrc(1) := src2 988 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 989 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 990 csBundle(0).uopIdx := 0.U 991 csBundle(1).lsrc(0) := src1 992 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 993 csBundle(1).ldest := dest 994 csBundle(1).uopIdx := 1.U 995 } 996 when(vsew === VSew.e32) { 997 csBundle(0).lsrc(0) := src2 998 csBundle(0).lsrc(1) := src2 999 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1000 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1001 csBundle(0).uopIdx := 0.U 1002 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1003 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1004 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1005 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1006 csBundle(1).uopIdx := 1.U 1007 csBundle(2).lsrc(0) := src1 1008 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1009 csBundle(2).ldest := dest 1010 csBundle(2).uopIdx := 2.U 1011 } 1012 when(vsew === 
VSew.e16) { 1013 csBundle(0).lsrc(0) := src2 1014 csBundle(0).lsrc(1) := src2 1015 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1016 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1017 csBundle(0).uopIdx := 0.U 1018 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1019 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1020 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1021 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1022 csBundle(1).uopIdx := 1.U 1023 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1024 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1025 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1026 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 1027 csBundle(2).uopIdx := 2.U 1028 csBundle(3).lsrc(0) := src1 1029 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1030 csBundle(3).ldest := dest 1031 csBundle(3).uopIdx := 3.U 1032 } 1033 } 1034 when(vlmul === VLmul.mf2) { 1035 when(vsew === VSew.e32) { 1036 csBundle(0).lsrc(0) := src2 1037 csBundle(0).lsrc(1) := src2 1038 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1039 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1040 csBundle(0).uopIdx := 0.U 1041 csBundle(1).lsrc(0) := src1 1042 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1043 csBundle(1).ldest := dest 1044 csBundle(1).uopIdx := 1.U 1045 } 1046 when(vsew === VSew.e16) { 1047 csBundle(0).lsrc(0) := src2 1048 csBundle(0).lsrc(1) := src2 1049 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1050 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1051 csBundle(0).uopIdx := 0.U 1052 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1053 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1054 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1055 csBundle(1).vpu.fpu.isFoldTo1_8 := true.B 1056 csBundle(1).uopIdx := 1.U 1057 csBundle(2).lsrc(0) := src1 1058 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1059 csBundle(2).ldest := dest 1060 csBundle(2).uopIdx := 2.U 1061 } 1062 } 1063 when(vlmul === VLmul.mf4) { 1064 when(vsew === VSew.e16) { 1065 
csBundle(0).lsrc(0) := src2 1066 csBundle(0).lsrc(1) := src2 1067 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1068 csBundle(0).vpu.fpu.isFoldTo1_8 := true.B 1069 csBundle(0).uopIdx := 0.U 1070 csBundle(1).lsrc(0) := src1 1071 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1072 csBundle(1).ldest := dest 1073 csBundle(1).uopIdx := 1.U 1074 } 1075 } 1076 } 1077 1078 is(UopSplitType.VEC_VFREDOSUM) { 1079 import yunsuan.VfaluType 1080 val vlmul = vlmulReg 1081 val vsew = vsewReg 1082 val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum 1083 when(vlmul === VLmul.m8) { 1084 when(vsew === VSew.e64) { 1085 val vlmax = 16 1086 for (i <- 0 until vlmax) { 1087 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1088 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1089 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1090 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1091 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1092 csBundle(i).uopIdx := i.U 1093 } 1094 } 1095 when(vsew === VSew.e32) { 1096 val vlmax = 32 1097 for (i <- 0 until vlmax) { 1098 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1099 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1100 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1101 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1102 csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B) 1103 csBundle(i).uopIdx := i.U 1104 } 1105 } 1106 when(vsew === VSew.e16) { 1107 val vlmax = 64 1108 for (i <- 0 until vlmax) { 1109 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1110 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1111 csBundle(i).lsrc(2) := (if (i % 
8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1112 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1113 csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B) 1114 csBundle(i).uopIdx := i.U 1115 } 1116 } 1117 } 1118 when(vlmul === VLmul.m4) { 1119 when(vsew === VSew.e64) { 1120 val vlmax = 8 1121 for (i <- 0 until vlmax) { 1122 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1123 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1124 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1125 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1126 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1127 csBundle(i).uopIdx := i.U 1128 } 1129 } 1130 when(vsew === VSew.e32) { 1131 val vlmax = 16 1132 for (i <- 0 until vlmax) { 1133 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1134 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1135 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1136 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1137 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1138 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1139 csBundle(i).uopIdx := i.U 1140 } 1141 } 1142 when(vsew === VSew.e16) { 1143 val vlmax = 32 1144 for (i <- 0 until vlmax) { 1145 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1146 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1147 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, 
src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1148 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1149 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1150 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1151 csBundle(i).uopIdx := i.U 1152 } 1153 } 1154 } 1155 when(vlmul === VLmul.m2) { 1156 when(vsew === VSew.e64) { 1157 val vlmax = 4 1158 for (i <- 0 until vlmax) { 1159 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1160 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1161 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1162 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1163 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1164 csBundle(i).uopIdx := i.U 1165 } 1166 } 1167 when(vsew === VSew.e32) { 1168 val vlmax = 8 1169 for (i <- 0 until vlmax) { 1170 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1171 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1172 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1173 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1174 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1175 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1176 csBundle(i).uopIdx := i.U 1177 } 1178 } 1179 when(vsew === VSew.e16) { 1180 val vlmax = 16 1181 for (i <- 0 until vlmax) { 1182 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1183 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1184 csBundle(i).lsrc(2) := (if (i % 8 
== 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1185 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1186 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1187 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1188 csBundle(i).uopIdx := i.U 1189 } 1190 } 1191 } 1192 when(vlmul === VLmul.m1) { 1193 when(vsew === VSew.e64) { 1194 val vlmax = 2 1195 for (i <- 0 until vlmax) { 1196 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1197 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1198 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1199 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1200 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1201 csBundle(i).uopIdx := i.U 1202 } 1203 } 1204 when(vsew === VSew.e32) { 1205 val vlmax = 4 1206 for (i <- 0 until vlmax) { 1207 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1208 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1209 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1210 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1211 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1212 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1213 csBundle(i).uopIdx := i.U 1214 } 1215 } 1216 when(vsew === VSew.e16) { 1217 val vlmax = 8 1218 for (i <- 0 until vlmax) { 1219 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1220 csBundle(i).lsrc(1) := (if (i % 8 == 0) 
src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1221 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1222 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1223 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1224 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1225 csBundle(i).uopIdx := i.U 1226 } 1227 } 1228 } 1229 when(vlmul === VLmul.mf2) { 1230 when(vsew === VSew.e32) { 1231 val vlmax = 2 1232 for (i <- 0 until vlmax) { 1233 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1234 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1235 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1236 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1237 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1238 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1239 csBundle(i).uopIdx := i.U 1240 } 1241 } 1242 when(vsew === VSew.e16) { 1243 val vlmax = 4 1244 for (i <- 0 until vlmax) { 1245 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1246 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1247 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1248 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1249 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1250 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B 
else true.B) 1251 csBundle(i).uopIdx := i.U 1252 } 1253 } 1254 } 1255 when(vlmul === VLmul.mf4) { 1256 when(vsew === VSew.e16) { 1257 val vlmax = 2 1258 for (i <- 0 until vlmax) { 1259 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1260 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1261 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1262 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1263 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1264 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1265 csBundle(i).uopIdx := i.U 1266 } 1267 } 1268 } 1269 } 1270 1271 is(UopSplitType.VEC_SLIDEUP) { 1272 // i to vector move 1273 csBundle(0).srcType(0) := SrcType.reg 1274 csBundle(0).srcType(1) := SrcType.imm 1275 csBundle(0).lsrc(1) := 0.U 1276 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1277 csBundle(0).fuType := FuType.i2v.U 1278 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 1279 csBundle(0).vecWen := true.B 1280 // LMUL 1281 for (i <- 0 until MAX_VLMUL) 1282 for (j <- 0 to i) { 1283 val old_vd = if (j == 0) { 1284 dest + i.U 1285 } else (VECTOR_TMP_REG_LMUL + j).U 1286 val vd = if (j == i) { 1287 dest + i.U 1288 } else (VECTOR_TMP_REG_LMUL + j + 1).U 1289 csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp 1290 csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1291 csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U 1292 csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd 1293 csBundle(i * (i + 1) / 2 + j + 1).ldest := vd 1294 csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U 1295 } 1296 } 1297 1298 is(UopSplitType.VEC_SLIDEDOWN) { 1299 // i to vector move 1300 
csBundle(0).srcType(0) := SrcType.reg 1301 csBundle(0).srcType(1) := SrcType.imm 1302 csBundle(0).lsrc(1) := 0.U 1303 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1304 csBundle(0).fuType := FuType.i2v.U 1305 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 1306 csBundle(0).vecWen := true.B 1307 // LMUL 1308 for (i <- 0 until MAX_VLMUL) 1309 for (j <- (0 to i).reverse) { 1310 when(i.U < lmul) { 1311 val old_vd = if (j == 0) { 1312 dest + lmul - 1.U - i.U 1313 } else (VECTOR_TMP_REG_LMUL + j).U 1314 val vd = if (j == i) { 1315 dest + lmul - 1.U - i.U 1316 } else (VECTOR_TMP_REG_LMUL + j + 1).U 1317 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp 1318 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1319 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U 1320 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd 1321 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd 1322 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U 1323 } 1324 } 1325 } 1326 1327 is(UopSplitType.VEC_M0X) { 1328 // LMUL 1329 for (i <- 0 until MAX_VLMUL) { 1330 val srcType0 = if (i == 0) SrcType.DC else SrcType.vp 1331 val ldest = (VECTOR_TMP_REG_LMUL + i).U 1332 csBundle(i).srcType(0) := srcType0 1333 csBundle(i).srcType(1) := SrcType.vp 1334 csBundle(i).rfWen := false.B 1335 csBundle(i).fpWen := false.B 1336 csBundle(i).vecWen := true.B 1337 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U 1338 csBundle(i).lsrc(1) := src2 1339 // csBundle(i).lsrc(2) := dest + i.U DontCare 1340 csBundle(i).ldest := ldest 1341 csBundle(i).uopIdx := i.U 1342 } 1343 csBundle(lmul - 1.U).rfWen := true.B 1344 csBundle(lmul - 1.U).fpWen := false.B 1345 csBundle(lmul - 1.U).vecWen := false.B 1346 csBundle(lmul - 1.U).ldest := dest 1347 } 1348 1349 
is(UopSplitType.VEC_MVV) {
  // One pair of uops per register-group index i; both uops of a pair read the same sources:
  //   uop 2*i     : old vd (lsrc(2)) and destination are both dest + i
  //   uop 2*i + 1 : destination is the temporary register VECTOR_TMP_REG_LMUL + i;
  //                 lsrc(2) is not assigned here and keeps its earlier value
  // For i == 0, source 0 is tagged SrcType.DC (presumably "don't care" -- TODO confirm), so the
  // register number driven onto lsrc(0) (VECTOR_TMP_REG_LMUL - 1) is not expected to be consumed.
  for (i <- 0 until MAX_VLMUL) {
    val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
    val prevTmpReg = (VECTOR_TMP_REG_LMUL + i - 1).U

    val vdUop = csBundle(i * 2 + 0)
    vdUop.srcType(0) := srcType0
    vdUop.srcType(1) := SrcType.vp
    vdUop.lsrc(0) := prevTmpReg
    vdUop.lsrc(1) := src2
    vdUop.lsrc(2) := dest + i.U
    vdUop.ldest := dest + i.U
    vdUop.uopIdx := (i * 2 + 0).U

    val tmpUop = csBundle(i * 2 + 1)
    tmpUop.srcType(0) := srcType0
    tmpUop.srcType(1) := SrcType.vp
    tmpUop.lsrc(0) := prevTmpReg
    tmpUop.lsrc(1) := src2
    tmpUop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    tmpUop.uopIdx := (i * 2 + 1).U
  }
}

is(UopSplitType.VEC_M0X_VFIRST) {
  // Single uop: redirect the result to the integer register file at `dest`.
  csBundle(0).rfWen := true.B
  csBundle(0).fpWen := false.B
  csBundle(0).vecWen := false.B
  csBundle(0).ldest := dest
}

is(UopSplitType.VEC_VWW) {
  // Uops with index i < lmul read src2 + i on both sources; the remaining uops read a pair of
  // temporaries, VECTOR_TMP_REG_LMUL + 2 * (i - lmul) and the register after it.
  // Every uop writes the temporary VECTOR_TMP_REG_LMUL + i and marks source 2 as SrcType.DC.
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    // Identical in both branches of the original when/otherwise, so assigned once.
    uop.srcType(2) := SrcType.DC
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    }.otherwise {
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat(i.U - lmul, 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }
  // The last uop (index numOfUop - 1) takes src1 as source 0 and reads/writes the real
  // destination. These connections used to be repeated inside the loop body; they do not
  // depend on the loop index, and with last-connect semantics only the final copy matters,
  // so emitting them once after the loop yields the same hardware with far fewer
  // dynamic-index connections.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(2) := SrcType.vp
  lastUop.lsrc(0) := src1
  lastUop.lsrc(2) := dest
  lastUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // len x len uops: row i uses index register src1 + i and produces dest + i; column j reads
  // source register src2 + j. Partial results of a row are chained through the temporaries
  // VECTOR_TMP_REG_LMUL + j, starting from the old value of dest + i.
  def genCsBundle_VEC_RGATHER(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val idx = i * len + j
      val uop = csBundle(idx)
      uop.lsrc(0) := src1 + i.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := idx.U
    }
  }
  // Only these three vlmul encodings are expanded here; other encodings get no assignment
  // from this case.
  switch(vlmulReg) {
    is("b001".U) {
      genCsBundle_VEC_RGATHER(2)
    }
    is("b010".U) {
      genCsBundle_VEC_RGATHER(4)
    }
    is("b011".U) {
      genCsBundle_VEC_RGATHER(8)
    }
  }
}

is(UopSplitType.VEC_RGATHER_VX) {
  // Same len x len scheme as above, shifted by one uop slot: slot 0 holds the move into
  // VECTOR_TMP_REG_LMUL, which every gather uop then reads as source 0. The uopIdx values
  // still start at 0 for the first gather uop.
  def genCsBundle_RGATHER_VX(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val gatherIdx = i * len + j
      val uop = csBundle(gatherIdx + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
      uop.uopIdx := gatherIdx.U
    }
  }

  // Slot 0: integer (or immediate, when src1IsImm) to vector move.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B

  // Default expansion for a single register; the larger encodings below override it.
  genCsBundle_RGATHER_VX(1)
  switch(vlmulReg) {
    is("b001".U) {
      genCsBundle_RGATHER_VX(2)
    }
    is("b010".U) {
      genCsBundle_RGATHER_VX(4)
    }
    is("b011".U) {
      genCsBundle_RGATHER_VX(8)
    }
  }
}

is(UopSplitType.VEC_RGATHEREI16) {
  // vsewReg == 0: two uops per (i, j) pair. The first uses index register src1 + 2*i, the
  // second src1 + 2*i + 1; they are chained through the temporary VECTOR_TMP_REG_LMUL + 2*j.
  def genCsBundle_VEC_RGATHEREI16_SEW8(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val firstIdx = (i * len + j) * 2
      val secondIdx = firstIdx + 1
      val linkReg = (VECTOR_TMP_REG_LMUL + j * 2).U

      val firstUop = csBundle(firstIdx)
      firstUop.lsrc(0) := src1 + (i * 2 + 0).U
      firstUop.lsrc(1) := src2 + j.U
      firstUop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
      firstUop.ldest := linkReg
      firstUop.uopIdx := firstIdx.U

      val secondUop = csBundle(secondIdx)
      secondUop.lsrc(0) := src1 + (i * 2 + 1).U
      secondUop.lsrc(1) := src2 + j.U
      secondUop.lsrc(2) := linkReg
      secondUop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
      secondUop.uopIdx := secondIdx.U
    }
  }

  // All other element widths: one uop per (i, j) pair. `rowsPerIndexReg` is how many
  // consecutive rows i share one index register (1 in the fallback case, 2 for VSew.e32,
  // 4 for VSew.e64).
  def genCsBundle_VEC_RGATHEREI16(len: Int, rowsPerIndexReg: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val idx = i * len + j
      val uop = csBundle(idx)
      uop.lsrc(0) := src1 + (i / rowsPerIndexReg).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := idx.U
    }
  }

  // Pick the generator from vsewReg. When `sew8Split` is false the two-uop variant is not
  // elaborated at all and vsewReg == 0 falls through to the fallback generator.
  def genForLen(len: Int, sew8Split: Boolean): Unit = {
    def genOneUopPerPair(): Unit = {
      when(vsewReg === VSew.e32) {
        genCsBundle_VEC_RGATHEREI16(len, 2)
      }.elsewhen(vsewReg === VSew.e64) {
        genCsBundle_VEC_RGATHEREI16(len, 4)
      }.otherwise {
        genCsBundle_VEC_RGATHEREI16(len, 1)
      }
    }
    if (sew8Split) {
      when(!vsewReg.orR) {
        genCsBundle_VEC_RGATHEREI16_SEW8(len)
      }.otherwise {
        genOneUopPerPair()
      }
    } else {
      genOneUopPerPair()
    }
  }

  // Default expansion for a single register; the larger encodings below override it.
  genForLen(1, sew8Split = true)
  switch(vlmulReg) {
    is("b001".U) {
      genForLen(2, sew8Split = true)
    }
    is("b010".U) {
      genForLen(4, sew8Split = true)
    }
    is("b011".U) {
      genForLen(8, sew8Split = false)
    }
  }
}

is(UopSplitType.VEC_COMPRESS) {
  // Round i works on source register src2 + i and occupies uop slots starting at
  // i * (i + 3) / 2. It has i + 1 uops for j = 0..i plus, except in the last round, one extra
  // uop (j == i + 1) that writes VECTOR_TMP_REG_LMUL and has sources 0 and 2 set to DontCare.
  // The last round writes the real destinations dest + j; earlier rounds write the
  // temporaries VECTOR_TMP_REG_LMUL + j + 1.
  def genCsBundle_VEC_COMPRESS(len: Int): Unit = {
    for (i <- 0 until len) {
      val isLastRound = i == len - 1
      val firstSlot = i * (i + 3) / 2
      val uopsInRound = if (isLastRound) i + 1 else i + 2
      for (j <- 0 until uopsInRound) {
        val isExtraUop = j == i + 1
        val slot = firstSlot + j
        val uop = csBundle(slot)
        val vd_old = if (i == j) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
        val vd =
          if (isLastRound) dest + j.U
          else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + j + 1).U
        val src13Type = if (isExtraUop) DontCare else SrcType.vp
        uop.srcType(0) := src13Type
        uop.srcType(1) := SrcType.vp
        uop.srcType(2) := src13Type
        uop.srcType(3) := SrcType.vp
        uop.lsrc(0) := src1
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := vd_old
        uop.lsrc(3) := VECTOR_TMP_REG_LMUL.U
        uop.ldest := vd
        uop.uopIdx := slot.U
      }
    }
  }
  switch(vlmulReg) {
    is("b001".U) {
      genCsBundle_VEC_COMPRESS(2)
    }
    is("b010".U) {
      genCsBundle_VEC_COMPRESS(4)
    }
    is("b011".U) {
      genCsBundle_VEC_COMPRESS(8)
    }
  }
}

is(UopSplitType.VEC_MVNR) {
  // Uop i simply works on the i-th register of every operand group.
  (0 until MAX_VLMUL).foreach { i =>
    val uop = csBundle(i)
    uop.lsrc(0) := src1 + i.U
    uop.lsrc(1) := src2 + i.U
    uop.lsrc(2) := dest + i.U
    uop.ldest := dest + i.U
    uop.uopIdx := i.U
  }
}

is(UopSplitType.VEC_US_LDST) {
  // Slot 0: move an integer register into FP_TMP_REG_MV (noted as "FMV.D.X" in the original).
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := true.B
  moveUop.vecWen := false.B
  moveUop.vlsInstr := true.B

  // Slots 1..MAX_VLMUL: one memory uop per destination register, reading the moved value.
  (0 until MAX_VLMUL).foreach { i =>
    val memUop = csBundle(i + 1)
    val vdReg = dest + i.U
    memUop.srcType(0) := SrcType.fp
    memUop.lsrc(0) := FP_TMP_REG_MV.U
    memUop.lsrc(2) := vdReg // old vd
    memUop.ldest := vdReg
    memUop.uopIdx := i.U
    memUop.vlsInstr := true.B
  }

  // When isUsSegment is set, the first uop waits forward and the last one blocks backward.
  moveUop.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}

is(UopSplitType.VEC_S_LDST) {
  // Slots 0 and 1 are integer-to-fp moves (noted as "FMV.D.X" in the original):
  //   slot 0 -> FP_TMP_REG_MV, source register left as assigned before this case
  //   slot 1 -> VECTOR_TMP_REG_LMUL, source register taken from latchedInst.lsrc(1)
  Seq((csBundle(0), FP_TMP_REG_MV), (csBundle(1), VECTOR_TMP_REG_LMUL)).foreach {
    case (moveUop, tmpReg) =>
      moveUop.srcType(0) := SrcType.reg
      moveUop.srcType(1) := SrcType.imm
      moveUop.lsrc(1) := 0.U
      moveUop.ldest := tmpReg.U
      moveUop.fuType := FuType.i2v.U
      moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      moveUop.rfWen := false.B
      moveUop.fpWen := true.B
      moveUop.vecWen := false.B
      moveUop.vlsInstr := true.B
  }
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // Slots 2..MAX_VLMUL + 1: one memory uop per destination register, reading both moved values.
  (0 until MAX_VLMUL).foreach { i =>
    val memUop = csBundle(i + 2)
    val vdReg = dest + i.U
    memUop.srcType(0) := SrcType.fp
    memUop.srcType(1) := SrcType.fp
    memUop.lsrc(0) := FP_TMP_REG_MV.U
    memUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(2) := vdReg // old vd
    memUop.ldest := vdReg
    memUop.uopIdx := i.U
    memUop.vlsInstr := true.B
  }

  // When isSdSegment is set, the first uop waits forward and the last one blocks backward.
  csBundle(0).waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
is(UopSplitType.VEC_I_LDST) {
  // Slot 0 moves an integer register into FP_TMP_REG_MV (noted as "FMV.D.X" in the original);
  // every following uop reads that register as its fp source 0.
  val vlmul = vlmulReg
  val vsew = Cat(0.U(1.W), vsewReg)
  val veew = Cat(0.U(1.W), width)
  // `1.U + ~vsew` is the two's-complement negation of vsew, so with equally wide operands this
  // is veew - vsew + vlmul (TODO confirm the operand widths match).
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt

  // Map encodings 1..3 onto themselves as a 2-bit value; every other encoding becomes 0.
  def toTableIndex(encoding: UInt): UInt = MuxLookup(encoding, 0.U(2.W), Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))
  val simple_lmul = toTableIndex(vlmul)
  val simple_emul = toTableIndex(vemul)

  // Select base + offset among the MAX_VLMUL registers of a group.
  def regAtOffset(base: UInt, offset: UInt): UInt =
    Mux1H(UIntToOH(offset, MAX_VLMUL), (0 until MAX_VLMUL).map(j => base + j.U))

  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := true.B
  moveUop.vecWen := false.B
  moveUop.vlsInstr := true.B

  // Every per-uop offset table is looked up with the same {emul, lmul, nf} key.
  val tableKey = Cat(simple_emul, simple_lmul, nf)
  (0 until MAX_INDEXED_LS_UOPNUM).foreach { i =>
    val table = indexedLSRegOffset(i)
    table.src := tableKey
    val offsetVs2 = table.outOffsetVs2
    val offsetVd = table.outOffsetVd
    val isFirstUopInVd = table.outIsFirstUopInVd
    val vdReg = regAtOffset(dest, offsetVd)

    val memUop = csBundle(i + 1)
    memUop.srcType(0) := SrcType.fp
    memUop.lsrc(0) := FP_TMP_REG_MV.U
    memUop.lsrc(1) := regAtOffset(src2, offsetVs2)
    /**
      * Original design note: for indexed instructions the VLSU concatenates all uops that
      * write the same logical vd register and writes back only once for them. Those uops
      * share the same lsrc(2)/old vd and the same ldest/vd, which creates a data dependence
      * between them and can deadlock indexed instructions with emul > lmul. With
      * N = emul / lmul, the intended fix is that only the first uop reads old vd as lsrc(2)
      * while the other N - 1 uops read a temporary vector register.
      *
      * NOTE(review): the assignments below always use SrcType.vp and dest + offsetVd, and
      * `isFirstUopInVd` is not used -- confirm whether the workaround lives elsewhere.
      */
    memUop.srcType(2) := SrcType.vp
    memUop.lsrc(2) := vdReg
    memUop.ldest := vdReg
    memUop.uopIdx := i.U
    memUop.vlsInstr := true.B
  }

  // When isIxSegment is set, the first uop waits forward and the last one blocks backward.
  moveUop.waitForward := isIxSegment
  csBundle(numOfUop - 1.U).blockBackward := isIxSegment
}
} // closes the uop-split switch that is opened above this part of the file

// Ready counter: priority-selects the index of the first output slot that is not ready and
// falls back to RenameWidth when none is selected (exact semantics are those of
// PriorityMuxDefault, which is defined elsewhere).
val readyCounter = {
  val slotNotReady = outReadys.map(x => !x)
  val slotIndices = (0 until RenameWidth).map(_.U)
  PriorityMuxDefault(slotNotReady.zip(slotIndices), RenameWidth.U)
}

// True when all remaining uops of the current complex instruction fit into this cycle.
val thisAllOut = uopRes <= readyCounter

// Next-state logic: load a new instruction's uop count when idle or when the current one
// finishes this cycle, otherwise subtract the number of uops that were accepted.
switch(state) {
  is(s_idle) {
    when(inValid) {
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }
  }
  is(s_active) {
    when(!thisAllOut) {
      stateNext := s_active
      uopResNext := uopRes - readyCounter
    }.elsewhen(inValid) {
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }.otherwise {
      stateNext := s_idle
      uopResNext := 0.U
    }
  }
}

// A redirect forces the unit back to idle with nothing left to send.
state := Mux(io.redirect, s_idle, stateNext)
uopRes := Mux(io.redirect, 0.U, uopResNext)

// Number of uops offered this cycle: min(uopRes, readyCounter).
val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)

(0 until RenameWidth).foreach { i =>
  // Index of the uop shown on output slot i; clamped to the last csBundle entry.
  val uopSel = i.U + numOfUop - uopRes
  outValids(i) := complexNum > i.U
  outDecodedInsts(i) := Mux(uopSel < maxUopSize.U, csBundle(uopSel), csBundle(maxUopSize - 1))
}

outComplexNum := Mux(state === s_active, complexNum, 0.U)
inReady := state === s_idle || (state === s_active && thisAllOut)

// Disabled earlier implementation, kept for reference:
//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
//  val notInf = Wire(Vec(DecodeWidth, Bool()))
//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
//
//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
//    0.U)
//  validToRename.zipWithIndex.foreach{
//    case(dst, i) =>
//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
//      dst := MuxCase(false.B, Seq(
//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
//      ).toSeq)
//  }
//
//  readyToIBuf.zipWithIndex.foreach {
//    case (dst, i) =>
//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
//      dst := MuxCase(true.B, Seq(
//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
//      ).toSeq)
//  }
//
//  io.deq.decodedInsts := decodedInsts
//  io.deq.complexNum := complexNum
//  io.deq.validToRename := validToRename
//  io.deq.readyToIBuf := readyToIBuf
} // closes the enclosing class