/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
 * Combinational lookup table giving, for one fixed uop index of an indexed vector
 * load/store, the register offsets that uop should use.
 *
 * The table is computed at elaboration time for every (emul, lmul, nf) combination and
 * turned into logic with `decoder(QMCMinimizer, ...)`.
 *
 * Input `src` (7 bits) is laid out as `emul << 5 | lmul << 3 | nf`:
 *   - bits [6:5] emul, bits [4:3] lmul -- both are used as shift amounts (`1 << x`) below,
 *     i.e. they are log2 values in 0..3
 *   - bits [2:0] nf, where the number of fields is `nf + 1`
 *
 * Outputs are sliced from the 7-bit decoder result:
 *   - `outIsFirstUopInVd` = bit 6
 *   - `outOffsetVs2`      = bits [5:3]
 *   - `outOffsetVd`       = bits [2:0]
 *
 * @param uopIdx index of the uop (within the split instruction) this table instance serves
 */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
   * Computes the table entry for one (lmul, emul, nfields) configuration.
   *
   * Uops are numbered field by field; each field owns `1 << max(lmul, emul)` consecutive
   * uop indices. The field number and the position inside the field are therefore
   * recovered directly with a division and a remainder.
   *
   * @param lmul    log2 of the data register group size (0..3 as used by this class)
   * @param emul    log2 of the index register group size (0..3 as used by this class)
   * @param nfields number of segment fields (1..8 as used by this class)
   * @param uopIdx  uop index to look up
   * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the guard below
   *         rejects the configuration or `uopIdx` lies outside the uops of the instruction
   */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val fallback = (0, 0, 1)
    // Uop count per field depends on the larger of the two register groups.
    val uopsPerField = 1 << (if (lmul < emul) emul else lmul)

    // NOTE(review): `lmul` is a log2 value here (it is used as `1 << lmul` below), so this
    // guard is looser than `(1 << lmul) * nfields <= 8`. It is kept exactly as it was; confirm
    // the intent before tightening it, because doing so changes the generated table.
    // NOTE(review): for combinations accepted only because of the loose guard (e.g. lmul = 3,
    // nfields = 2, uopIdx = 8) offsetVd exceeds 3 bits and overlaps the offsetVs2 field when
    // the output pattern is packed -- presumably unreachable for legal instructions; verify.
    if (lmul * nfields > 8 || uopIdx < 0 || uopIdx >= nfields * uopsPerField) {
      fallback
    } else {
      val field = uopIdx / uopsPerField      // which segment field this uop belongs to
      val idxInField = uopIdx % uopsPerField // position of the uop inside that field
      val vdBase = field * (1 << lmul)       // first vd register offset of the field
      if (lmul < emul) {
        // lmul < emul: uop num depends on emul * nf; several index registers map to one vd
        val ratio = 1 << (emul - lmul)
        (idxInField, idxInField / ratio + vdBase, if (idxInField % ratio == 0) 1 else 0)
      } else {
        // lmul >= emul: uop num depends on lmul * nf; one index register serves several vd
        val ratio = 1 << (lmul - emul)
        (idxInField / ratio, idxInField + vdBase, 1)
      }
    }
  }

  // indexed load/store: one entry per (emul, lmul, nf), in emul-major order
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
    nf <- 0 until 8
  } yield {
    val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
    (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
  }

  // Unlisted inputs fall back to the all-zero default pattern.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}

/** Constants shared by the vector uop-splitting logic in this file. */
trait VectorConstants {
  // Upper bound used for the per-register unrolling loops of the uop split
  val MAX_VLMUL = 8
  // Logical register index used as the temporary destination of the i2v move in the vsetvl split
  val FP_TMP_REG_MV = 32
  // First logical index of the vector temporary registers used by split uops
  val VECTOR_TMP_REG_LMUL = 33 // 33~47  ->  15
  // Number of indexedLSUopTable instances created, one per uop index
  val MAX_INDEXED_LS_UOPNUM = 64
}

/** IO bundle of the complex-instruction decoder (`DecodeUnitComp`). */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum =
Output(UInt(3.W)) 107} 108 109/** 110 * @author zly 111 */ 112class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants { 113 val io = IO(new DecodeUnitCompIO) 114 115 // alias 116 private val inReady = io.in.ready 117 private val inValid = io.in.valid 118 private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst) 119 private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 120 private val inUopInfo = io.in.bits.uopInfo 121 private val outValids = io.out.complexDecodedInsts.map(_.valid) 122 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 123 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 124 private val outComplexNum = io.complexNum 125 126 val maxUopSize = MaxUopSize 127 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 128 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 129 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 130 }.elsewhen(inInstFields.RS1 === 0.U) { 131 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 132 } 133 } 134 135 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 136 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 137 //input bits 138 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 139 140 val src1 = Cat(0.U(1.W), instFields.RS1) 141 val src2 = Cat(0.U(1.W), instFields.RS2) 142 val dest = Cat(0.U(1.W), instFields.RD) 143 144 val nf = instFields.NF 145 val width = instFields.WIDTH(1, 0) 146 147 //output of DecodeUnit 148 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 149 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 150 val lmul = Wire(UInt(4.W)) 151 val isVsetSimple = Wire(Bool()) 152 153 val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i))) 154 indexedLSRegOffset.map(_.src := 0.U) 155 156 //pre decode 157 
lmul := latchedUopInfo.lmul 158 isVsetSimple := latchedInst.isVset 159 val vlmulReg = latchedInst.vpu.vlmul 160 val vsewReg = latchedInst.vpu.vsew 161 162 //Type of uop Div 163 val typeOfSplit = latchedInst.uopSplitType 164 val src1Type = latchedInst.srcType(0) 165 val src1IsImm = src1Type === SrcType.imm 166 val src1IsFp = src1Type === SrcType.fp 167 168 numOfUop := latchedUopInfo.numOfUop 169 numOfWB := latchedUopInfo.numOfWB 170 171 //uops dispatch 172 val s_idle :: s_active :: Nil = Enum(2) 173 val state = RegInit(s_idle) 174 val stateNext = WireDefault(state) 175 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 176 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 177 val uopResNext = WireInit(uopRes) 178 val e64 = 3.U(2.W) 179 180 //uop div up to maxUopSize 181 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 182 csBundle.foreach { case dst => 183 dst := latchedInst 184 dst.numUops := latchedUopInfo.numOfUop 185 dst.numWB := latchedUopInfo.numOfWB 186 dst.firstUop := false.B 187 dst.lastUop := false.B 188 dst.vlsInstr := false.B 189 } 190 191 csBundle(0).firstUop := true.B 192 csBundle(numOfUop - 1.U).lastUop := true.B 193 194 switch(typeOfSplit) { 195 is(UopSplitType.VSET) { 196 // In simple decoder, rfWen and vecWen are not set 197 when(isVsetSimple) { 198 // Default 199 // uop0 set rd, never flushPipe 200 csBundle(0).fuType := FuType.vsetiwi.U 201 csBundle(0).flushPipe := false.B 202 csBundle(0).rfWen := true.B 203 // uop1 set vl, vsetvl will flushPipe 204 csBundle(1).ldest := VCONFIG_IDX.U 205 csBundle(1).vecWen := true.B 206 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 207 csBundle(1).fuType := FuType.vsetfwf.U 208 csBundle(1).srcType(0) := SrcType.vp 209 csBundle(1).lsrc(0) := VCONFIG_IDX.U 210 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 211 // uop0: mv vtype gpr to vector region 212 csBundle(0).srcType(0) := SrcType.xp 213 csBundle(0).srcType(1) := SrcType.no 214 
csBundle(0).lsrc(1) := 0.U 215 csBundle(0).ldest := FP_TMP_REG_MV.U 216 csBundle(0).fuType := FuType.i2v.U 217 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 218 csBundle(0).rfWen := false.B 219 csBundle(0).fpWen := true.B 220 csBundle(0).vecWen := false.B 221 csBundle(0).flushPipe := false.B 222 // uop1: uvsetvcfg_vv 223 csBundle(1).fuType := FuType.vsetfwf.U 224 // vl 225 csBundle(1).srcType(0) := SrcType.vp 226 csBundle(1).lsrc(0) := VCONFIG_IDX.U 227 // vtype 228 csBundle(1).srcType(1) := SrcType.fp 229 csBundle(1).lsrc(1) := FP_TMP_REG_MV.U 230 csBundle(1).vecWen := true.B 231 csBundle(1).ldest := VCONFIG_IDX.U 232 } 233 // use bypass vtype from vtypeGen 234 csBundle(0).vpu.connectVType(io.vtypeBypass) 235 csBundle(1).vpu.connectVType(io.vtypeBypass) 236 } 237 } 238 is(UopSplitType.VEC_VVV) { 239 for (i <- 0 until MAX_VLMUL) { 240 csBundle(i).lsrc(0) := src1 + i.U 241 csBundle(i).lsrc(1) := src2 + i.U 242 csBundle(i).lsrc(2) := dest + i.U 243 csBundle(i).ldest := dest + i.U 244 csBundle(i).uopIdx := i.U 245 } 246 } 247 is(UopSplitType.VEC_VFV) { 248 /* 249 i to vector move 250 */ 251 csBundle(0).srcType(0) := SrcType.fp 252 csBundle(0).srcType(1) := SrcType.imm 253 csBundle(0).lsrc(1) := 0.U 254 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 255 csBundle(0).fuType := FuType.f2v.U 256 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 257 csBundle(0).vecWen := true.B 258 csBundle(0).vpu.isReverse := false.B 259 /* 260 LMUL 261 */ 262 for (i <- 0 until MAX_VLMUL) { 263 csBundle(i + 1).srcType(0) := SrcType.vp 264 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 265 csBundle(i + 1).lsrc(1) := src2 + i.U 266 csBundle(i + 1).lsrc(2) := dest + i.U 267 csBundle(i + 1).ldest := dest + i.U 268 csBundle(i + 1).uopIdx := i.U 269 } 270 } 271 is(UopSplitType.VEC_EXT2) { 272 for (i <- 0 until MAX_VLMUL / 2) { 273 csBundle(2 * i).lsrc(1) := src2 + i.U 274 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 275 csBundle(2 * i).ldest := dest + (2 * i).U 276 
csBundle(2 * i).uopIdx := (2 * i).U 277 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 278 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 279 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 280 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 281 } 282 } 283 is(UopSplitType.VEC_EXT4) { 284 for (i <- 0 until MAX_VLMUL / 4) { 285 csBundle(4 * i).lsrc(1) := src2 + i.U 286 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 287 csBundle(4 * i).ldest := dest + (4 * i).U 288 csBundle(4 * i).uopIdx := (4 * i).U 289 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 290 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 291 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 292 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 293 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 294 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 295 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 296 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 297 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 298 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 299 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 300 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 301 } 302 } 303 is(UopSplitType.VEC_EXT8) { 304 for (i <- 0 until MAX_VLMUL) { 305 csBundle(i).lsrc(1) := src2 306 csBundle(i).lsrc(2) := dest + i.U 307 csBundle(i).ldest := dest + i.U 308 csBundle(i).uopIdx := i.U 309 } 310 } 311 is(UopSplitType.VEC_0XV) { 312 /* 313 i/f to vector move 314 */ 315 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 316 csBundle(0).srcType(1) := SrcType.imm 317 csBundle(0).lsrc(1) := 0.U 318 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 319 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 320 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 321 csBundle(0).rfWen := false.B 322 csBundle(0).fpWen := false.B 323 csBundle(0).vecWen := true.B 324 /* 325 vmv.s.x 326 */ 327 csBundle(1).srcType(0) := SrcType.vp 328 csBundle(1).srcType(1) := SrcType.imm 329 
csBundle(1).srcType(2) := SrcType.vp 330 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 331 csBundle(1).lsrc(1) := 0.U 332 csBundle(1).lsrc(2) := dest 333 csBundle(1).ldest := dest 334 csBundle(1).rfWen := false.B 335 csBundle(1).fpWen := false.B 336 csBundle(1).vecWen := true.B 337 csBundle(1).uopIdx := 0.U 338 } 339 is(UopSplitType.VEC_VXV) { 340 /* 341 i to vector move 342 */ 343 csBundle(0).srcType(0) := SrcType.reg 344 csBundle(0).srcType(1) := SrcType.imm 345 csBundle(0).lsrc(1) := 0.U 346 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 347 csBundle(0).fuType := FuType.i2v.U 348 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 349 csBundle(0).vecWen := true.B 350 csBundle(0).vpu.isReverse := false.B 351 /* 352 LMUL 353 */ 354 for (i <- 0 until MAX_VLMUL) { 355 csBundle(i + 1).srcType(0) := SrcType.vp 356 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 357 csBundle(i + 1).lsrc(1) := src2 + i.U 358 csBundle(i + 1).lsrc(2) := dest + i.U 359 csBundle(i + 1).ldest := dest + i.U 360 csBundle(i + 1).uopIdx := i.U 361 } 362 } 363 is(UopSplitType.VEC_VVW) { 364 for (i <- 0 until MAX_VLMUL / 2) { 365 csBundle(2 * i).lsrc(0) := src1 + i.U 366 csBundle(2 * i).lsrc(1) := src2 + i.U 367 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 368 csBundle(2 * i).ldest := dest + (2 * i).U 369 csBundle(2 * i).uopIdx := (2 * i).U 370 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 371 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 372 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 373 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 374 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 375 } 376 } 377 is(UopSplitType.VEC_VFW) { 378 /* 379 f to vector move 380 */ 381 csBundle(0).srcType(0) := SrcType.fp 382 csBundle(0).srcType(1) := SrcType.imm 383 csBundle(0).lsrc(1) := 0.U 384 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 385 csBundle(0).fuType := FuType.f2v.U 386 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 387 
csBundle(0).rfWen := false.B 388 csBundle(0).fpWen := false.B 389 csBundle(0).vecWen := true.B 390 391 for (i <- 0 until MAX_VLMUL / 2) { 392 csBundle(2 * i + 1).srcType(0) := SrcType.vp 393 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 394 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 395 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 396 csBundle(2 * i + 1).ldest := dest + (2 * i).U 397 csBundle(2 * i + 1).uopIdx := (2 * i).U 398 csBundle(2 * i + 2).srcType(0) := SrcType.vp 399 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 400 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 401 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 402 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 403 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 404 } 405 } 406 is(UopSplitType.VEC_WVW) { 407 for (i <- 0 until MAX_VLMUL / 2) { 408 csBundle(2 * i).lsrc(0) := src1 + i.U 409 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 410 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 411 csBundle(2 * i).ldest := dest + (2 * i).U 412 csBundle(2 * i).uopIdx := (2 * i).U 413 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 414 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 415 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 416 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 417 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 418 } 419 } 420 is(UopSplitType.VEC_VXW) { 421 /* 422 i to vector move 423 */ 424 csBundle(0).srcType(0) := SrcType.reg 425 csBundle(0).srcType(1) := SrcType.imm 426 csBundle(0).lsrc(1) := 0.U 427 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 428 csBundle(0).fuType := FuType.i2v.U 429 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 430 csBundle(0).vecWen := true.B 431 432 for (i <- 0 until MAX_VLMUL / 2) { 433 csBundle(2 * i + 1).srcType(0) := SrcType.vp 434 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 435 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 436 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 437 csBundle(2 * i + 1).ldest := dest + (2 * i).U 438 
csBundle(2 * i + 1).uopIdx := (2 * i).U 439 csBundle(2 * i + 2).srcType(0) := SrcType.vp 440 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 441 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 442 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 443 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 444 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 445 } 446 } 447 is(UopSplitType.VEC_WXW) { 448 /* 449 i to vector move 450 */ 451 csBundle(0).srcType(0) := SrcType.reg 452 csBundle(0).srcType(1) := SrcType.imm 453 csBundle(0).lsrc(1) := 0.U 454 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 455 csBundle(0).fuType := FuType.i2v.U 456 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 457 csBundle(0).vecWen := true.B 458 459 for (i <- 0 until MAX_VLMUL / 2) { 460 csBundle(2 * i + 1).srcType(0) := SrcType.vp 461 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 462 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 463 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 464 csBundle(2 * i + 1).ldest := dest + (2 * i).U 465 csBundle(2 * i + 1).uopIdx := (2 * i).U 466 csBundle(2 * i + 2).srcType(0) := SrcType.vp 467 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 468 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 469 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 470 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 471 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 472 } 473 } 474 is(UopSplitType.VEC_WVV) { 475 for (i <- 0 until MAX_VLMUL / 2) { 476 477 csBundle(2 * i).lsrc(0) := src1 + i.U 478 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 479 csBundle(2 * i).lsrc(2) := dest + i.U 480 csBundle(2 * i).ldest := dest + i.U 481 csBundle(2 * i).uopIdx := (2 * i).U 482 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 483 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 484 csBundle(2 * i + 1).lsrc(2) := dest + i.U 485 csBundle(2 * i + 1).ldest := dest + i.U 486 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 487 } 488 } 489 is(UopSplitType.VEC_WFW) { 490 /* 491 f to 
vector move 492 */ 493 csBundle(0).srcType(0) := SrcType.fp 494 csBundle(0).srcType(1) := SrcType.imm 495 csBundle(0).lsrc(1) := 0.U 496 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 497 csBundle(0).fuType := FuType.f2v.U 498 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 499 csBundle(0).rfWen := false.B 500 csBundle(0).fpWen := false.B 501 csBundle(0).vecWen := true.B 502 503 for (i <- 0 until MAX_VLMUL / 2) { 504 csBundle(2 * i + 1).srcType(0) := SrcType.vp 505 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 506 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 507 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 508 csBundle(2 * i + 1).ldest := dest + (2 * i).U 509 csBundle(2 * i + 1).uopIdx := (2 * i).U 510 csBundle(2 * i + 2).srcType(0) := SrcType.vp 511 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 512 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 513 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 514 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 515 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 516 } 517 } 518 is(UopSplitType.VEC_WXV) { 519 /* 520 i to vector move 521 */ 522 csBundle(0).srcType(0) := SrcType.reg 523 csBundle(0).srcType(1) := SrcType.imm 524 csBundle(0).lsrc(1) := 0.U 525 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 526 csBundle(0).fuType := FuType.i2v.U 527 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 528 csBundle(0).vecWen := true.B 529 530 for (i <- 0 until MAX_VLMUL / 2) { 531 csBundle(2 * i + 1).srcType(0) := SrcType.vp 532 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 533 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 534 csBundle(2 * i + 1).lsrc(2) := dest + i.U 535 csBundle(2 * i + 1).ldest := dest + i.U 536 csBundle(2 * i + 1).uopIdx := (2 * i).U 537 csBundle(2 * i + 2).srcType(0) := SrcType.vp 538 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 539 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 540 csBundle(2 
* i + 2).lsrc(2) := dest + i.U 541 csBundle(2 * i + 2).ldest := dest + i.U 542 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 543 } 544 } 545 is(UopSplitType.VEC_VVM) { 546 csBundle(0).lsrc(2) := dest 547 csBundle(0).ldest := dest 548 csBundle(0).uopIdx := 0.U 549 for (i <- 1 until MAX_VLMUL) { 550 csBundle(i).lsrc(0) := src1 + i.U 551 csBundle(i).lsrc(1) := src2 + i.U 552 csBundle(i).lsrc(2) := dest 553 csBundle(i).ldest := dest 554 csBundle(i).uopIdx := i.U 555 } 556 } 557 is(UopSplitType.VEC_VFM) { 558 /* 559 f to vector move 560 */ 561 csBundle(0).srcType(0) := SrcType.fp 562 csBundle(0).srcType(1) := SrcType.imm 563 csBundle(0).lsrc(1) := 0.U 564 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 565 csBundle(0).fuType := FuType.f2v.U 566 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 567 csBundle(0).rfWen := false.B 568 csBundle(0).fpWen := false.B 569 csBundle(0).vecWen := true.B 570 //LMUL 571 csBundle(1).srcType(0) := SrcType.vp 572 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 573 csBundle(1).lsrc(2) := dest 574 csBundle(1).ldest := dest 575 csBundle(1).uopIdx := 0.U 576 for (i <- 1 until MAX_VLMUL) { 577 csBundle(i + 1).srcType(0) := SrcType.vp 578 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 579 csBundle(i + 1).lsrc(1) := src2 + i.U 580 csBundle(i + 1).lsrc(2) := dest 581 csBundle(i + 1).ldest := dest 582 csBundle(i + 1).uopIdx := i.U 583 } 584 csBundle(numOfUop - 1.U).ldest := dest 585 } 586 is(UopSplitType.VEC_VXM) { 587 /* 588 i to vector move 589 */ 590 csBundle(0).srcType(0) := SrcType.reg 591 csBundle(0).srcType(1) := SrcType.imm 592 csBundle(0).lsrc(1) := 0.U 593 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 594 csBundle(0).fuType := FuType.i2v.U 595 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 596 csBundle(0).vecWen := true.B 597 //LMUL 598 csBundle(1).srcType(0) := SrcType.vp 599 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 600 csBundle(1).lsrc(2) := dest 601 
csBundle(1).ldest := dest 602 csBundle(1).uopIdx := 0.U 603 for (i <- 1 until MAX_VLMUL) { 604 csBundle(i + 1).srcType(0) := SrcType.vp 605 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 606 csBundle(i + 1).lsrc(1) := src2 + i.U 607 csBundle(i + 1).lsrc(2) := dest 608 csBundle(i + 1).ldest := dest 609 csBundle(i + 1).uopIdx := i.U 610 } 611 csBundle(numOfUop - 1.U).ldest := dest 612 } 613 is(UopSplitType.VEC_SLIDE1UP) { 614 /* 615 i to vector move 616 */ 617 csBundle(0).srcType(0) := SrcType.reg 618 csBundle(0).srcType(1) := SrcType.imm 619 csBundle(0).lsrc(1) := 0.U 620 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 621 csBundle(0).fuType := FuType.i2v.U 622 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 623 csBundle(0).vecWen := true.B 624 //LMUL 625 csBundle(1).srcType(0) := SrcType.vp 626 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 627 csBundle(1).lsrc(2) := dest 628 csBundle(1).ldest := dest 629 csBundle(1).uopIdx := 0.U 630 for (i <- 1 until MAX_VLMUL) { 631 csBundle(i + 1).srcType(0) := SrcType.vp 632 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 633 csBundle(i + 1).lsrc(1) := src2 + i.U 634 csBundle(i + 1).lsrc(2) := dest + i.U 635 csBundle(i + 1).ldest := dest + i.U 636 csBundle(i + 1).uopIdx := i.U 637 } 638 } 639 is(UopSplitType.VEC_FSLIDE1UP) { 640 /* 641 i to vector move 642 */ 643 csBundle(0).srcType(0) := SrcType.fp 644 csBundle(0).srcType(1) := SrcType.imm 645 csBundle(0).lsrc(1) := 0.U 646 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 647 csBundle(0).fuType := FuType.f2v.U 648 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 649 csBundle(0).rfWen := false.B 650 csBundle(0).fpWen := false.B 651 csBundle(0).vecWen := true.B 652 //LMUL 653 csBundle(1).srcType(0) := SrcType.vp 654 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 655 csBundle(1).lsrc(1) := src2 656 csBundle(1).lsrc(2) := dest 657 csBundle(1).ldest := dest 658 csBundle(1).uopIdx := 0.U 659 for (i <- 1 until MAX_VLMUL) { 660 csBundle(i + 1).srcType(0) := 
SrcType.vp 661 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 662 csBundle(i + 1).lsrc(1) := src2 + i.U 663 csBundle(i + 1).lsrc(2) := dest + i.U 664 csBundle(i + 1).ldest := dest + i.U 665 csBundle(i + 1).uopIdx := i.U 666 } 667 } 668 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 669 /* 670 i to vector move 671 */ 672 csBundle(0).srcType(0) := SrcType.reg 673 csBundle(0).srcType(1) := SrcType.imm 674 csBundle(0).lsrc(1) := 0.U 675 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 676 csBundle(0).fuType := FuType.i2v.U 677 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 678 csBundle(0).vecWen := true.B 679 //LMUL 680 for (i <- 0 until MAX_VLMUL) { 681 csBundle(2 * i + 1).srcType(0) := SrcType.vp 682 csBundle(2 * i + 1).srcType(1) := SrcType.vp 683 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 684 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 685 csBundle(2 * i + 1).lsrc(2) := dest + i.U 686 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 687 csBundle(2 * i + 1).uopIdx := (2 * i).U 688 if (2 * i + 2 < MAX_VLMUL * 2) { 689 csBundle(2 * i + 2).srcType(0) := SrcType.vp 690 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 691 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 692 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 693 csBundle(2 * i + 2).ldest := dest + i.U 694 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 695 } 696 } 697 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 698 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 699 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 700 } 701 is(UopSplitType.VEC_FSLIDE1DOWN) { 702 /* 703 i to vector move 704 */ 705 csBundle(0).srcType(0) := SrcType.fp 706 csBundle(0).srcType(1) := SrcType.imm 707 csBundle(0).lsrc(1) := 0.U 708 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 709 csBundle(0).fuType := FuType.f2v.U 710 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 711 csBundle(0).rfWen := false.B 712 csBundle(0).fpWen := false.B 713 
csBundle(0).vecWen := true.B 714 //LMUL 715 for (i <- 0 until MAX_VLMUL) { 716 csBundle(2 * i + 1).srcType(0) := SrcType.vp 717 csBundle(2 * i + 1).srcType(1) := SrcType.vp 718 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 719 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 720 csBundle(2 * i + 1).lsrc(2) := dest + i.U 721 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 722 csBundle(2 * i + 1).uopIdx := (2 * i).U 723 if (2 * i + 2 < MAX_VLMUL * 2) { 724 csBundle(2 * i + 2).srcType(0) := SrcType.vp 725 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 726 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 727 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 728 csBundle(2 * i + 2).ldest := dest + i.U 729 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 730 } 731 } 732 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 733 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 734 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 735 } 736 is(UopSplitType.VEC_VRED) { 737 when(vlmulReg === "b001".U) { 738 csBundle(0).srcType(2) := SrcType.DC 739 csBundle(0).lsrc(0) := src2 + 1.U 740 csBundle(0).lsrc(1) := src2 741 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 742 csBundle(0).uopIdx := 0.U 743 } 744 when(vlmulReg === "b010".U) { 745 csBundle(0).srcType(2) := SrcType.DC 746 csBundle(0).lsrc(0) := src2 + 1.U 747 csBundle(0).lsrc(1) := src2 748 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 749 csBundle(0).uopIdx := 0.U 750 751 csBundle(1).srcType(2) := SrcType.DC 752 csBundle(1).lsrc(0) := src2 + 3.U 753 csBundle(1).lsrc(1) := src2 + 2.U 754 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 755 csBundle(1).uopIdx := 1.U 756 757 csBundle(2).srcType(2) := SrcType.DC 758 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 759 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 760 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 761 csBundle(2).uopIdx := 2.U 762 } 763 when(vlmulReg === "b011".U) { 764 for (i <- 0 until MAX_VLMUL) { 765 if (i < MAX_VLMUL - MAX_VLMUL / 
2) { 766 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 767 csBundle(i).lsrc(1) := src2 + (i * 2).U 768 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 769 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 770 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 771 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 772 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 773 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 774 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 775 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 776 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 777 } 778 csBundle(i).srcType(2) := SrcType.DC 779 csBundle(i).uopIdx := i.U 780 } 781 } 782 when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) { 783 /* 784 * 2 <= vlmul <= 8 785 */ 786 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 787 csBundle(numOfUop - 1.U).lsrc(0) := src1 788 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 789 csBundle(numOfUop - 1.U).lsrc(2) := dest 790 csBundle(numOfUop - 1.U).ldest := dest 791 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 792 } 793 } 794 is(UopSplitType.VEC_VFRED) { 795 val vlmul = vlmulReg 796 val vsew = vsewReg 797 when(vlmul === VLmul.m8){ 798 for (i <- 0 until 4) { 799 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 800 csBundle(i).lsrc(1) := src2 + (i * 2).U 801 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 802 csBundle(i).uopIdx := i.U 803 } 804 for (i <- 4 until 6) { 805 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 806 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 807 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 808 csBundle(i).uopIdx := i.U 809 } 810 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 811 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 812 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 813 csBundle(6).uopIdx := 6.U 814 when(vsew === VSew.e64) { 815 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 816 
csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 817 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 818 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 819 csBundle(7).uopIdx := 7.U 820 csBundle(8).lsrc(0) := src1 821 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 822 csBundle(8).ldest := dest 823 csBundle(8).uopIdx := 8.U 824 } 825 when(vsew === VSew.e32) { 826 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 827 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 828 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 829 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 830 csBundle(7).uopIdx := 7.U 831 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 832 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 833 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 834 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 835 csBundle(8).uopIdx := 8.U 836 csBundle(9).lsrc(0) := src1 837 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 838 csBundle(9).ldest := dest 839 csBundle(9).uopIdx := 9.U 840 } 841 when(vsew === VSew.e16) { 842 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 843 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 844 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 845 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 846 csBundle(7).uopIdx := 7.U 847 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 848 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 849 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 850 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 851 csBundle(8).uopIdx := 8.U 852 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 853 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 854 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 855 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 856 csBundle(9).uopIdx := 9.U 857 csBundle(10).lsrc(0) := src1 858 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 859 csBundle(10).ldest := dest 860 csBundle(10).uopIdx := 10.U 861 } 862 } 863 when(vlmul === VLmul.m4) { 864 for (i <- 0 until 2) { 865 csBundle(i).lsrc(0) := src2 + (i 
* 2 + 1).U 866 csBundle(i).lsrc(1) := src2 + (i * 2).U 867 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 868 csBundle(i).uopIdx := i.U 869 } 870 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 871 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 872 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 873 csBundle(2).uopIdx := 2.U 874 when(vsew === VSew.e64) { 875 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 876 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 877 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 878 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 879 csBundle(3).uopIdx := 3.U 880 csBundle(4).lsrc(0) := src1 881 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 882 csBundle(4).ldest := dest 883 csBundle(4).uopIdx := 4.U 884 } 885 when(vsew === VSew.e32) { 886 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 887 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 888 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 889 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 890 csBundle(3).uopIdx := 3.U 891 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 892 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 893 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 894 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 895 csBundle(4).uopIdx := 4.U 896 csBundle(5).lsrc(0) := src1 897 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 898 csBundle(5).ldest := dest 899 csBundle(5).uopIdx := 5.U 900 } 901 when(vsew === VSew.e16) { 902 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 903 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 904 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 905 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 906 csBundle(3).uopIdx := 3.U 907 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 908 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 909 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 910 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 911 csBundle(4).uopIdx := 4.U 912 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 913 
csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 914 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 915 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 916 csBundle(5).uopIdx := 5.U 917 csBundle(6).lsrc(0) := src1 918 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 919 csBundle(6).ldest := dest 920 csBundle(6).uopIdx := 6.U 921 } 922 } 923 when(vlmul === VLmul.m2) { 924 csBundle(0).lsrc(0) := src2 + 1.U 925 csBundle(0).lsrc(1) := src2 + 0.U 926 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 927 csBundle(0).uopIdx := 0.U 928 when(vsew === VSew.e64) { 929 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 930 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 931 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 932 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 933 csBundle(1).uopIdx := 1.U 934 csBundle(2).lsrc(0) := src1 935 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 936 csBundle(2).ldest := dest 937 csBundle(2).uopIdx := 2.U 938 } 939 when(vsew === VSew.e32) { 940 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 941 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 942 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 943 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 944 csBundle(1).uopIdx := 1.U 945 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 946 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 947 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 948 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 949 csBundle(2).uopIdx := 2.U 950 csBundle(3).lsrc(0) := src1 951 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 952 csBundle(3).ldest := dest 953 csBundle(3).uopIdx := 3.U 954 } 955 when(vsew === VSew.e16) { 956 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 957 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 958 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 959 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 960 csBundle(1).uopIdx := 1.U 961 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 962 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 963 
csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 964 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 965 csBundle(2).uopIdx := 2.U 966 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 967 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 968 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 969 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 970 csBundle(3).uopIdx := 3.U 971 csBundle(4).lsrc(0) := src1 972 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 973 csBundle(4).ldest := dest 974 csBundle(4).uopIdx := 4.U 975 } 976 } 977 when(vlmul === VLmul.m1) { 978 when(vsew === VSew.e64) { 979 csBundle(0).lsrc(0) := src2 980 csBundle(0).lsrc(1) := src2 981 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 982 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 983 csBundle(0).uopIdx := 0.U 984 csBundle(1).lsrc(0) := src1 985 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 986 csBundle(1).ldest := dest 987 csBundle(1).uopIdx := 1.U 988 } 989 when(vsew === VSew.e32) { 990 csBundle(0).lsrc(0) := src2 991 csBundle(0).lsrc(1) := src2 992 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 993 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 994 csBundle(0).uopIdx := 0.U 995 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 996 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 997 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 998 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 999 csBundle(1).uopIdx := 1.U 1000 csBundle(2).lsrc(0) := src1 1001 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1002 csBundle(2).ldest := dest 1003 csBundle(2).uopIdx := 2.U 1004 } 1005 when(vsew === VSew.e16) { 1006 csBundle(0).lsrc(0) := src2 1007 csBundle(0).lsrc(1) := src2 1008 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1009 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1010 csBundle(0).uopIdx := 0.U 1011 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1012 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1013 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1014 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1015 
csBundle(1).uopIdx := 1.U 1016 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1017 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1018 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1019 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 1020 csBundle(2).uopIdx := 2.U 1021 csBundle(3).lsrc(0) := src1 1022 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1023 csBundle(3).ldest := dest 1024 csBundle(3).uopIdx := 3.U 1025 } 1026 } 1027 when(vlmul === VLmul.mf2) { 1028 when(vsew === VSew.e32) { 1029 csBundle(0).lsrc(0) := src2 1030 csBundle(0).lsrc(1) := src2 1031 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1032 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1033 csBundle(0).uopIdx := 0.U 1034 csBundle(1).lsrc(0) := src1 1035 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1036 csBundle(1).ldest := dest 1037 csBundle(1).uopIdx := 1.U 1038 } 1039 when(vsew === VSew.e16) { 1040 csBundle(0).lsrc(0) := src2 1041 csBundle(0).lsrc(1) := src2 1042 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1043 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1044 csBundle(0).uopIdx := 0.U 1045 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1046 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1047 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1048 csBundle(1).vpu.fpu.isFoldTo1_8 := true.B 1049 csBundle(1).uopIdx := 1.U 1050 csBundle(2).lsrc(0) := src1 1051 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1052 csBundle(2).ldest := dest 1053 csBundle(2).uopIdx := 2.U 1054 } 1055 } 1056 when(vlmul === VLmul.mf4) { 1057 when(vsew === VSew.e16) { 1058 csBundle(0).lsrc(0) := src2 1059 csBundle(0).lsrc(1) := src2 1060 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1061 csBundle(0).vpu.fpu.isFoldTo1_8 := true.B 1062 csBundle(0).uopIdx := 0.U 1063 csBundle(1).lsrc(0) := src1 1064 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1065 csBundle(1).ldest := dest 1066 csBundle(1).uopIdx := 1.U 1067 } 1068 } 1069 } 1070 1071 is(UopSplitType.VEC_VFREDOSUM) { 1072 import yunsuan.VfaluType 
1073 val vlmul = vlmulReg 1074 val vsew = vsewReg 1075 val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum 1076 when(vlmul === VLmul.m8) { 1077 when(vsew === VSew.e64) { 1078 val vlmax = 16 1079 for (i <- 0 until vlmax) { 1080 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1081 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1082 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1083 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1084 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1085 csBundle(i).uopIdx := i.U 1086 } 1087 } 1088 when(vsew === VSew.e32) { 1089 val vlmax = 32 1090 for (i <- 0 until vlmax) { 1091 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1092 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1093 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1094 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1095 csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B) 1096 csBundle(i).uopIdx := i.U 1097 } 1098 } 1099 when(vsew === VSew.e16) { 1100 val vlmax = 64 1101 for (i <- 0 until vlmax) { 1102 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1103 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1104 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1105 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1106 csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B) 1107 csBundle(i).uopIdx := i.U 1108 } 1109 } 1110 } 1111 when(vlmul === VLmul.m4) { 1112 when(vsew === VSew.e64) { 1113 val vlmax = 8 1114 for (i <- 0 until vlmax) { 1115 csBundle(i).lsrc(0) := (if 
(i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1116 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1117 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1118 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1119 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1120 csBundle(i).uopIdx := i.U 1121 } 1122 } 1123 when(vsew === VSew.e32) { 1124 val vlmax = 16 1125 for (i <- 0 until vlmax) { 1126 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1127 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1128 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1129 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1130 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1131 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1132 csBundle(i).uopIdx := i.U 1133 } 1134 } 1135 when(vsew === VSew.e16) { 1136 val vlmax = 32 1137 for (i <- 0 until vlmax) { 1138 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1139 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1140 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1141 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1142 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1143 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1144 csBundle(i).uopIdx := i.U 1145 } 1146 } 1147 } 1148 when(vlmul === VLmul.m2) { 1149 when(vsew === VSew.e64) 
{ 1150 val vlmax = 4 1151 for (i <- 0 until vlmax) { 1152 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1153 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1154 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1155 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1156 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1157 csBundle(i).uopIdx := i.U 1158 } 1159 } 1160 when(vsew === VSew.e32) { 1161 val vlmax = 8 1162 for (i <- 0 until vlmax) { 1163 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1164 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1165 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1166 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1167 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1168 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1169 csBundle(i).uopIdx := i.U 1170 } 1171 } 1172 when(vsew === VSew.e16) { 1173 val vlmax = 16 1174 for (i <- 0 until vlmax) { 1175 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1176 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1177 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1178 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1179 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1180 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1181 csBundle(i).uopIdx := 
i.U 1182 } 1183 } 1184 } 1185 when(vlmul === VLmul.m1) { 1186 when(vsew === VSew.e64) { 1187 val vlmax = 2 1188 for (i <- 0 until vlmax) { 1189 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1190 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1191 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1192 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1193 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1194 csBundle(i).uopIdx := i.U 1195 } 1196 } 1197 when(vsew === VSew.e32) { 1198 val vlmax = 4 1199 for (i <- 0 until vlmax) { 1200 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1201 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1202 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1203 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1204 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1205 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1206 csBundle(i).uopIdx := i.U 1207 } 1208 } 1209 when(vsew === VSew.e16) { 1210 val vlmax = 8 1211 for (i <- 0 until vlmax) { 1212 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1213 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1214 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1215 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1216 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1217 
csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1218 csBundle(i).uopIdx := i.U 1219 } 1220 } 1221 } 1222 when(vlmul === VLmul.mf2) { 1223 when(vsew === VSew.e32) { 1224 val vlmax = 2 1225 for (i <- 0 until vlmax) { 1226 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1227 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1228 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1229 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1230 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1231 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1232 csBundle(i).uopIdx := i.U 1233 } 1234 } 1235 when(vsew === VSew.e16) { 1236 val vlmax = 4 1237 for (i <- 0 until vlmax) { 1238 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1239 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1240 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1241 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1242 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1243 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1244 csBundle(i).uopIdx := i.U 1245 } 1246 } 1247 } 1248 when(vlmul === VLmul.mf4) { 1249 when(vsew === VSew.e16) { 1250 val vlmax = 2 1251 for (i <- 0 until vlmax) { 1252 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1253 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1254 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if 
(i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1255 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1256 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1257 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1258 csBundle(i).uopIdx := i.U 1259 } 1260 } 1261 } 1262 } 1263 1264 is(UopSplitType.VEC_SLIDEUP) { 1265 // i to vector move 1266 csBundle(0).srcType(0) := SrcType.reg 1267 csBundle(0).srcType(1) := SrcType.imm 1268 csBundle(0).lsrc(1) := 0.U 1269 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1270 csBundle(0).fuType := FuType.i2v.U 1271 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 1272 csBundle(0).vecWen := true.B 1273 // LMUL 1274 for (i <- 0 until MAX_VLMUL) 1275 for (j <- 0 to i) { 1276 val old_vd = if (j == 0) { 1277 dest + i.U 1278 } else (VECTOR_TMP_REG_LMUL + j).U 1279 val vd = if (j == i) { 1280 dest + i.U 1281 } else (VECTOR_TMP_REG_LMUL + j + 1).U 1282 csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp 1283 csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1284 csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U 1285 csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd 1286 csBundle(i * (i + 1) / 2 + j + 1).ldest := vd 1287 csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U 1288 } 1289 } 1290 1291 is(UopSplitType.VEC_SLIDEDOWN) { 1292 // i to vector move 1293 csBundle(0).srcType(0) := SrcType.reg 1294 csBundle(0).srcType(1) := SrcType.imm 1295 csBundle(0).lsrc(1) := 0.U 1296 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1297 csBundle(0).fuType := FuType.i2v.U 1298 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 1299 csBundle(0).vecWen := true.B 1300 // LMUL 1301 for (i <- 0 until MAX_VLMUL) 1302 for (j <- (0 to 
i).reverse) { 1303 when(i.U < lmul) { 1304 val old_vd = if (j == 0) { 1305 dest + lmul - 1.U - i.U 1306 } else (VECTOR_TMP_REG_LMUL + j).U 1307 val vd = if (j == i) { 1308 dest + lmul - 1.U - i.U 1309 } else (VECTOR_TMP_REG_LMUL + j + 1).U 1310 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp 1311 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1312 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U 1313 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd 1314 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd 1315 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U 1316 } 1317 } 1318 } 1319 1320 is(UopSplitType.VEC_M0X) { 1321 // LMUL 1322 for (i <- 0 until MAX_VLMUL) { 1323 val srcType0 = if (i == 0) SrcType.DC else SrcType.vp 1324 val ldest = (VECTOR_TMP_REG_LMUL + i).U 1325 csBundle(i).srcType(0) := srcType0 1326 csBundle(i).srcType(1) := SrcType.vp 1327 csBundle(i).rfWen := false.B 1328 csBundle(i).fpWen := false.B 1329 csBundle(i).vecWen := true.B 1330 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U 1331 csBundle(i).lsrc(1) := src2 1332 // csBundle(i).lsrc(2) := dest + i.U DontCare 1333 csBundle(i).ldest := ldest 1334 csBundle(i).uopIdx := i.U 1335 } 1336 csBundle(lmul - 1.U).rfWen := true.B 1337 csBundle(lmul - 1.U).fpWen := false.B 1338 csBundle(lmul - 1.U).vecWen := false.B 1339 csBundle(lmul - 1.U).ldest := dest 1340 } 1341 1342 is(UopSplitType.VEC_MVV) { 1343 // LMUL 1344 for (i <- 0 until MAX_VLMUL) { 1345 val srcType0 = if (i == 0) SrcType.DC else SrcType.vp 1346 csBundle(i * 2 + 0).srcType(0) := srcType0 1347 csBundle(i * 2 + 0).srcType(1) := SrcType.vp 1348 csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U 1349 csBundle(i * 2 + 0).lsrc(1) := src2 1350 csBundle(i * 2 + 0).lsrc(2) := dest + i.U 1351 csBundle(i * 2 + 0).ldest := dest + i.U 1352 
csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U 1353 1354 csBundle(i * 2 + 1).srcType(0) := srcType0 1355 csBundle(i * 2 + 1).srcType(1) := SrcType.vp 1356 csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U 1357 csBundle(i * 2 + 1).lsrc(1) := src2 1358 // csBundle(i).lsrc(2) := dest + i.U DontCare 1359 csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U 1360 csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U 1361 } 1362 } 1363 1364 is(UopSplitType.VEC_M0X_VFIRST) { 1365 // LMUL 1366 csBundle(0).rfWen := true.B 1367 csBundle(0).fpWen := false.B 1368 csBundle(0).vecWen := false.B 1369 csBundle(0).ldest := dest 1370 } 1371 is(UopSplitType.VEC_VWW) { 1372 for (i <- 0 until MAX_VLMUL*2) { 1373 when(i.U < lmul){ 1374 csBundle(i).srcType(2) := SrcType.DC 1375 csBundle(i).lsrc(0) := src2 + i.U 1376 csBundle(i).lsrc(1) := src2 + i.U 1377 // csBundle(i).lsrc(2) := dest + (2 * i).U 1378 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 1379 csBundle(i).uopIdx := i.U 1380 } otherwise { 1381 csBundle(i).srcType(2) := SrcType.DC 1382 csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U 1383 csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) 1384 // csBundle(i).lsrc(2) := dest + (2 * i).U 1385 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 1386 csBundle(i).uopIdx := i.U 1387 } 1388 csBundle(numOfUop-1.U).srcType(2) := SrcType.vp 1389 csBundle(numOfUop-1.U).lsrc(0) := src1 1390 csBundle(numOfUop-1.U).lsrc(2) := dest 1391 csBundle(numOfUop-1.U).ldest := dest 1392 } 1393 } 1394 is(UopSplitType.VEC_RGATHER) { 1395 def genCsBundle_VEC_RGATHER(len:Int): Unit ={ 1396 for (i <- 0 until len) 1397 for (j <- 0 until len) { 1398 // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm 1399 // csBundle(i * len + j).srcType(1) := SrcType.vp 1400 // csBundle(i * len + j).srcType(2) := SrcType.vp 1401 csBundle(i * len + j).lsrc(0) := src1 + i.U 1402 csBundle(i * len + j).lsrc(1) := src2 + j.U 1403 val vd_old = if(j==0) (dest + i.U) 
else (VECTOR_TMP_REG_LMUL + j - 1).U 1404 csBundle(i * len + j).lsrc(2) := vd_old 1405 val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U 1406 csBundle(i * len + j).ldest := vd 1407 csBundle(i * len + j).uopIdx := (i * len + j).U 1408 } 1409 } 1410 switch(vlmulReg) { 1411 is("b001".U ){ 1412 genCsBundle_VEC_RGATHER(2) 1413 } 1414 is("b010".U ){ 1415 genCsBundle_VEC_RGATHER(4) 1416 } 1417 is("b011".U ){ 1418 genCsBundle_VEC_RGATHER(8) 1419 } 1420 } 1421 } 1422 is(UopSplitType.VEC_RGATHER_VX) { 1423 def genCsBundle_RGATHER_VX(len:Int): Unit ={ 1424 for (i <- 0 until len) 1425 for (j <- 0 until len) { 1426 csBundle(i * len + j + 1).srcType(0) := SrcType.vp 1427 // csBundle(i * len + j + 1).srcType(1) := SrcType.vp 1428 // csBundle(i * len + j + 1).srcType(2) := SrcType.vp 1429 csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1430 csBundle(i * len + j + 1).lsrc(1) := src2 + j.U 1431 val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U 1432 csBundle(i * len + j + 1).lsrc(2) := vd_old 1433 val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U 1434 csBundle(i * len + j + 1).ldest := vd 1435 csBundle(i * len + j + 1).uopIdx := (i * len + j).U 1436 } 1437 } 1438 // i to vector move 1439 csBundle(0).srcType(0) := SrcType.reg 1440 csBundle(0).srcType(1) := SrcType.imm 1441 csBundle(0).lsrc(1) := 0.U 1442 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1443 csBundle(0).fuType := FuType.i2v.U 1444 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 1445 csBundle(0).rfWen := false.B 1446 csBundle(0).fpWen := false.B 1447 csBundle(0).vecWen := true.B 1448 genCsBundle_RGATHER_VX(1) 1449 switch(vlmulReg) { 1450 is("b001".U ){ 1451 genCsBundle_RGATHER_VX(2) 1452 } 1453 is("b010".U ){ 1454 genCsBundle_RGATHER_VX(4) 1455 } 1456 is("b011".U ){ 1457 genCsBundle_RGATHER_VX(8) 1458 } 1459 } 1460 } 1461 is(UopSplitType.VEC_RGATHEREI16) { 1462 def 
genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={ 1463 for (i <- 0 until len) 1464 for (j <- 0 until len) { 1465 val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U 1466 val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U 1467 csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U 1468 csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U 1469 csBundle((i * len + j)*2+0).lsrc(2) := vd_old0 1470 csBundle((i * len + j)*2+0).ldest := vd0 1471 csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U 1472 val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U 1473 val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U 1474 csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U 1475 csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U 1476 csBundle((i * len + j)*2+1).lsrc(2) := vd_old1 1477 csBundle((i * len + j)*2+1).ldest := vd1 1478 csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U 1479 } 1480 } 1481 def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={ 1482 for (i <- 0 until len) 1483 for (j <- 0 until len) { 1484 val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U 1485 val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U 1486 csBundle(i * len + j).lsrc(0) := src1 + i.U 1487 csBundle(i * len + j).lsrc(1) := src2 + j.U 1488 csBundle(i * len + j).lsrc(2) := vd_old 1489 csBundle(i * len + j).ldest := vd 1490 csBundle(i * len + j).uopIdx := (i * len + j).U 1491 } 1492 } 1493 def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={ 1494 for (i <- 0 until len) 1495 for (j <- 0 until len) { 1496 val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U 1497 val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U 1498 csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U 1499 csBundle(i * len + j).lsrc(1) := src2 + j.U 1500 csBundle(i * len + j).lsrc(2) := vd_old 1501 csBundle(i * len + j).ldest := vd 1502 csBundle(i * len + j).uopIdx := (i * len + j).U 1503 } 1504 } 1505 def 
genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={ 1506 for (i <- 0 until len) 1507 for (j <- 0 until len) { 1508 val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U 1509 val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U 1510 csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U 1511 csBundle(i * len + j).lsrc(1) := src2 + j.U 1512 csBundle(i * len + j).lsrc(2) := vd_old 1513 csBundle(i * len + j).ldest := vd 1514 csBundle(i * len + j).uopIdx := (i * len + j).U 1515 } 1516 } 1517 when(!vsewReg.orR){ 1518 genCsBundle_VEC_RGATHEREI16_SEW8(1) 1519 }.elsewhen(vsewReg === VSew.e32){ 1520 genCsBundle_VEC_RGATHEREI16_SEW32(1) 1521 }.elsewhen(vsewReg === VSew.e64){ 1522 genCsBundle_VEC_RGATHEREI16_SEW64(1) 1523 }.otherwise{ 1524 genCsBundle_VEC_RGATHEREI16(1) 1525 } 1526 switch(vlmulReg) { 1527 is("b001".U) { 1528 when(!vsewReg.orR) { 1529 genCsBundle_VEC_RGATHEREI16_SEW8(2) 1530 }.elsewhen(vsewReg === VSew.e32){ 1531 genCsBundle_VEC_RGATHEREI16_SEW32(2) 1532 }.elsewhen(vsewReg === VSew.e64){ 1533 genCsBundle_VEC_RGATHEREI16_SEW64(2) 1534 }.otherwise{ 1535 genCsBundle_VEC_RGATHEREI16(2) 1536 } 1537 } 1538 is("b010".U) { 1539 when(!vsewReg.orR) { 1540 genCsBundle_VEC_RGATHEREI16_SEW8(4) 1541 }.elsewhen(vsewReg === VSew.e32){ 1542 genCsBundle_VEC_RGATHEREI16_SEW32(4) 1543 }.elsewhen(vsewReg === VSew.e64){ 1544 genCsBundle_VEC_RGATHEREI16_SEW64(4) 1545 }.otherwise{ 1546 genCsBundle_VEC_RGATHEREI16(4) 1547 } 1548 } 1549 is("b011".U) { 1550 when(vsewReg === VSew.e32){ 1551 genCsBundle_VEC_RGATHEREI16_SEW32(8) 1552 }.elsewhen(vsewReg === VSew.e64){ 1553 genCsBundle_VEC_RGATHEREI16_SEW64(8) 1554 }.otherwise{ 1555 genCsBundle_VEC_RGATHEREI16(8) 1556 } 1557 } 1558 } 1559 } 1560 is(UopSplitType.VEC_COMPRESS) { 1561 def genCsBundle_VEC_COMPRESS(len:Int): Unit = { 1562 for (i <- 0 until len) { 1563 val jlen = if (i == len-1) i+1 else i+2 1564 for (j <- 0 until jlen) { 1565 val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U 1566 val 
vd = if(i==len-1) (dest + j.U) else { 1567 if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U 1568 } 1569 val src13Type = if (j == i+1) DontCare else SrcType.vp 1570 csBundle(i*(i+3)/2 + j).srcType(0) := src13Type 1571 csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp 1572 csBundle(i*(i+3)/2 + j).srcType(2) := src13Type 1573 csBundle(i*(i+3)/2 + j).srcType(3) := SrcType.vp 1574 csBundle(i*(i+3)/2 + j).lsrc(0) := src1 1575 csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U 1576 csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old 1577 csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U 1578 csBundle(i*(i+3)/2 + j).ldest := vd 1579 csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U 1580 } 1581 } 1582 } 1583 switch(vlmulReg) { 1584 is("b001".U ){ 1585 genCsBundle_VEC_COMPRESS(2) 1586 } 1587 is("b010".U ){ 1588 genCsBundle_VEC_COMPRESS(4) 1589 } 1590 is("b011".U ){ 1591 genCsBundle_VEC_COMPRESS(8) 1592 } 1593 } 1594 } 1595 is(UopSplitType.VEC_MVNR) { 1596 for (i <- 0 until MAX_VLMUL) { 1597 csBundle(i).lsrc(0) := src1 + i.U 1598 csBundle(i).lsrc(1) := src2 + i.U 1599 csBundle(i).lsrc(2) := dest + i.U 1600 csBundle(i).ldest := dest + i.U 1601 csBundle(i).uopIdx := i.U 1602 } 1603 } 1604 is(UopSplitType.VEC_US_LDST) { 1605 /* 1606 FMV.D.X 1607 */ 1608 csBundle(0).srcType(0) := SrcType.reg 1609 csBundle(0).srcType(1) := SrcType.imm 1610 csBundle(0).lsrc(1) := 0.U 1611 csBundle(0).ldest := FP_TMP_REG_MV.U 1612 csBundle(0).fuType := FuType.i2v.U 1613 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 1614 csBundle(0).rfWen := false.B 1615 csBundle(0).fpWen := true.B 1616 csBundle(0).vecWen := false.B 1617 csBundle(0).vlsInstr := true.B 1618 //LMUL 1619 for (i <- 0 until MAX_VLMUL) { 1620 csBundle(i + 1).srcType(0) := SrcType.fp 1621 csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U 1622 csBundle(i + 1).lsrc(2) := dest + i.U // old vd 1623 csBundle(i + 1).ldest := dest + i.U 1624 csBundle(i + 1).uopIdx := i.U 1625 csBundle(i + 1).vlsInstr := true.B 1626 } 1627 } 
// ---- uop splitting for VEC_S_LDST / VEC_I_LDST, then the uop hand-out state machine ----
    is(UopSplitType.VEC_S_LDST) {
      // uop 0 and uop 1 are both scalar moves executed on the i2v unit (the original
      // note labels them FMV.D.X). uop 0 writes FP_TMP_REG_MV and uop 1 writes
      // VECTOR_TMP_REG_LMUL; both are marked as fp writes, not vector writes.
      Seq(
        csBundle(0) -> FP_TMP_REG_MV,
        csBundle(1) -> VECTOR_TMP_REG_LMUL
      ).foreach { case (moveUop, tmpReg) =>
        moveUop.srcType(0) := SrcType.reg
        moveUop.srcType(1) := SrcType.imm
        moveUop.lsrc(1) := 0.U
        moveUop.ldest := tmpReg.U
        moveUop.fuType := FuType.i2v.U
        moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
        moveUop.rfWen := false.B
        moveUop.fpWen := true.B
        moveUop.vecWen := false.B
        moveUop.vlsInstr := true.B
      }
      // Only the second move overrides its first source: it reads the instruction's lsrc(1).
      // NOTE(review): presumably this is the stride operand of a strided access - confirm.
      csBundle(1).lsrc(0) := latchedInst.lsrc(1)

      // One load/store uop per register of the largest register group (MAX_VLMUL),
      // placed after the two moves; each one reads both temporaries.
      for (reg <- 0 until MAX_VLMUL) {
        val memUop = csBundle(reg + 2)
        val vdReg = dest + reg.U
        memUop.srcType(0) := SrcType.fp
        memUop.srcType(1) := SrcType.fp
        memUop.lsrc(0) := FP_TMP_REG_MV.U
        memUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
        memUop.lsrc(2) := vdReg // old vd
        memUop.ldest := vdReg
        memUop.uopIdx := reg.U
        memUop.vlsInstr := true.B
      }
    }
    is(UopSplitType.VEC_I_LDST) {
      // vemul = veew + vlmul + (~vsew + 1), i.e. veew - vsew + vlmul in two's-complement
      // arithmetic, with vsew and veew first extended by one leading zero bit.
      val sewExt = Cat(0.U(1.W), vsewReg)
      val eewExt = Cat(0.U(1.W), width)
      val vemul: UInt = eewExt.asUInt + 1.U + vlmulReg.asUInt + ~sewExt.asUInt
      // Reduce both multipliers to a 2-bit selector: encodings b001/b010/b011 map to
      // 1/2/3, every other encoding maps to 0.
      val lmulSel = MuxLookup(vlmulReg, 0.U(2.W))(Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val emulSel = MuxLookup(vemul, 0.U(2.W))(Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      // Every per-uop offset table is driven with the same {emul, lmul, nf} key.
      val offsetTableKey = Cat(emulSel, lmulSel, nf)

      // uop 0: scalar move into FP_TMP_REG_MV on the i2v unit (FMV.D.X in the original note).
      val moveUop = csBundle(0)
      moveUop.srcType(0) := SrcType.reg
      moveUop.srcType(1) := SrcType.imm
      moveUop.lsrc(1) := 0.U
      moveUop.ldest := FP_TMP_REG_MV.U
      moveUop.fuType := FuType.i2v.U
      moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      moveUop.rfWen := false.B
      moveUop.fpWen := true.B
      moveUop.vecWen := false.B
      moveUop.vlsInstr := true.B

      // Candidate registers the one-hot selections below choose from.
      val vs2Regs = (0 until MAX_VLMUL).map(j => src2 + j.U)
      val vdRegs = (0 until MAX_VLMUL).map(j => dest + j.U)

      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
        val offsetTable = indexedLSRegOffset(i)
        offsetTable.src := offsetTableKey

        val memUop = csBundle(i + 1)
        val vs2Reg = Mux1H(UIntToOH(offsetTable.outOffsetVs2, MAX_VLMUL), vs2Regs)
        val vdReg = Mux1H(UIntToOH(offsetTable.outOffsetVd, MAX_VLMUL), vdRegs)

        memUop.srcType(0) := SrcType.fp
        memUop.lsrc(0) := FP_TMP_REG_MV.U
        memUop.lsrc(1) := vs2Reg
        /**
          * For indexed instructions, VLSU concatenates all uops that write the same logical
          * vd register and writes back only once for all of them. Those uops would share the
          * same lsrc(2) (old vd) and the same ldest (vd, equal to old vd), which creates a data
          * dependence between them and therefore a deadlock when emul > lmul.
          *
          * With N = emul / lmul, the deadlock is broken by letting only the first uop of each
          * vd read old vd as lsrc(2); the remaining N - 1 uops read the temporary vector
          * register instead. (Previously lsrc(2) was always the selected vd register.)
          */
        memUop.lsrc(2) := Mux(offsetTable.outIsFirstUopInVd, vdReg, VECTOR_TMP_REG_LMUL.U)
        memUop.ldest := vdReg
        memUop.uopIdx := i.U
        memUop.vlsInstr := true.B
      }
    }
  } // closes the uop-split switch opened earlier in the class

  // Ready counter built from the per-slot ready flags of rename.
  // NOTE(review): presumably the index of the first slot that is not ready, or
  // RenameWidth when all are ready - confirm against PriorityMuxDefault.
  val readyCounter = PriorityMuxDefault(
    outReadys.zipWithIndex.map { case (slotReady, slot) => (!slotReady, slot.U) },
    RenameWidth.U
  )

  // True when all remaining uops of the current complex instruction can be sent out this cycle.
  val thisAllOut = uopRes <= readyCounter

  switch(state) {
    is(s_idle) {
      // Start splitting as soon as an instruction is presented.
      when (inValid) {
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }
    }
    is(s_active) {
      when (!thisAllOut) {
        // Still uops left after this cycle: stay active and subtract what was taken.
        stateNext := s_active
        uopResNext := uopRes - readyCounter
      }.elsewhen (inValid) {
        // Current instruction drains this cycle and another one is already waiting.
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }.otherwise {
        stateNext := s_idle
        uopResNext := 0.U
      }
    }
  }

  // A redirect drops the instruction in flight.
  state := Mux(io.redirect, s_idle, stateNext)
  uopRes := Mux(io.redirect, 0.U, uopResNext)

  // Number of uops offered this cycle: the smaller of the remaining uops and readyCounter.
  val complexNum = Mux(thisAllOut, uopRes, readyCounter)

  for (i <- 0 until RenameWidth) {
    // Index of the uop shown on output slot i; the last csBundle entry is used when the
    // index would fall outside the bundle.
    val uopSel = i.U + numOfUop - uopRes
    outValids(i) := complexNum > i.U
    outDecodedInsts(i) := Mux(uopSel < maxUopSize.U, csBundle(uopSel), csBundle(maxUopSize - 1))
  }

  outComplexNum := Mux(state === s_active, complexNum, 0.U)
  // A new instruction is accepted when idle, or when the current one finishes this cycle.
  inReady := (state === s_idle) || (state === s_active && thisAllOut)

// Disabled legacy valid/ready generation, kept for reference:
//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
//  val notInf = Wire(Vec(DecodeWidth, Bool()))
//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
//
//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
//    0.U)
//  validToRename.zipWithIndex.foreach{
//    case(dst, i) =>
//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
//      dst := MuxCase(false.B, Seq(
//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
//      ).toSeq)
//  }
//
//  readyToIBuf.zipWithIndex.foreach {
//    case (dst, i) =>
//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
//      dst := MuxCase(true.B, Seq(
//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
//      ).toSeq)
//  }
//
//  io.deq.decodedInsts := decodedInsts
//  io.deq.complexNum := complexNum
//  io.deq.validToRename := validToRename
//  io.deq.readyToIBuf := readyToIBuf
}