/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Combinational lookup table giving, for one fixed uop index, the register
  * offsets of vs2 and vd of a non-segment indexed vector load/store.
  *
  * The 4-bit `src` input is interpreted as `{emul[1:0], lmul[1:0]}`, where both
  * fields are exponents (the code uses `1 << emul` / `1 << lmul`). The table is
  * computed at elaboration time for all 16 combinations and minimized with
  * [[QMCMinimizer]].
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  /** `{emul, lmul}` selector: bits (3, 2) are emul, bits (1, 0) are lmul. */
  val src = IO(Input(UInt(4.W)))
  /** Register offset to add to vs2 for this uop. */
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  /** Register offset to add to vd for this uop. */
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair of uop `uopIdx`.
    *
    * The number of uops is `1 << max(lmul, emul)`. The operand with the larger
    * exponent advances one register per uop, the other one advances once
    * every `1 << |emul - lmul|` uops.
    *
    * @param lmul   exponent used for the vd side
    * @param emul   exponent used for the vs2 side
    * @param uopIdx index of the uop
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is not a valid
    *         uop index for this lmul/emul combination
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    val uopNum = 1 << math.max(lmul, emul)
    if (uopIdx < 0 || uopIdx >= uopNum) {
      // this uop does not exist for the given lmul/emul
      (0, 0)
    } else if (lmul < emul) {
      // lmul < emul, uop num is depend on emul * nf
      (uopIdx, uopIdx / (1 << (emul - lmul)))
    } else {
      // lmul >= emul, uop num is depend on lmul * nf
      (uopIdx / (1 << (lmul - emul)), uopIdx)
    }
  }

  // indexed load/store: one (emul, lmul, offsetVs2, offsetVd) row per selector value,
  // enumerated with emul as the outer and lmul as the inner index
  val combVemulNf: Seq[(Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
  } yield {
    val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
    (emul, lmul, offsetVs2, offsetVd)
  }

  // output encoding: {offsetVs2[2:0], offsetVd[2:0]}
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}

/**
  * Constants shared by the vector part of the decoder.
  */
trait VectorConstants {
  // upper bound used for the per-register uop loops of the complex decoder
  val MAX_VLMUL = 8
  // first logical index of the temporary vector registers
  val VECTOR_TMP_REG_LMUL = 33 // 33~47  ->  15
  val VECTOR_COMPRESS = 1 // in v0 regfile
  val MAX_INDEXED_LS_UOPNUM = 64
}

/**
  * IO bundle of [[DecodeUnitComp]].
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // up to RenameWidth decoded uops per cycle, each with its own handshake
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 112 private val inUopInfo = io.in.bits.uopInfo 113 private val outValids = io.out.complexDecodedInsts.map(_.valid) 114 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 115 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 116 private val outComplexNum = io.complexNum 117 118 val maxUopSize = MaxUopSize 119 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 120 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 121 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 122 }.elsewhen(inInstFields.RS1 === 0.U) { 123 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 124 } 125 } 126 127 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 128 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 129 //input bits 130 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 131 132 val src1 = Cat(0.U(1.W), instFields.RS1) 133 val src2 = Cat(0.U(1.W), instFields.RS2) 134 val dest = Cat(0.U(1.W), instFields.RD) 135 136 val nf = instFields.NF 137 val width = instFields.WIDTH(1, 0) 138 139 //output of DecodeUnit 140 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 141 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 142 val lmul = Wire(UInt(4.W)) 143 val isVsetSimple = Wire(Bool()) 144 145 val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i))) 146 indexedLSRegOffset.map(_.src := 0.U) 147 148 //pre decode 149 lmul := latchedUopInfo.lmul 150 isVsetSimple := latchedInst.isVset 151 val vlmulReg = latchedInst.vpu.vlmul 152 val vsewReg = latchedInst.vpu.vsew 153 154 //Type of uop Div 155 val typeOfSplit = latchedInst.uopSplitType 156 val src1Type = latchedInst.srcType(0) 157 val src1IsImm = src1Type === SrcType.imm 158 val src1IsFp = src1Type === SrcType.fp 159 160 val isVstore = 
FuType.isVStore(latchedInst.fuType) 161 162 numOfUop := latchedUopInfo.numOfUop 163 numOfWB := latchedUopInfo.numOfWB 164 165 //uops dispatch 166 val s_idle :: s_active :: Nil = Enum(2) 167 val state = RegInit(s_idle) 168 val stateNext = WireDefault(state) 169 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 170 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 171 val uopResNext = WireInit(uopRes) 172 val e64 = 3.U(2.W) 173 val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U) 174 val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U 175 val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U 176 177 //uop div up to maxUopSize 178 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 179 csBundle.foreach { case dst => 180 dst := latchedInst 181 dst.numUops := latchedUopInfo.numOfUop 182 dst.numWB := latchedUopInfo.numOfWB 183 dst.firstUop := false.B 184 dst.lastUop := false.B 185 dst.vlsInstr := false.B 186 } 187 188 csBundle(0).firstUop := true.B 189 csBundle(numOfUop - 1.U).lastUop := true.B 190 191 switch(typeOfSplit) { 192 is(UopSplitType.VSET) { 193 // In simple decoder, rfWen and vecWen are not set 194 when(isVsetSimple) { 195 // Default 196 // uop0 set rd, never flushPipe 197 csBundle(0).fuType := FuType.vsetiwi.U 198 csBundle(0).flushPipe := false.B 199 csBundle(0).rfWen := true.B 200 // uop1 set vl, vsetvl will flushPipe 201 csBundle(1).ldest := Vl_IDX.U 202 csBundle(1).vecWen := false.B 203 csBundle(1).vlWen := true.B 204 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 205 // write nothing, uop0 is a nop instruction 206 csBundle(0).rfWen := false.B 207 csBundle(0).fpWen := false.B 208 csBundle(0).vecWen := false.B 209 csBundle(0).vlWen := false.B 210 csBundle(1).fuType := FuType.vsetfwf.U 211 csBundle(1).srcType(0) := SrcType.no 212 csBundle(1).srcType(2) := SrcType.no 213 csBundle(1).srcType(3) := SrcType.no 214 csBundle(1).srcType(4) := 
SrcType.vp 215 csBundle(1).lsrc(4) := Vl_IDX.U 216 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 217 // uop0: mv vtype gpr to vector region 218 csBundle(0).srcType(0) := SrcType.xp 219 csBundle(0).srcType(1) := SrcType.no 220 csBundle(0).lsrc(0) := src2 221 csBundle(0).lsrc(1) := 0.U 222 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 223 csBundle(0).fuType := FuType.i2v.U 224 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 225 csBundle(0).rfWen := false.B 226 csBundle(0).fpWen := false.B 227 csBundle(0).vecWen := true.B 228 csBundle(0).vlWen := false.B 229 // uop1: uvsetvcfg_vv 230 csBundle(1).fuType := FuType.vsetfwf.U 231 // vl 232 csBundle(1).srcType(0) := SrcType.no 233 csBundle(1).srcType(2) := SrcType.no 234 csBundle(1).srcType(3) := SrcType.no 235 csBundle(1).srcType(4) := SrcType.vp 236 csBundle(1).lsrc(4) := Vl_IDX.U 237 // vtype 238 csBundle(1).srcType(1) := SrcType.vp 239 csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U 240 csBundle(1).vecWen := false.B 241 csBundle(1).vlWen := true.B 242 csBundle(1).ldest := Vl_IDX.U 243 }.elsewhen(dest === 0.U) { 244 // write nothing, uop0 is a nop instruction 245 csBundle(0).rfWen := false.B 246 csBundle(0).fpWen := false.B 247 csBundle(0).vecWen := false.B 248 csBundle(0).vlWen := false.B 249 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) { 250 // because vsetvl may modified src2 when src2 == rd, 251 // we need to modify vd in second uop to avoid dependency 252 // uop0 set vl 253 csBundle(0).fuType := FuType.vsetiwf.U 254 csBundle(0).ldest := Vl_IDX.U 255 csBundle(0).rfWen := false.B 256 csBundle(0).vlWen := true.B 257 // uop1 set rd 258 csBundle(1).fuType := FuType.vsetiwi.U 259 csBundle(1).ldest := dest 260 csBundle(1).rfWen := true.B 261 csBundle(1).vlWen := false.B 262 } 263 // use bypass vtype from vtypeGen 264 csBundle(0).vpu.connectVType(io.vtypeBypass) 265 csBundle(1).vpu.connectVType(io.vtypeBypass) 266 } 267 } 268 is(UopSplitType.VEC_VVV) { 269 for (i <- 
0 until MAX_VLMUL) { 270 csBundle(i).lsrc(0) := src1 + i.U 271 csBundle(i).lsrc(1) := src2 + i.U 272 csBundle(i).lsrc(2) := dest + i.U 273 csBundle(i).ldest := dest + i.U 274 csBundle(i).uopIdx := i.U 275 } 276 } 277 is(UopSplitType.VEC_VFV) { 278 /* 279 f to vector move 280 */ 281 csBundle(0).srcType(0) := SrcType.fp 282 csBundle(0).srcType(1) := SrcType.imm 283 csBundle(0).srcType(2) := SrcType.imm 284 csBundle(0).lsrc(1) := 0.U 285 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 286 csBundle(0).fuType := FuType.f2v.U 287 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 288 csBundle(0).vecWen := true.B 289 csBundle(0).vpu.isReverse := false.B 290 /* 291 LMUL 292 */ 293 for (i <- 0 until MAX_VLMUL) { 294 csBundle(i + 1).srcType(0) := SrcType.vp 295 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 296 csBundle(i + 1).lsrc(1) := src2 + i.U 297 csBundle(i + 1).lsrc(2) := dest + i.U 298 csBundle(i + 1).ldest := dest + i.U 299 csBundle(i + 1).uopIdx := i.U 300 } 301 } 302 is(UopSplitType.VEC_EXT2) { 303 for (i <- 0 until MAX_VLMUL / 2) { 304 csBundle(2 * i).lsrc(1) := src2 + i.U 305 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 306 csBundle(2 * i).ldest := dest + (2 * i).U 307 csBundle(2 * i).uopIdx := (2 * i).U 308 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 309 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 310 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 311 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 312 } 313 } 314 is(UopSplitType.VEC_EXT4) { 315 for (i <- 0 until MAX_VLMUL / 4) { 316 csBundle(4 * i).lsrc(1) := src2 + i.U 317 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 318 csBundle(4 * i).ldest := dest + (4 * i).U 319 csBundle(4 * i).uopIdx := (4 * i).U 320 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 321 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 322 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 323 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 324 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 325 csBundle(4 * i + 2).lsrc(2) := dest + (4 * 
i + 2).U 326 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 327 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 328 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 329 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 330 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 331 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 332 } 333 } 334 is(UopSplitType.VEC_EXT8) { 335 for (i <- 0 until MAX_VLMUL) { 336 csBundle(i).lsrc(1) := src2 337 csBundle(i).lsrc(2) := dest + i.U 338 csBundle(i).ldest := dest + i.U 339 csBundle(i).uopIdx := i.U 340 } 341 } 342 is(UopSplitType.VEC_0XV) { 343 /* 344 i/f to vector move 345 */ 346 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 347 csBundle(0).srcType(1) := SrcType.imm 348 csBundle(0).srcType(2) := SrcType.imm 349 csBundle(0).lsrc(1) := 0.U 350 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 351 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 352 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 353 csBundle(0).rfWen := false.B 354 csBundle(0).fpWen := false.B 355 csBundle(0).vecWen := true.B 356 /* 357 vmv.s.x 358 */ 359 csBundle(1).srcType(0) := SrcType.vp 360 csBundle(1).srcType(1) := SrcType.imm 361 csBundle(1).srcType(2) := SrcType.vp 362 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 363 csBundle(1).lsrc(1) := 0.U 364 csBundle(1).lsrc(2) := dest 365 csBundle(1).ldest := dest 366 csBundle(1).rfWen := false.B 367 csBundle(1).fpWen := false.B 368 csBundle(1).vecWen := true.B 369 csBundle(1).uopIdx := 0.U 370 } 371 is(UopSplitType.VEC_VXV) { 372 /* 373 i to vector move 374 */ 375 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 376 csBundle(0).srcType(1) := SrcType.imm 377 csBundle(0).srcType(2) := SrcType.imm 378 csBundle(0).lsrc(1) := 0.U 379 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 380 csBundle(0).fuType := FuType.i2v.U 381 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), 
vsewReg) 382 csBundle(0).vecWen := true.B 383 csBundle(0).vpu.isReverse := false.B 384 /* 385 LMUL 386 */ 387 for (i <- 0 until MAX_VLMUL) { 388 csBundle(i + 1).srcType(0) := SrcType.vp 389 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 390 csBundle(i + 1).lsrc(1) := src2 + i.U 391 csBundle(i + 1).lsrc(2) := dest + i.U 392 csBundle(i + 1).ldest := dest + i.U 393 csBundle(i + 1).uopIdx := i.U 394 } 395 } 396 is(UopSplitType.VEC_VVW) { 397 for (i <- 0 until MAX_VLMUL / 2) { 398 csBundle(2 * i).lsrc(0) := src1 + i.U 399 csBundle(2 * i).lsrc(1) := src2 + i.U 400 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 401 csBundle(2 * i).ldest := dest + (2 * i).U 402 csBundle(2 * i).uopIdx := (2 * i).U 403 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 404 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 405 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 406 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 407 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 408 } 409 } 410 is(UopSplitType.VEC_VFW) { 411 /* 412 f to vector move 413 */ 414 csBundle(0).srcType(0) := SrcType.fp 415 csBundle(0).srcType(1) := SrcType.imm 416 csBundle(0).srcType(2) := SrcType.imm 417 csBundle(0).lsrc(1) := 0.U 418 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 419 csBundle(0).fuType := FuType.f2v.U 420 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 421 csBundle(0).rfWen := false.B 422 csBundle(0).fpWen := false.B 423 csBundle(0).vecWen := true.B 424 425 for (i <- 0 until MAX_VLMUL / 2) { 426 csBundle(2 * i + 1).srcType(0) := SrcType.vp 427 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 428 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 429 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 430 csBundle(2 * i + 1).ldest := dest + (2 * i).U 431 csBundle(2 * i + 1).uopIdx := (2 * i).U 432 csBundle(2 * i + 2).srcType(0) := SrcType.vp 433 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 434 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 435 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 436 
csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 437 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 438 } 439 } 440 is(UopSplitType.VEC_WVW) { 441 for (i <- 0 until MAX_VLMUL / 2) { 442 csBundle(2 * i).lsrc(0) := src1 + i.U 443 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 444 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 445 csBundle(2 * i).ldest := dest + (2 * i).U 446 csBundle(2 * i).uopIdx := (2 * i).U 447 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 448 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 449 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 450 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 451 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 452 } 453 } 454 is(UopSplitType.VEC_VXW) { 455 /* 456 i to vector move 457 */ 458 csBundle(0).srcType(0) := SrcType.reg 459 csBundle(0).srcType(1) := SrcType.imm 460 csBundle(0).srcType(2) := SrcType.imm 461 csBundle(0).lsrc(1) := 0.U 462 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 463 csBundle(0).fuType := FuType.i2v.U 464 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 465 csBundle(0).vecWen := true.B 466 467 for (i <- 0 until MAX_VLMUL / 2) { 468 csBundle(2 * i + 1).srcType(0) := SrcType.vp 469 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 470 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 471 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 472 csBundle(2 * i + 1).ldest := dest + (2 * i).U 473 csBundle(2 * i + 1).uopIdx := (2 * i).U 474 csBundle(2 * i + 2).srcType(0) := SrcType.vp 475 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 476 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 477 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 478 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 479 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 480 } 481 } 482 is(UopSplitType.VEC_WXW) { 483 /* 484 i to vector move 485 */ 486 csBundle(0).srcType(0) := SrcType.reg 487 csBundle(0).srcType(1) := SrcType.imm 488 csBundle(0).srcType(2) := SrcType.imm 489 csBundle(0).lsrc(1) := 0.U 490 csBundle(0).ldest := 
VECTOR_TMP_REG_LMUL.U 491 csBundle(0).fuType := FuType.i2v.U 492 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 493 csBundle(0).vecWen := true.B 494 495 for (i <- 0 until MAX_VLMUL / 2) { 496 csBundle(2 * i + 1).srcType(0) := SrcType.vp 497 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 498 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 499 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 500 csBundle(2 * i + 1).ldest := dest + (2 * i).U 501 csBundle(2 * i + 1).uopIdx := (2 * i).U 502 csBundle(2 * i + 2).srcType(0) := SrcType.vp 503 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 504 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 505 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 506 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 507 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 508 } 509 } 510 is(UopSplitType.VEC_WVV) { 511 for (i <- 0 until MAX_VLMUL / 2) { 512 513 csBundle(2 * i).lsrc(0) := src1 + i.U 514 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 515 csBundle(2 * i).lsrc(2) := dest + i.U 516 csBundle(2 * i).ldest := dest + i.U 517 csBundle(2 * i).uopIdx := (2 * i).U 518 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 519 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 520 csBundle(2 * i + 1).lsrc(2) := dest + i.U 521 csBundle(2 * i + 1).ldest := dest + i.U 522 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 523 } 524 } 525 is(UopSplitType.VEC_WFW) { 526 /* 527 f to vector move 528 */ 529 csBundle(0).srcType(0) := SrcType.fp 530 csBundle(0).srcType(1) := SrcType.imm 531 csBundle(0).srcType(2) := SrcType.imm 532 csBundle(0).lsrc(1) := 0.U 533 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 534 csBundle(0).fuType := FuType.f2v.U 535 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 536 csBundle(0).rfWen := false.B 537 csBundle(0).fpWen := false.B 538 csBundle(0).vecWen := true.B 539 540 for (i <- 0 until MAX_VLMUL / 2) { 541 csBundle(2 * i + 1).srcType(0) := SrcType.vp 542 csBundle(2 * i + 1).lsrc(0) := 
VECTOR_TMP_REG_LMUL.U 543 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 544 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 545 csBundle(2 * i + 1).ldest := dest + (2 * i).U 546 csBundle(2 * i + 1).uopIdx := (2 * i).U 547 csBundle(2 * i + 2).srcType(0) := SrcType.vp 548 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 549 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 550 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 551 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 552 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 553 } 554 } 555 is(UopSplitType.VEC_WXV) { 556 /* 557 i to vector move 558 */ 559 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 560 csBundle(0).srcType(1) := SrcType.imm 561 csBundle(0).srcType(2) := SrcType.imm 562 csBundle(0).lsrc(1) := 0.U 563 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 564 csBundle(0).fuType := FuType.i2v.U 565 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 566 csBundle(0).vecWen := true.B 567 568 for (i <- 0 until MAX_VLMUL / 2) { 569 csBundle(2 * i + 1).srcType(0) := SrcType.vp 570 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 571 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 572 csBundle(2 * i + 1).lsrc(2) := dest + i.U 573 csBundle(2 * i + 1).ldest := dest + i.U 574 csBundle(2 * i + 1).uopIdx := (2 * i).U 575 csBundle(2 * i + 2).srcType(0) := SrcType.vp 576 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 577 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 578 csBundle(2 * i + 2).lsrc(2) := dest + i.U 579 csBundle(2 * i + 2).ldest := dest + i.U 580 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 581 } 582 } 583 is(UopSplitType.VEC_VVM) { 584 csBundle(0).lsrc(2) := dest 585 csBundle(0).ldest := dest 586 csBundle(0).uopIdx := 0.U 587 for (i <- 1 until MAX_VLMUL) { 588 csBundle(i).lsrc(0) := src1 + i.U 589 csBundle(i).lsrc(1) := src2 + i.U 590 csBundle(i).lsrc(2) := dest 591 csBundle(i).ldest := dest 592 
csBundle(i).uopIdx := i.U 593 } 594 } 595 is(UopSplitType.VEC_VFM) { 596 /* 597 f to vector move 598 */ 599 csBundle(0).srcType(0) := SrcType.fp 600 csBundle(0).srcType(1) := SrcType.imm 601 csBundle(0).srcType(2) := SrcType.imm 602 csBundle(0).lsrc(1) := 0.U 603 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 604 csBundle(0).fuType := FuType.f2v.U 605 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 606 csBundle(0).rfWen := false.B 607 csBundle(0).fpWen := false.B 608 csBundle(0).vecWen := true.B 609 //LMUL 610 csBundle(1).srcType(0) := SrcType.vp 611 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 612 csBundle(1).lsrc(2) := dest 613 csBundle(1).ldest := dest 614 csBundle(1).uopIdx := 0.U 615 for (i <- 1 until MAX_VLMUL) { 616 csBundle(i + 1).srcType(0) := SrcType.vp 617 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 618 csBundle(i + 1).lsrc(1) := src2 + i.U 619 csBundle(i + 1).lsrc(2) := dest 620 csBundle(i + 1).ldest := dest 621 csBundle(i + 1).uopIdx := i.U 622 } 623 csBundle(numOfUop - 1.U).ldest := dest 624 } 625 is(UopSplitType.VEC_VXM) { 626 /* 627 i to vector move 628 */ 629 csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg) 630 csBundle(0).srcType(1) := SrcType.imm 631 csBundle(0).srcType(2) := SrcType.imm 632 csBundle(0).lsrc(1) := 0.U 633 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 634 csBundle(0).fuType := FuType.i2v.U 635 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 636 csBundle(0).vecWen := true.B 637 //LMUL 638 csBundle(1).srcType(0) := SrcType.vp 639 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 640 csBundle(1).lsrc(2) := dest 641 csBundle(1).ldest := dest 642 csBundle(1).uopIdx := 0.U 643 for (i <- 1 until MAX_VLMUL) { 644 csBundle(i + 1).srcType(0) := SrcType.vp 645 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 646 csBundle(i + 1).lsrc(1) := src2 + i.U 647 csBundle(i + 1).lsrc(2) := dest 648 csBundle(i + 1).ldest := dest 649 csBundle(i + 1).uopIdx 
:= i.U 650 } 651 csBundle(numOfUop - 1.U).ldest := dest 652 } 653 is(UopSplitType.VEC_SLIDE1UP) { 654 /* 655 i to vector move 656 */ 657 csBundle(0).srcType(0) := SrcType.reg 658 csBundle(0).srcType(1) := SrcType.imm 659 csBundle(0).srcType(2) := SrcType.imm 660 csBundle(0).lsrc(1) := 0.U 661 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 662 csBundle(0).fuType := FuType.i2v.U 663 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 664 csBundle(0).vecWen := true.B 665 //LMUL 666 csBundle(1).srcType(0) := SrcType.vp 667 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 668 csBundle(1).lsrc(2) := dest 669 csBundle(1).ldest := dest 670 csBundle(1).uopIdx := 0.U 671 for (i <- 1 until MAX_VLMUL) { 672 csBundle(i + 1).srcType(0) := SrcType.vp 673 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 674 csBundle(i + 1).lsrc(1) := src2 + i.U 675 csBundle(i + 1).lsrc(2) := dest + i.U 676 csBundle(i + 1).ldest := dest + i.U 677 csBundle(i + 1).uopIdx := i.U 678 } 679 } 680 is(UopSplitType.VEC_FSLIDE1UP) { 681 /* 682 f to vector move 683 */ 684 csBundle(0).srcType(0) := SrcType.fp 685 csBundle(0).srcType(1) := SrcType.imm 686 csBundle(0).srcType(2) := SrcType.imm 687 csBundle(0).lsrc(1) := 0.U 688 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 689 csBundle(0).fuType := FuType.f2v.U 690 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 691 csBundle(0).rfWen := false.B 692 csBundle(0).fpWen := false.B 693 csBundle(0).vecWen := true.B 694 //LMUL 695 csBundle(1).srcType(0) := SrcType.vp 696 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 697 csBundle(1).lsrc(1) := src2 698 csBundle(1).lsrc(2) := dest 699 csBundle(1).ldest := dest 700 csBundle(1).uopIdx := 0.U 701 for (i <- 1 until MAX_VLMUL) { 702 csBundle(i + 1).srcType(0) := SrcType.vp 703 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 704 csBundle(i + 1).lsrc(1) := src2 + i.U 705 csBundle(i + 1).lsrc(2) := dest + i.U 706 csBundle(i + 1).ldest := dest + i.U 707 csBundle(i + 1).uopIdx := i.U 708 } 709 } 710 
is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 711 /* 712 i to vector move 713 */ 714 csBundle(0).srcType(0) := SrcType.reg 715 csBundle(0).srcType(1) := SrcType.imm 716 csBundle(0).srcType(2) := SrcType.imm 717 csBundle(0).lsrc(1) := 0.U 718 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 719 csBundle(0).fuType := FuType.i2v.U 720 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 721 csBundle(0).vecWen := true.B 722 //LMUL 723 for (i <- 0 until MAX_VLMUL) { 724 csBundle(2 * i + 1).srcType(0) := SrcType.vp 725 csBundle(2 * i + 1).srcType(1) := SrcType.vp 726 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 727 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 728 csBundle(2 * i + 1).lsrc(2) := dest + i.U 729 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 730 csBundle(2 * i + 1).uopIdx := (2 * i).U 731 if (2 * i + 2 < MAX_VLMUL * 2) { 732 csBundle(2 * i + 2).srcType(0) := SrcType.vp 733 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 734 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 735 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 736 csBundle(2 * i + 2).ldest := dest + i.U 737 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 738 } 739 } 740 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 741 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 742 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 743 } 744 is(UopSplitType.VEC_FSLIDE1DOWN) { 745 /* 746 f to vector move 747 */ 748 csBundle(0).srcType(0) := SrcType.fp 749 csBundle(0).srcType(1) := SrcType.imm 750 csBundle(0).srcType(2) := SrcType.imm 751 csBundle(0).lsrc(1) := 0.U 752 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 753 csBundle(0).fuType := FuType.f2v.U 754 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 755 csBundle(0).rfWen := false.B 756 csBundle(0).fpWen := false.B 757 csBundle(0).vecWen := true.B 758 //LMUL 759 for (i <- 0 until MAX_VLMUL) { 760 csBundle(2 * i + 1).srcType(0) := SrcType.vp 761 csBundle(2 * i + 1).srcType(1) := 
SrcType.vp 762 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 763 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 764 csBundle(2 * i + 1).lsrc(2) := dest + i.U 765 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 766 csBundle(2 * i + 1).uopIdx := (2 * i).U 767 if (2 * i + 2 < MAX_VLMUL * 2) { 768 csBundle(2 * i + 2).srcType(0) := SrcType.vp 769 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 770 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 771 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 772 csBundle(2 * i + 2).ldest := dest + i.U 773 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 774 } 775 } 776 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 777 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 778 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 779 } 780 is(UopSplitType.VEC_VRED) { 781 when(vlmulReg === "b001".U) { 782 csBundle(0).srcType(2) := SrcType.DC 783 csBundle(0).lsrc(0) := src2 + 1.U 784 csBundle(0).lsrc(1) := src2 785 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 786 csBundle(0).uopIdx := 0.U 787 } 788 when(vlmulReg === "b010".U) { 789 csBundle(0).srcType(2) := SrcType.DC 790 csBundle(0).lsrc(0) := src2 + 1.U 791 csBundle(0).lsrc(1) := src2 792 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 793 csBundle(0).uopIdx := 0.U 794 795 csBundle(1).srcType(2) := SrcType.DC 796 csBundle(1).lsrc(0) := src2 + 3.U 797 csBundle(1).lsrc(1) := src2 + 2.U 798 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 799 csBundle(1).uopIdx := 1.U 800 801 csBundle(2).srcType(2) := SrcType.DC 802 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 803 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 804 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 805 csBundle(2).uopIdx := 2.U 806 } 807 when(vlmulReg === "b011".U) { 808 for (i <- 0 until MAX_VLMUL) { 809 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 810 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 811 csBundle(i).lsrc(1) := src2 + (i * 2).U 812 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 813 } else 
if (i < MAX_VLMUL - MAX_VLMUL / 4) {
  csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
  csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
  csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
} else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
  csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
  csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
  csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
}
csBundle(i).srcType(2) := SrcType.DC
csBundle(i).uopIdx := i.U
}
}
when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
  /*
   * 2 <= vlmul <= 8
   */
  csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
  csBundle(numOfUop - 1.U).lsrc(0) := src1
  csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
  csBundle(numOfUop - 1.U).lsrc(2) := dest
  csBundle(numOfUop - 1.U).ldest := dest
  csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
}
}
is(UopSplitType.VEC_VFRED) {
  // Uop layout generated for every supported (vlmul, vsew) pair:
  //   1. for vlmul m2/m4/m8, a pairwise tree that combines the registers of
  //      the src2 group into temporaries starting at VECTOR_TMP_REG_LMUL;
  //   2. a chain of uops that each read one register on both lsrc(0) and
  //      lsrc(1) and set one isFoldTo1_N flag;
  //   3. a final uop reading src1 and the last temporary and writing dest.
  val vlmul = vlmulReg
  val vsew = vsewReg
  val tmpBase = VECTOR_TMP_REG_LMUL

  // Returns the isFoldTo1_<ratio> field of uop `idx`; ratio must be 2, 4 or 8.
  def foldFlag(idx: Int, ratio: Int) = ratio match {
    case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2
    case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4
    case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8
  }

  // Emits uops 0 until numRegs-1. The first numRegs/2 uops pair up the
  // registers src2+2u / src2+2u+1; the remaining ones pair up temporaries
  // written by earlier uops. Uop u always writes temporary tmpBase+u.
  def genPairTree(numRegs: Int): Unit = {
    val firstLevel = numRegs / 2
    for (u <- 0 until numRegs - 1) {
      val uop = csBundle(u)
      if (u < firstLevel) {
        uop.lsrc(0) := src2 + (u * 2 + 1).U
        uop.lsrc(1) := src2 + (u * 2).U
      } else {
        uop.lsrc(0) := (tmpBase + (u - firstLevel) * 2 + 1).U
        uop.lsrc(1) := (tmpBase + (u - firstLevel) * 2).U
      }
      uop.ldest := (tmpBase + u).U
      uop.uopIdx := u.U
    }
  }

  // Emits one fold uop per entry of `ratios`, starting at uop index `first`,
  // followed by the final uop. The first fold uop reads `firstSrc`; each
  // later one reads the temporary written by its predecessor.
  def genFoldChain(first: Int, firstSrc: UInt, ratios: Seq[Int]): Unit = {
    for ((ratio, k) <- ratios.zipWithIndex) {
      val idx = first + k
      val in = if (k == 0) firstSrc else (tmpBase + idx - 1).U
      csBundle(idx).lsrc(0) := in
      csBundle(idx).lsrc(1) := in
      csBundle(idx).ldest := (tmpBase + idx).U
      foldFlag(idx, ratio) := true.B
      csBundle(idx).uopIdx := idx.U
    }
    val last = first + ratios.length
    csBundle(last).lsrc(0) := src1
    csBundle(last).lsrc(1) := (tmpBase + last - 1).U
    csBundle(last).ldest := dest
    csBundle(last).uopIdx := last.U
  }

  // numRegs: registers in the src2 group (1 for m1 and fractional vlmul).
  // skipFolds: number of leading fold steps dropped from the per-SEW list
  // (0 for integer vlmul, 1 for mf2, 2 for mf4). A SEW whose list becomes
  // empty gets no uops from this arm.
  def genReduce(numRegs: Int, skipFolds: Int): Unit = {
    if (numRegs > 1) genPairTree(numRegs)
    val chainStart = numRegs - 1
    val chainSrc = if (numRegs > 1) (tmpBase + numRegs - 2).U else src2
    for ((sew, ratios) <- Seq(VSew.e64 -> Seq(2), VSew.e32 -> Seq(2, 4), VSew.e16 -> Seq(2, 4, 8))) {
      val remaining = ratios.drop(skipFolds)
      if (remaining.nonEmpty) {
        when(vsew === sew) {
          genFoldChain(chainStart, chainSrc, remaining)
        }
      }
    }
  }

  when(vlmul === VLmul.m8) { genReduce(8, 0) }
  when(vlmul === VLmul.m4) { genReduce(4, 0) }
  when(vlmul === VLmul.m2) { genReduce(2, 0) }
  when(vlmul === VLmul.m1) { genReduce(1, 0) }
  when(vlmul === VLmul.mf2) { genReduce(1, 1) }
  when(vlmul === VLmul.mf4) { genReduce(1, 2) }
}

is(UopSplitType.VEC_VFREDOSUM) {
  // Emits a chain of `vlmax` uops that all accumulate through the single
  // temporary register VECTOR_TMP_REG_LMUL. Uop 0 reads src1, the last uop
  // writes dest, and every `group`-th uop reads the next register of the
  // src2 group; all other uops set a fold flag instead.
  import yunsuan.VfaluType
  val vlmul = vlmulReg
  val vsew = vsewReg
  val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

  def tmp = VECTOR_TMP_REG_LMUL.U

  // Returns the isFoldTo1_<ratio> field of uop `idx`; ratio must be 2, 4 or 8.
  def foldFlag(idx: Int, ratio: Int) = ratio match {
    case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2
    case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4
    case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8
  }

  // vlmax: number of uops; group: uops per src2 register (2, 4 or 8).
  // widenAware: when true, the uop right after a group head may re-read the
  // src2 register on lsrc(2) and the fold flag is chosen by isWiden
  // (isFoldTo1_<group/2> when widening, isFoldTo1_<group> otherwise).
  def genOrdered(vlmax: Int, group: Int, widenAware: Boolean): Unit = {
    for (i <- 0 until vlmax) {
      val groupHead = i % group == 0
      val isLast = i == vlmax - 1
      val folds = (!groupHead).B
      val uop = csBundle(i)
      uop.lsrc(0) := (if (i == 0) src1 else tmp)
      uop.lsrc(1) := (if (groupHead) src2 + (i / group).U else tmp)
      uop.lsrc(2) := (
        if (groupHead) src2 + (i / group).U
        else if (isLast) dest
        else if (widenAware && i % group == 1) Mux(isWiden, src2 + (i / group).U, tmp)
        else tmp
      )
      uop.ldest := (if (isLast) dest else tmp)
      if (widenAware) {
        foldFlag(i, group / 2) := isWiden && folds
        foldFlag(i, group) := !isWiden && folds
      } else {
        foldFlag(i, group) := folds
      }
      uop.uopIdx := i.U
    }
  }

  // vlmul is given as the fraction lmulNum / lmulDen. The uop count is
  // group * lmulNum / lmulDen; combinations yielding fewer than two uops are
  // not generated. e64 never takes the widening-aware form.
  def genAllSews(lmulNum: Int, lmulDen: Int, widenAware: Boolean): Unit = {
    for ((sew, group) <- Seq(VSew.e64 -> 2, VSew.e32 -> 4, VSew.e16 -> 8)) {
      val vlmax = group * lmulNum / lmulDen
      if (vlmax >= 2) {
        when(vsew === sew) {
          genOrdered(vlmax, group, widenAware && group > 2)
        }
      }
    }
  }

  when(vlmul === VLmul.m8) { genAllSews(8, 1, widenAware = false) }
  when(vlmul === VLmul.m4) { genAllSews(4, 1, widenAware = true) }
  when(vlmul === VLmul.m2) { genAllSews(2, 1, widenAware = true) }
  when(vlmul === VLmul.m1) { genAllSews(1, 1, widenAware = true) }
  when(vlmul === VLmul.mf2) { genAllSews(1, 2, widenAware = true) }
  when(vlmul === VLmul.mf4) { genAllSews(1, 4, widenAware = true) }
}

is(UopSplitType.VEC_SLIDEUP) {
  // uop 0: integer/immediate to vector move into VECTOR_TMP_REG_LMUL
  val mv = csBundle(0)
  mv.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  mv.srcType(1) := SrcType.imm
  mv.srcType(2) := SrcType.imm
  mv.lsrc(1) := 0.U
  mv.ldest := VECTOR_TMP_REG_LMUL.U
  mv.fuType := FuType.i2v.U
  mv.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  mv.vecWen := true.B
  // LMUL: destination register i takes i + 1 uops (j = 0..i). The chain
  // starts from dest + i as old vd, passes through temporaries and writes
  // dest + i on the last step.
  for (i <- 0 until MAX_VLMUL; j <- 0 to i) {
    val slot = i * (i + 1) / 2 + j
    val uop = csBundle(slot + 1)
    val oldVd = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
    val newVd = if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + j.U
    uop.lsrc(2) := oldVd
    uop.ldest := newVd
    uop.uopIdx := slot.U
  }
}

is(UopSplitType.VEC_SLIDEDOWN) {
  // uop 0: integer/immediate to vector move into VECTOR_TMP_REG_LMUL
  val mv = csBundle(0)
  mv.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  mv.srcType(1) := SrcType.imm
  mv.srcType(2) := SrcType.imm
  mv.lsrc(1) := 0.U
  mv.ldest := VECTOR_TMP_REG_LMUL.U
  mv.fuType := FuType.i2v.U
  mv.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  mv.vecWen := true.B
  // LMUL: slots are addressed backwards from numOfUop, so they are dynamic
  // indices; only rows with i < lmul are generated.
  for (i <- 0 until MAX_VLMUL; j <- i to 0 by -1) {
    when(i.U < lmul) {
      val back = i * (i + 1) / 2 + i - j + 1
      val uop = csBundle(numOfUop - back.U)
      val oldVd = if (j == 0) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j).U
      val newVd = if (j == i) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + lmul - 1.U - j.U
      uop.lsrc(2) := oldVd
      uop.ldest := newVd
      uop.uopIdx := numOfUop - (back + 1).U
    }
  }
}

is(UopSplitType.VEC_M0X) {
  // LMUL: uop i reads temporary i - 1 (ignored for i == 0, whose srcType(0)
  // is DC) together with src2 and writes temporary i. lsrc(2) is left
  // unassigned here.
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i)
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
  }
  // The uop at index lmul - 1 writes the integer register file instead.
  val last = csBundle(lmul - 1.U)
  last.rfWen := true.B
  last.fpWen := false.B
  last.vecWen := false.B
  last.ldest := dest
}

is(UopSplitType.VEC_MVV) {
  // LMUL: two uops per register i. The even uop writes dest + i (with
  // dest + i as old vd); the odd uop writes temporary i and leaves lsrc(2)
  // unassigned. Both read temporary i - 1 and src2.
  for (i <- 0 until MAX_VLMUL) {
    val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
    val toDest = csBundle(i * 2 + 0)
    val toTmp = csBundle(i * 2 + 1)

    toDest.srcType(0) := srcType0
    toDest.srcType(1) := SrcType.vp
    toDest.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    toDest.lsrc(1) := src2
    toDest.lsrc(2) := dest + i.U
    toDest.ldest := dest + i.U
    toDest.uopIdx := (i * 2 + 0).U

    toTmp.srcType(0) := srcType0
    toTmp.srcType(1) := SrcType.vp
    toTmp.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    toTmp.lsrc(1) := src2
    toTmp.ldest := (VECTOR_TMP_REG_LMUL + i).U
    toTmp.uopIdx := (i * 2 + 1).U
  }
}

is(UopSplitType.VEC_M0X_VFIRST) {
  // Single uop writing the integer register file.
  val uop = csBundle(0)
  uop.rfWen := true.B
  uop.fpWen := false.B
  uop.vecWen := false.B
  uop.ldest := dest
}
is(UopSplitType.VEC_VWW) {
  // Uops with i < lmul read register src2 + i on both sources; later uops
  // pair up temporaries 2*(i - lmul) and 2*(i - lmul) + 1. Every uop writes
  // temporary i and leaves lsrc(2) unassigned.
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    uop.srcType(2) := SrcType.DC
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    }.otherwise {
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W)) + 1.U
      uop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
    }
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
  }
  // Final uop overrides: connected once, after the loop, so that under
  // last-connect semantics they win over the per-uop connections above.
  val last = csBundle(numOfUop - 1.U)
  last.srcType(2) := SrcType.vp
  last.lsrc(0) := src1
  last.lsrc(2) := dest
  last.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // len x len uops: row i targets dest + i and reads src1 + i; column j reads
  // src2 + j. Within a row the old-vd chain runs dest + i -> temporaries ->
  // dest + i. srcType fields are not assigned in this arm.
  def genCsBundle_VEC_RGATHER(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + i.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }
  }
  switch(vlmulReg) {
    is("b001".U) {
      genCsBundle_VEC_RGATHER(2)
    }
    is("b010".U) {
      genCsBundle_VEC_RGATHER(4)
    }
    is("b011".U) {
      genCsBundle_VEC_RGATHER(8)
    }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  // Same len x len layout as a register gather, shifted by one slot because
  // uop 0 is the move into VECTOR_TMP_REG_LMUL, which every later uop reads
  // on lsrc(0).
  def genCsBundle_RGATHER_VX(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
      uop.uopIdx := slot.U
    }
  }
  // uop 0: integer/immediate to vector move
  val mv = csBundle(0)
  mv.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  mv.srcType(1) := SrcType.imm
  mv.srcType(2) := SrcType.imm
  mv.lsrc(1) := 0.U
  mv.ldest := VECTOR_TMP_REG_LMUL.U
  mv.fuType := FuType.i2v.U
  mv.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  mv.rfWen := false.B
  mv.fpWen := false.B
  mv.vecWen := true.B
  // The single-register layout is the default; the switch below overrides
  // it for the larger register groups, so it must stay after this call.
  genCsBundle_RGATHER_VX(1)
  switch(vlmulReg) {
    is("b001".U) {
      genCsBundle_RGATHER_VX(2)
    }
    is("b010".U) {
      genCsBundle_RGATHER_VX(4)
    }
    is("b011".U) {
      genCsBundle_RGATHER_VX(8)
    }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  // vsewReg == 0 layout: every (row i, column j) pair takes two uops. The
  // first reads src1 + 2i, the second src1 + 2i + 1; both read src2 + j.
  // The old-vd chain of a row runs dest + i -> temporaries -> dest + i.
  def genTwoPerPair(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len; half <- 0 until 2) {
      val slot = (i * len + j) * 2 + half
      val uop = csBundle(slot)
      val oldVd =
        if (half == 1) (VECTOR_TMP_REG_LMUL + j * 2).U
        else if (j == 0) dest + i.U
        else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U
      val newVd =
        if (half == 0) (VECTOR_TMP_REG_LMUL + j * 2).U
        else if (j == len - 1) dest + i.U
        else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U
      uop.lsrc(0) := src1 + (i * 2 + half).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := oldVd
      uop.ldest := newVd
      uop.uopIdx := slot.U
    }
  }
  // One uop per (row i, column j) pair. `rowsPerVs1` consecutive rows share
  // the same src1 register (row i reads src1 + i / rowsPerVs1).
  def genOnePerPair(len: Int, rowsPerVs1: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + (i / rowsPerVs1).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }
  }
  // Picks the layout from vsewReg. With splitSew8 = false the vsewReg == 0
  // case is not special-cased and falls into the final `otherwise`.
  def genForLen(len: Int, splitSew8: Boolean): Unit = {
    if (splitSew8) {
      when(!vsewReg.orR) {
        genTwoPerPair(len)
      }.elsewhen(vsewReg === VSew.e32) {
        genOnePerPair(len, 2)
      }.elsewhen(vsewReg === VSew.e64) {
        genOnePerPair(len, 4)
      }.otherwise {
        genOnePerPair(len, 1)
      }
    } else {
      when(vsewReg === VSew.e32) {
        genOnePerPair(len, 2)
      }.elsewhen(vsewReg === VSew.e64) {
        genOnePerPair(len, 4)
      }.otherwise {
        genOnePerPair(len, 1)
      }
    }
  }
  // Single-register layout first; the switch overrides it for larger groups.
  genForLen(1, splitSew8 = true)
  switch(vlmulReg) {
    is("b001".U) {
      genForLen(2, splitSew8 = true)
    }
    is("b010".U) {
      genForLen(4, splitSew8 = true)
    }
    is("b011".U) {
      genForLen(8, splitSew8 = false)
    }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  // Row i starts at slot i * (i + 3) / 2 and holds i + 2 uops (i + 1 for the
  // last row). The extra uop of a row (j == i + 1) writes
  // VECTOR_TMP_REG_LMUL and gets DontCare on srcType(0) and srcType(2).
  def genCsBundle_VEC_COMPRESS(len: Int): Unit = {
    for (i <- 0 until len) {
      val isLastRow = i == len - 1
      val rowBase = i * (i + 3) / 2
      val rowLen = if (isLastRow) i + 1 else i + 2
      for (j <- 0 until rowLen) {
        val uop = csBundle(rowBase + j)
        val isExtra = j == i + 1
        val oldVd = if (i == j) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
        val newVd =
          if (isLastRow) dest + j.U
          else if (isExtra) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + j + 1).U
        val src13Type = if (isExtra) DontCare else SrcType.vp
        uop.vecWen := true.B
        uop.v0Wen := false.B
        uop.srcType(0) := src13Type
        uop.srcType(1) := SrcType.vp
        uop.srcType(2) := src13Type
        // Row 0 reads src1; later rows read VECTOR_TMP_REG_LMUL instead.
        uop.lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := oldVd
        uop.ldest := newVd
        uop.uopIdx := (rowBase + j).U
      }
    }
  }
  switch(vlmulReg) {
    is("b001".U) {
      genCsBundle_VEC_COMPRESS(2)
    }
    is("b010".U) {
      genCsBundle_VEC_COMPRESS(4)
    }
    is("b011".U) {
      genCsBundle_VEC_COMPRESS(8)
    }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register: all three sources and the destination advance
  // together with i.
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i)
    uop.lsrc(0) := src1 + i.U
    uop.lsrc(1) := src2 + i.U
    uop.lsrc(2) := dest + i.U
    uop.ldest := dest + i.U
    uop.uopIdx := i.U
  }
}
is(UopSplitType.VEC_US_LDST) {
  /*
  FMV.D.X
   */
  // uop 0: i2v move of an integer register into VECTOR_TMP_REG_LMUL
  val mv = csBundle(0)
  mv.srcType(0) := SrcType.reg
  mv.srcType(1) := SrcType.imm
  mv.lsrc(1) := 0.U
  mv.ldest := VECTOR_TMP_REG_LMUL.U
  mv.fuType := FuType.i2v.U
  mv.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  mv.rfWen := false.B
  mv.fpWen := false.B
  mv.vecWen := true.B
  mv.vlsInstr := true.B
  // LMUL: uop i + 1 handles register dest + i and reads the moved value
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i + 1)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(2) := dest + i.U // old vd
    uop.ldest := dest + i.U
    uop.uopIdx := i.U
    uop.vlsInstr := true.B
  }
  csBundle.head.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}
is(UopSplitType.VEC_S_LDST) {
  /*
  FMV.D.X
   */
  // uops 0 and 1: i2v moves into VECTOR_TMP_REG_LMUL and
  // VECTOR_TMP_REG_LMUL + 1. Only uop 1 has lsrc(0) assigned here, from
  // latchedInst.lsrc(1).
  for (k <- 0 until 2) {
    val mv = csBundle(k)
    mv.srcType(0) := SrcType.reg
    mv.srcType(1) := SrcType.imm
    if (k == 1) {
      mv.lsrc(0) := latchedInst.lsrc(1)
    }
    mv.lsrc(1) := 0.U
    mv.ldest := (VECTOR_TMP_REG_LMUL + k).U
    mv.fuType := FuType.i2v.U
    mv.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
    mv.rfWen := false.B
    mv.fpWen := false.B
    mv.vecWen := true.B
    mv.vlsInstr := true.B
  }

  // LMUL: uop i + 2 handles register dest + i and reads both moved values
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i + 2)
    uop.srcType(0) := SrcType.vp
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
    uop.lsrc(2) := dest + i.U // old vd
    uop.ldest := dest + i.U
    uop.uopIdx := i.U
    uop.vlsInstr := true.B
  }
  csBundle.head.waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
is(UopSplitType.VEC_I_LDST) {
  def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
    for (i <- 0 until MAX_VLMUL) {
      val vecWen = if (i < lmul * nf) true.B else false.B
      val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
      csBundle(i +
1).srcType(0) := SrcType.vp 1728 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1729 csBundle(i + 1).srcType(1) := SrcType.no 1730 csBundle(i + 1).lsrc(1) := src2 + i.U 1731 csBundle(i + 1).srcType(2) := src2Type 1732 csBundle(i + 1).lsrc(2) := dest + i.U 1733 csBundle(i + 1).ldest := dest + i.U 1734 csBundle(i + 1).rfWen := false.B 1735 csBundle(i + 1).fpWen := false.B 1736 csBundle(i + 1).vecWen := vecWen 1737 csBundle(i + 1).uopIdx := i.U 1738 csBundle(i + 1).vlsInstr := true.B 1739 } 1740 } 1741 def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={ 1742 for (i <- 0 until MAX_VLMUL) { 1743 val src1Type = if (i < emul) SrcType.vp else SrcType.no 1744 csBundle(i + 1).srcType(1) := src1Type 1745 csBundle(i + 1).lsrc(1) := src2 + i.U 1746 } 1747 } 1748 1749 val vlmul = vlmulReg 1750 val vsew = Cat(0.U(1.W), vsewReg) 1751 val veew = Cat(0.U(1.W), width) 1752 val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt 1753 val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array( 1754 "b001".U -> 1.U, 1755 "b010".U -> 2.U, 1756 "b011".U -> 3.U 1757 )) 1758 val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array( 1759 "b001".U -> 1.U, 1760 "b010".U -> 2.U, 1761 "b011".U -> 3.U 1762 )) 1763 csBundle(0).srcType(0) := SrcType.reg 1764 csBundle(0).srcType(1) := SrcType.imm 1765 csBundle(0).lsrc(1) := 0.U 1766 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1767 csBundle(0).fuType := FuType.i2v.U 1768 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 1769 csBundle(0).rfWen := false.B 1770 csBundle(0).fpWen := false.B 1771 csBundle(0).vecWen := true.B 1772 csBundle(0).vlsInstr := true.B 1773 1774 //LMUL 1775 when(nf === 0.U) { 1776 for (i <- 0 until MAX_VLMUL) { 1777 indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul) 1778 val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2 1779 val offsetVd = indexedLSRegOffset(i).outOffsetVd 1780 csBundle(i + 1).srcType(0) := SrcType.vp 1781 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1782 csBundle(i + 1).lsrc(1) 
:= Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U)) 1783 csBundle(i + 1).srcType(2) := SrcType.vp 1784 // lsrc2 is old vd 1785 csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)) 1786 csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)) 1787 csBundle(i + 1).uopIdx := i.U 1788 csBundle(i + 1).vlsInstr := true.B 1789 } 1790 }.otherwise{ 1791 // nf > 1, is segment indexed load/store 1792 // gen src0, vd 1793 switch(simple_lmul) { 1794 is(0.U) { 1795 switch(nf) { 1796 is(1.U) { 1797 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2) 1798 } 1799 is(2.U) { 1800 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3) 1801 } 1802 is(3.U) { 1803 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4) 1804 } 1805 is(4.U) { 1806 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5) 1807 } 1808 is(5.U) { 1809 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6) 1810 } 1811 is(6.U) { 1812 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7) 1813 } 1814 is(7.U) { 1815 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8) 1816 } 1817 } 1818 } 1819 is(1.U) { 1820 switch(nf) { 1821 is(1.U) { 1822 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2) 1823 } 1824 is(2.U) { 1825 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3) 1826 } 1827 is(3.U) { 1828 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4) 1829 } 1830 } 1831 } 1832 is(2.U) { 1833 switch(nf) { 1834 is(1.U) { 1835 genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2) 1836 } 1837 } 1838 } 1839 } 1840 1841 // gen src1 1842 switch(simple_emul) { 1843 is(0.U) { 1844 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1) 1845 } 1846 is(1.U) { 1847 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2) 1848 } 1849 is(2.U) { 1850 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4) 1851 } 1852 is(3.U) { 1853 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8) 1854 } 1855 } 1856 1857 // when is vstore instructions, not set vecwen 1858 when(isVstore) { 1859 for (i <- 0 until MAX_VLMUL) { 1860 csBundle(i + 1).vecWen := false.B 
          }
        }
      }
      // The first uop gets waitForward and the uop at index numOfUop - 1 gets blockBackward,
      // both driven by isIxSegment.
      csBundle.head.waitForward := isIxSegment
      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
    }
  }

  //readyFromRename Counter
  // Pairs each negated outReadys entry with its index and falls back to RenameWidth.
  // NOTE(review): presumably PriorityMuxDefault returns the index of the first not-ready output,
  // or RenameWidth when all are ready -- the helper is defined outside this file, confirm.
  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)

  // The remaining uops of the complex inst in ComplexDecoder can be sent out this cycle
  val thisAllOut = uopRes <= readyCounter

  // Next-state logic.
  //   s_idle:   on inValid, go to s_active and load uopRes with the incoming numOfUop.
  //   s_active: when thisAllOut, either reload from a new valid input or return to s_idle with
  //             uopRes = 0; otherwise stay active and subtract readyCounter from uopRes.
  switch(state) {
    is(s_idle) {
      when (inValid) {
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }
    }
    is(s_active) {
      when (thisAllOut) {
        when (inValid) {
          stateNext := s_active
          uopResNext := inUopInfo.numOfUop
        }.otherwise {
          stateNext := s_idle
          uopResNext := 0.U
        }
      }.otherwise {
        stateNext := s_active
        uopResNext := uopRes - readyCounter
      }
    }
  }

  // io.redirect overrides the computed next values: state goes to s_idle and uopRes to 0.
  state := Mux(io.redirect, s_idle, stateNext)
  uopRes := Mux(io.redirect, 0.U, uopResNext)

  // Smaller of uopRes and readyCounter.
  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)

  // Output i is valid when i < complexNum. Its payload is csBundle at index
  // i + numOfUop - uopRes; when that index is not below maxUopSize, the last entry
  // csBundle(maxUopSize - 1) is used instead.
  for(i <- 0 until RenameWidth) {
    outValids(i) := complexNum > i.U
    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
  }

  // complexNum is reported only in s_active; otherwise 0.
  outComplexNum := Mux(state === s_active, complexNum, 0.U)
  // Ready in s_idle, or in s_active when thisAllOut holds.
  inReady := state === s_idle || state === s_active && thisAllOut

// Earlier implementation, kept commented out.
//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
//  val notInf = Wire(Vec(DecodeWidth, Bool()))
//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
//
//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
//    0.U)
//  validToRename.zipWithIndex.foreach{
//    case(dst, i) =>
//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
//      dst := MuxCase(false.B, Seq(
//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
//      ).toSeq)
//  }
//
//  readyToIBuf.zipWithIndex.foreach {
//    case (dst, i) =>
//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
//      dst := MuxCase(true.B, Seq(
//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
//      ).toSeq)
//  }
//
//  io.deq.decodedInsts := decodedInsts
//  io.deq.complexNum := complexNum
//  io.deq.validToRename := validToRename
//  io.deq.readyToIBuf := readyToIBuf
}