/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Elaboration-time lookup table for uop number `uopIdx` of a non-segment indexed
  * vector load/store.
  *
  * The 4-bit input `src` is the key `(emul << 2) | lmul`; the table has one row for every
  * `emul`, `lmul` in `0 until 4`. The two 3-bit outputs are the offsets computed by
  * [[genCsBundle_VEC_INDEXED_LDST]] for that key.
  *
  * NOTE(review): presumably the outputs are added to the vs2 / vd base register numbers
  * by the user of this module -- confirm against the instantiating code.
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the `(offsetVs2, offsetVd)` pair for one uop.
    *
    * `lmul` and `emul` are used as log2 exponents: the number of uops is
    * `1 << emul` when `lmul < emul` and `1 << lmul` otherwise. The offset belonging to the
    * larger exponent advances on every uop; the other one advances once every
    * `1 << |emul - lmul|` uops.
    *
    * @param lmul   log2 exponent paired with the vd offset
    * @param emul   log2 exponent paired with the vs2 offset
    * @param uopIdx index of the uop
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is outside `0 until uopNum`
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    if (lmul < emul) { // lmul < emul, uop num is depend on emul * nf
      val offset = 1 << (emul - lmul)
      val uopNum = 1 << emul
      // A plain bounds check replaces the former scan-and-`return` loop.
      if (uopIdx >= 0 && uopIdx < uopNum) (uopIdx, uopIdx / offset) else (0, 0)
    } else { // lmul >= emul, uop num is depend on lmul * nf
      val offset = 1 << (lmul - emul)
      val uopNum = 1 << lmul
      if (uopIdx >= 0 && uopIdx < uopNum) (uopIdx / offset, uopIdx) else (0, 0)
    }
  }

  // Truth-table rows for indexed load/store: one (emul, lmul, offsetVs2, offsetVd) tuple per
  // key, emul-major. Kept as a `var` only so the public member keeps its original signature;
  // it is never reassigned here.
  var combVemulNf : Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Key: {emul, lmul} (4 bits). Value: {offsetVs2, offsetVd} (6 bits). All 16 keys are
  // listed, so the all-zero default pattern only acts as the minimizer's fallback.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}

/** Numeric constants shared by the vector uop-splitting logic. */
trait VectorConstants {
  // Upper bound used by the per-register-group loops of the uop splitter.
  val MAX_VLMUL = 8
  // First logical register index used for temporary vector results.
  val VECTOR_TMP_REG_LMUL = 33 // 33~47  ->  15
  val VECTOR_COMPRESS = 1 // in v0 regfile
  val MAX_INDEXED_LS_UOPNUM = 64
}

/**
  * IO bundle of [[DecodeUnitComp]]: one complex instruction comes in together with its
  * uop information, and up to `RenameWidth` decoded uops go out per cycle.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // vtype value connected into the uops of vset instructions
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 112 private val inUopInfo = io.in.bits.uopInfo 113 private val outValids = io.out.complexDecodedInsts.map(_.valid) 114 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 115 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 116 private val outComplexNum = io.complexNum 117 118 val maxUopSize = MaxUopSize 119 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 120 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 121 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 122 }.elsewhen(inInstFields.RS1 === 0.U) { 123 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 124 } 125 } 126 127 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 128 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 129 //input bits 130 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 131 132 val src1 = Cat(0.U(1.W), instFields.RS1) 133 val src2 = Cat(0.U(1.W), instFields.RS2) 134 val dest = Cat(0.U(1.W), instFields.RD) 135 136 val nf = instFields.NF 137 val width = instFields.WIDTH(1, 0) 138 139 //output of DecodeUnit 140 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 141 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 142 val lmul = Wire(UInt(4.W)) 143 val isVsetSimple = Wire(Bool()) 144 145 val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i))) 146 indexedLSRegOffset.map(_.src := 0.U) 147 148 //pre decode 149 lmul := latchedUopInfo.lmul 150 isVsetSimple := latchedInst.isVset 151 val vlmulReg = latchedInst.vpu.vlmul 152 val vsewReg = latchedInst.vpu.vsew 153 154 //Type of uop Div 155 val typeOfSplit = latchedInst.uopSplitType 156 val src1Type = latchedInst.srcType(0) 157 val src1IsImm = src1Type === SrcType.imm 158 val src1IsFp = src1Type === SrcType.fp 159 160 val isVstore = 
FuType.isVStore(latchedInst.fuType) 161 162 numOfUop := latchedUopInfo.numOfUop 163 numOfWB := latchedUopInfo.numOfWB 164 165 //uops dispatch 166 val s_idle :: s_active :: Nil = Enum(2) 167 val state = RegInit(s_idle) 168 val stateNext = WireDefault(state) 169 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 170 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 171 val uopResNext = WireInit(uopRes) 172 val e64 = 3.U(2.W) 173 val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U) 174 val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U 175 val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U 176 177 //uop div up to maxUopSize 178 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 179 csBundle.foreach { case dst => 180 dst := latchedInst 181 dst.numUops := latchedUopInfo.numOfUop 182 dst.numWB := latchedUopInfo.numOfWB 183 dst.firstUop := false.B 184 dst.lastUop := false.B 185 dst.vlsInstr := false.B 186 } 187 188 csBundle(0).firstUop := true.B 189 csBundle(numOfUop - 1.U).lastUop := true.B 190 191 switch(typeOfSplit) { 192 is(UopSplitType.VSET) { 193 // In simple decoder, rfWen and vecWen are not set 194 when(isVsetSimple) { 195 // Default 196 // uop0 set rd, never flushPipe 197 csBundle(0).fuType := FuType.vsetiwi.U 198 csBundle(0).flushPipe := false.B 199 csBundle(0).rfWen := true.B 200 // uop1 set vl, vsetvl will flushPipe 201 csBundle(1).ldest := Vl_IDX.U 202 csBundle(1).vecWen := false.B 203 csBundle(1).vlWen := true.B 204 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 205 // write nothing, uop0 is a nop instruction 206 csBundle(0).rfWen := false.B 207 csBundle(0).fpWen := false.B 208 csBundle(0).vecWen := false.B 209 csBundle(0).vlWen := false.B 210 csBundle(1).fuType := FuType.vsetfwf.U 211 csBundle(1).srcType(0) := SrcType.vp 212 csBundle(1).lsrc(0) := Vl_IDX.U 213 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 
=== 0.U) { 214 // uop0: mv vtype gpr to vector region 215 csBundle(0).srcType(0) := SrcType.xp 216 csBundle(0).srcType(1) := SrcType.no 217 csBundle(0).lsrc(0) := src2 218 csBundle(0).lsrc(1) := 0.U 219 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 220 csBundle(0).fuType := FuType.i2v.U 221 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 222 csBundle(0).rfWen := false.B 223 csBundle(0).fpWen := false.B 224 csBundle(0).vecWen := true.B 225 csBundle(0).vlWen := false.B 226 csBundle(0).flushPipe := false.B 227 // uop1: uvsetvcfg_vv 228 csBundle(1).fuType := FuType.vsetfwf.U 229 // vl 230 csBundle(1).srcType(0) := SrcType.vp 231 csBundle(1).lsrc(0) := Vl_IDX.U 232 // vtype 233 csBundle(1).srcType(1) := SrcType.vp 234 csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U 235 csBundle(1).vecWen := false.B 236 csBundle(1).vlWen := true.B 237 csBundle(1).ldest := Vl_IDX.U 238 }.elsewhen(dest === 0.U) { 239 // write nothing, uop0 is a nop instruction 240 csBundle(0).rfWen := false.B 241 csBundle(0).fpWen := false.B 242 csBundle(0).vecWen := false.B 243 csBundle(0).vlWen := false.B 244 } 245 // use bypass vtype from vtypeGen 246 csBundle(0).vpu.connectVType(io.vtypeBypass) 247 csBundle(1).vpu.connectVType(io.vtypeBypass) 248 } 249 } 250 is(UopSplitType.VEC_VVV) { 251 for (i <- 0 until MAX_VLMUL) { 252 csBundle(i).lsrc(0) := src1 + i.U 253 csBundle(i).lsrc(1) := src2 + i.U 254 csBundle(i).lsrc(2) := dest + i.U 255 csBundle(i).ldest := dest + i.U 256 csBundle(i).uopIdx := i.U 257 } 258 } 259 is(UopSplitType.VEC_VFV) { 260 /* 261 f to vector move 262 */ 263 csBundle(0).srcType(0) := SrcType.fp 264 csBundle(0).srcType(1) := SrcType.imm 265 csBundle(0).srcType(2) := SrcType.imm 266 csBundle(0).lsrc(1) := 0.U 267 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 268 csBundle(0).fuType := FuType.f2v.U 269 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 270 csBundle(0).vecWen := true.B 271 csBundle(0).vpu.isReverse := false.B 272 /* 273 LMUL 274 */ 275 for (i <- 0 until 
MAX_VLMUL) { 276 csBundle(i + 1).srcType(0) := SrcType.vp 277 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 278 csBundle(i + 1).lsrc(1) := src2 + i.U 279 csBundle(i + 1).lsrc(2) := dest + i.U 280 csBundle(i + 1).ldest := dest + i.U 281 csBundle(i + 1).uopIdx := i.U 282 } 283 } 284 is(UopSplitType.VEC_EXT2) { 285 for (i <- 0 until MAX_VLMUL / 2) { 286 csBundle(2 * i).lsrc(1) := src2 + i.U 287 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 288 csBundle(2 * i).ldest := dest + (2 * i).U 289 csBundle(2 * i).uopIdx := (2 * i).U 290 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 291 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 292 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 293 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 294 } 295 } 296 is(UopSplitType.VEC_EXT4) { 297 for (i <- 0 until MAX_VLMUL / 4) { 298 csBundle(4 * i).lsrc(1) := src2 + i.U 299 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 300 csBundle(4 * i).ldest := dest + (4 * i).U 301 csBundle(4 * i).uopIdx := (4 * i).U 302 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 303 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 304 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 305 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 306 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 307 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 308 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 309 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 310 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 311 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 312 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 313 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 314 } 315 } 316 is(UopSplitType.VEC_EXT8) { 317 for (i <- 0 until MAX_VLMUL) { 318 csBundle(i).lsrc(1) := src2 319 csBundle(i).lsrc(2) := dest + i.U 320 csBundle(i).ldest := dest + i.U 321 csBundle(i).uopIdx := i.U 322 } 323 } 324 is(UopSplitType.VEC_0XV) { 325 /* 326 i/f to vector move 327 */ 328 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 329 csBundle(0).srcType(1) := 
SrcType.imm 330 csBundle(0).srcType(2) := SrcType.imm 331 csBundle(0).lsrc(1) := 0.U 332 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 333 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 334 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 335 csBundle(0).rfWen := false.B 336 csBundle(0).fpWen := false.B 337 csBundle(0).vecWen := true.B 338 /* 339 vmv.s.x 340 */ 341 csBundle(1).srcType(0) := SrcType.vp 342 csBundle(1).srcType(1) := SrcType.imm 343 csBundle(1).srcType(2) := SrcType.vp 344 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 345 csBundle(1).lsrc(1) := 0.U 346 csBundle(1).lsrc(2) := dest 347 csBundle(1).ldest := dest 348 csBundle(1).rfWen := false.B 349 csBundle(1).fpWen := false.B 350 csBundle(1).vecWen := true.B 351 csBundle(1).uopIdx := 0.U 352 } 353 is(UopSplitType.VEC_VXV) { 354 /* 355 i to vector move 356 */ 357 csBundle(0).srcType(0) := SrcType.reg 358 csBundle(0).srcType(1) := SrcType.imm 359 csBundle(0).srcType(2) := SrcType.imm 360 csBundle(0).lsrc(1) := 0.U 361 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 362 csBundle(0).fuType := FuType.i2v.U 363 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 364 csBundle(0).vecWen := true.B 365 csBundle(0).vpu.isReverse := false.B 366 /* 367 LMUL 368 */ 369 for (i <- 0 until MAX_VLMUL) { 370 csBundle(i + 1).srcType(0) := SrcType.vp 371 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 372 csBundle(i + 1).lsrc(1) := src2 + i.U 373 csBundle(i + 1).lsrc(2) := dest + i.U 374 csBundle(i + 1).ldest := dest + i.U 375 csBundle(i + 1).uopIdx := i.U 376 } 377 } 378 is(UopSplitType.VEC_VVW) { 379 for (i <- 0 until MAX_VLMUL / 2) { 380 csBundle(2 * i).lsrc(0) := src1 + i.U 381 csBundle(2 * i).lsrc(1) := src2 + i.U 382 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 383 csBundle(2 * i).ldest := dest + (2 * i).U 384 csBundle(2 * i).uopIdx := (2 * i).U 385 csBundle(2 * i + 1).lsrc(0) := src1 + 
i.U 386 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 387 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 388 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 389 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 390 } 391 } 392 is(UopSplitType.VEC_VFW) { 393 /* 394 f to vector move 395 */ 396 csBundle(0).srcType(0) := SrcType.fp 397 csBundle(0).srcType(1) := SrcType.imm 398 csBundle(0).srcType(2) := SrcType.imm 399 csBundle(0).lsrc(1) := 0.U 400 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 401 csBundle(0).fuType := FuType.f2v.U 402 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 403 csBundle(0).rfWen := false.B 404 csBundle(0).fpWen := false.B 405 csBundle(0).vecWen := true.B 406 407 for (i <- 0 until MAX_VLMUL / 2) { 408 csBundle(2 * i + 1).srcType(0) := SrcType.vp 409 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 410 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 411 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 412 csBundle(2 * i + 1).ldest := dest + (2 * i).U 413 csBundle(2 * i + 1).uopIdx := (2 * i).U 414 csBundle(2 * i + 2).srcType(0) := SrcType.vp 415 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 416 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 417 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 418 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 419 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 420 } 421 } 422 is(UopSplitType.VEC_WVW) { 423 for (i <- 0 until MAX_VLMUL / 2) { 424 csBundle(2 * i).lsrc(0) := src1 + i.U 425 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 426 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 427 csBundle(2 * i).ldest := dest + (2 * i).U 428 csBundle(2 * i).uopIdx := (2 * i).U 429 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 430 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 431 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 432 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 433 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 434 } 435 } 436 is(UopSplitType.VEC_VXW) { 437 /* 438 i to vector move 439 */ 440 
csBundle(0).srcType(0) := SrcType.reg 441 csBundle(0).srcType(1) := SrcType.imm 442 csBundle(0).srcType(2) := SrcType.imm 443 csBundle(0).lsrc(1) := 0.U 444 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 445 csBundle(0).fuType := FuType.i2v.U 446 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 447 csBundle(0).vecWen := true.B 448 449 for (i <- 0 until MAX_VLMUL / 2) { 450 csBundle(2 * i + 1).srcType(0) := SrcType.vp 451 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 452 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 453 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 454 csBundle(2 * i + 1).ldest := dest + (2 * i).U 455 csBundle(2 * i + 1).uopIdx := (2 * i).U 456 csBundle(2 * i + 2).srcType(0) := SrcType.vp 457 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 458 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 459 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 460 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 461 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 462 } 463 } 464 is(UopSplitType.VEC_WXW) { 465 /* 466 i to vector move 467 */ 468 csBundle(0).srcType(0) := SrcType.reg 469 csBundle(0).srcType(1) := SrcType.imm 470 csBundle(0).srcType(2) := SrcType.imm 471 csBundle(0).lsrc(1) := 0.U 472 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 473 csBundle(0).fuType := FuType.i2v.U 474 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 475 csBundle(0).vecWen := true.B 476 477 for (i <- 0 until MAX_VLMUL / 2) { 478 csBundle(2 * i + 1).srcType(0) := SrcType.vp 479 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 480 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 481 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 482 csBundle(2 * i + 1).ldest := dest + (2 * i).U 483 csBundle(2 * i + 1).uopIdx := (2 * i).U 484 csBundle(2 * i + 2).srcType(0) := SrcType.vp 485 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 486 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 487 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 488 csBundle(2 
* i + 2).ldest := dest + (2 * i + 1).U 489 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 490 } 491 } 492 is(UopSplitType.VEC_WVV) { 493 for (i <- 0 until MAX_VLMUL / 2) { 494 495 csBundle(2 * i).lsrc(0) := src1 + i.U 496 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 497 csBundle(2 * i).lsrc(2) := dest + i.U 498 csBundle(2 * i).ldest := dest + i.U 499 csBundle(2 * i).uopIdx := (2 * i).U 500 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 501 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 502 csBundle(2 * i + 1).lsrc(2) := dest + i.U 503 csBundle(2 * i + 1).ldest := dest + i.U 504 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 505 } 506 } 507 is(UopSplitType.VEC_WFW) { 508 /* 509 f to vector move 510 */ 511 csBundle(0).srcType(0) := SrcType.fp 512 csBundle(0).srcType(1) := SrcType.imm 513 csBundle(0).srcType(2) := SrcType.imm 514 csBundle(0).lsrc(1) := 0.U 515 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 516 csBundle(0).fuType := FuType.f2v.U 517 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 518 csBundle(0).rfWen := false.B 519 csBundle(0).fpWen := false.B 520 csBundle(0).vecWen := true.B 521 522 for (i <- 0 until MAX_VLMUL / 2) { 523 csBundle(2 * i + 1).srcType(0) := SrcType.vp 524 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 525 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 526 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 527 csBundle(2 * i + 1).ldest := dest + (2 * i).U 528 csBundle(2 * i + 1).uopIdx := (2 * i).U 529 csBundle(2 * i + 2).srcType(0) := SrcType.vp 530 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 531 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 532 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 533 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 534 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 535 } 536 } 537 is(UopSplitType.VEC_WXV) { 538 /* 539 i to vector move 540 */ 541 csBundle(0).srcType(0) := SrcType.reg 542 csBundle(0).srcType(1) := SrcType.imm 543 csBundle(0).srcType(2) := SrcType.imm 544 
csBundle(0).lsrc(1) := 0.U 545 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 546 csBundle(0).fuType := FuType.i2v.U 547 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 548 csBundle(0).vecWen := true.B 549 550 for (i <- 0 until MAX_VLMUL / 2) { 551 csBundle(2 * i + 1).srcType(0) := SrcType.vp 552 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 553 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 554 csBundle(2 * i + 1).lsrc(2) := dest + i.U 555 csBundle(2 * i + 1).ldest := dest + i.U 556 csBundle(2 * i + 1).uopIdx := (2 * i).U 557 csBundle(2 * i + 2).srcType(0) := SrcType.vp 558 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 559 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 560 csBundle(2 * i + 2).lsrc(2) := dest + i.U 561 csBundle(2 * i + 2).ldest := dest + i.U 562 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 563 } 564 } 565 is(UopSplitType.VEC_VVM) { 566 csBundle(0).lsrc(2) := dest 567 csBundle(0).ldest := dest 568 csBundle(0).uopIdx := 0.U 569 for (i <- 1 until MAX_VLMUL) { 570 csBundle(i).lsrc(0) := src1 + i.U 571 csBundle(i).lsrc(1) := src2 + i.U 572 csBundle(i).lsrc(2) := dest 573 csBundle(i).ldest := dest 574 csBundle(i).uopIdx := i.U 575 } 576 } 577 is(UopSplitType.VEC_VFM) { 578 /* 579 f to vector move 580 */ 581 csBundle(0).srcType(0) := SrcType.fp 582 csBundle(0).srcType(1) := SrcType.imm 583 csBundle(0).srcType(2) := SrcType.imm 584 csBundle(0).lsrc(1) := 0.U 585 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 586 csBundle(0).fuType := FuType.f2v.U 587 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 588 csBundle(0).rfWen := false.B 589 csBundle(0).fpWen := false.B 590 csBundle(0).vecWen := true.B 591 //LMUL 592 csBundle(1).srcType(0) := SrcType.vp 593 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 594 csBundle(1).lsrc(2) := dest 595 csBundle(1).ldest := dest 596 csBundle(1).uopIdx := 0.U 597 for (i <- 1 until MAX_VLMUL) { 598 csBundle(i + 1).srcType(0) := 
SrcType.vp 599 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 600 csBundle(i + 1).lsrc(1) := src2 + i.U 601 csBundle(i + 1).lsrc(2) := dest 602 csBundle(i + 1).ldest := dest 603 csBundle(i + 1).uopIdx := i.U 604 } 605 csBundle(numOfUop - 1.U).ldest := dest 606 } 607 is(UopSplitType.VEC_VXM) { 608 /* 609 i to vector move 610 */ 611 csBundle(0).srcType(0) := SrcType.reg 612 csBundle(0).srcType(1) := SrcType.imm 613 csBundle(0).srcType(2) := SrcType.imm 614 csBundle(0).lsrc(1) := 0.U 615 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 616 csBundle(0).fuType := FuType.i2v.U 617 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 618 csBundle(0).vecWen := true.B 619 //LMUL 620 csBundle(1).srcType(0) := SrcType.vp 621 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 622 csBundle(1).lsrc(2) := dest 623 csBundle(1).ldest := dest 624 csBundle(1).uopIdx := 0.U 625 for (i <- 1 until MAX_VLMUL) { 626 csBundle(i + 1).srcType(0) := SrcType.vp 627 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 628 csBundle(i + 1).lsrc(1) := src2 + i.U 629 csBundle(i + 1).lsrc(2) := dest 630 csBundle(i + 1).ldest := dest 631 csBundle(i + 1).uopIdx := i.U 632 } 633 csBundle(numOfUop - 1.U).ldest := dest 634 } 635 is(UopSplitType.VEC_SLIDE1UP) { 636 /* 637 i to vector move 638 */ 639 csBundle(0).srcType(0) := SrcType.reg 640 csBundle(0).srcType(1) := SrcType.imm 641 csBundle(0).srcType(2) := SrcType.imm 642 csBundle(0).lsrc(1) := 0.U 643 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 644 csBundle(0).fuType := FuType.i2v.U 645 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 646 csBundle(0).vecWen := true.B 647 //LMUL 648 csBundle(1).srcType(0) := SrcType.vp 649 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 650 csBundle(1).lsrc(2) := dest 651 csBundle(1).ldest := dest 652 csBundle(1).uopIdx := 0.U 653 for (i <- 1 until MAX_VLMUL) { 654 csBundle(i + 1).srcType(0) := SrcType.vp 655 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 656 
csBundle(i + 1).lsrc(1) := src2 + i.U 657 csBundle(i + 1).lsrc(2) := dest + i.U 658 csBundle(i + 1).ldest := dest + i.U 659 csBundle(i + 1).uopIdx := i.U 660 } 661 } 662 is(UopSplitType.VEC_FSLIDE1UP) { 663 /* 664 f to vector move 665 */ 666 csBundle(0).srcType(0) := SrcType.fp 667 csBundle(0).srcType(1) := SrcType.imm 668 csBundle(0).srcType(2) := SrcType.imm 669 csBundle(0).lsrc(1) := 0.U 670 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 671 csBundle(0).fuType := FuType.f2v.U 672 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 673 csBundle(0).rfWen := false.B 674 csBundle(0).fpWen := false.B 675 csBundle(0).vecWen := true.B 676 //LMUL 677 csBundle(1).srcType(0) := SrcType.vp 678 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 679 csBundle(1).lsrc(1) := src2 680 csBundle(1).lsrc(2) := dest 681 csBundle(1).ldest := dest 682 csBundle(1).uopIdx := 0.U 683 for (i <- 1 until MAX_VLMUL) { 684 csBundle(i + 1).srcType(0) := SrcType.vp 685 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 686 csBundle(i + 1).lsrc(1) := src2 + i.U 687 csBundle(i + 1).lsrc(2) := dest + i.U 688 csBundle(i + 1).ldest := dest + i.U 689 csBundle(i + 1).uopIdx := i.U 690 } 691 } 692 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 693 /* 694 i to vector move 695 */ 696 csBundle(0).srcType(0) := SrcType.reg 697 csBundle(0).srcType(1) := SrcType.imm 698 csBundle(0).srcType(2) := SrcType.imm 699 csBundle(0).lsrc(1) := 0.U 700 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 701 csBundle(0).fuType := FuType.i2v.U 702 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 703 csBundle(0).vecWen := true.B 704 //LMUL 705 for (i <- 0 until MAX_VLMUL) { 706 csBundle(2 * i + 1).srcType(0) := SrcType.vp 707 csBundle(2 * i + 1).srcType(1) := SrcType.vp 708 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 709 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 710 csBundle(2 * i + 1).lsrc(2) := dest + i.U 711 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 712 csBundle(2 * i + 1).uopIdx := (2 * 
i).U 713 if (2 * i + 2 < MAX_VLMUL * 2) { 714 csBundle(2 * i + 2).srcType(0) := SrcType.vp 715 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 716 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 717 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 718 csBundle(2 * i + 2).ldest := dest + i.U 719 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 720 } 721 } 722 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 723 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 724 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 725 } 726 is(UopSplitType.VEC_FSLIDE1DOWN) { 727 /* 728 f to vector move 729 */ 730 csBundle(0).srcType(0) := SrcType.fp 731 csBundle(0).srcType(1) := SrcType.imm 732 csBundle(0).srcType(2) := SrcType.imm 733 csBundle(0).lsrc(1) := 0.U 734 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 735 csBundle(0).fuType := FuType.f2v.U 736 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 737 csBundle(0).rfWen := false.B 738 csBundle(0).fpWen := false.B 739 csBundle(0).vecWen := true.B 740 //LMUL 741 for (i <- 0 until MAX_VLMUL) { 742 csBundle(2 * i + 1).srcType(0) := SrcType.vp 743 csBundle(2 * i + 1).srcType(1) := SrcType.vp 744 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 745 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 746 csBundle(2 * i + 1).lsrc(2) := dest + i.U 747 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 748 csBundle(2 * i + 1).uopIdx := (2 * i).U 749 if (2 * i + 2 < MAX_VLMUL * 2) { 750 csBundle(2 * i + 2).srcType(0) := SrcType.vp 751 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 752 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 753 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 754 csBundle(2 * i + 2).ldest := dest + i.U 755 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 756 } 757 } 758 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 759 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 760 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 761 } 762 
is(UopSplitType.VEC_VRED) { 763 when(vlmulReg === "b001".U) { 764 csBundle(0).srcType(2) := SrcType.DC 765 csBundle(0).lsrc(0) := src2 + 1.U 766 csBundle(0).lsrc(1) := src2 767 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 768 csBundle(0).uopIdx := 0.U 769 } 770 when(vlmulReg === "b010".U) { 771 csBundle(0).srcType(2) := SrcType.DC 772 csBundle(0).lsrc(0) := src2 + 1.U 773 csBundle(0).lsrc(1) := src2 774 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 775 csBundle(0).uopIdx := 0.U 776 777 csBundle(1).srcType(2) := SrcType.DC 778 csBundle(1).lsrc(0) := src2 + 3.U 779 csBundle(1).lsrc(1) := src2 + 2.U 780 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 781 csBundle(1).uopIdx := 1.U 782 783 csBundle(2).srcType(2) := SrcType.DC 784 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 785 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 786 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 787 csBundle(2).uopIdx := 2.U 788 } 789 when(vlmulReg === "b011".U) { 790 for (i <- 0 until MAX_VLMUL) { 791 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 792 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 793 csBundle(i).lsrc(1) := src2 + (i * 2).U 794 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 795 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 796 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 797 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 798 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 799 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 800 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 801 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 802 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 803 } 804 csBundle(i).srcType(2) := SrcType.DC 805 csBundle(i).uopIdx := i.U 806 } 807 } 808 when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) { 809 /* 810 * 2 <= vlmul <= 8 811 */ 812 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 813 csBundle(numOfUop - 1.U).lsrc(0) := src1 814 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 
2.U 815 csBundle(numOfUop - 1.U).lsrc(2) := dest 816 csBundle(numOfUop - 1.U).ldest := dest 817 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 818 } 819 } 820 is(UopSplitType.VEC_VFRED) { 821 val vlmul = vlmulReg 822 val vsew = vsewReg 823 when(vlmul === VLmul.m8){ 824 for (i <- 0 until 4) { 825 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 826 csBundle(i).lsrc(1) := src2 + (i * 2).U 827 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 828 csBundle(i).uopIdx := i.U 829 } 830 for (i <- 4 until 6) { 831 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 832 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 833 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 834 csBundle(i).uopIdx := i.U 835 } 836 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 837 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 838 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 839 csBundle(6).uopIdx := 6.U 840 when(vsew === VSew.e64) { 841 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 842 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 843 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 844 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 845 csBundle(7).uopIdx := 7.U 846 csBundle(8).lsrc(0) := src1 847 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 848 csBundle(8).ldest := dest 849 csBundle(8).uopIdx := 8.U 850 } 851 when(vsew === VSew.e32) { 852 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 853 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 854 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 855 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 856 csBundle(7).uopIdx := 7.U 857 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 858 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 859 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 860 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 861 csBundle(8).uopIdx := 8.U 862 csBundle(9).lsrc(0) := src1 863 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 864 csBundle(9).ldest := dest 865 csBundle(9).uopIdx := 9.U 
866 } 867 when(vsew === VSew.e16) { 868 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 869 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 870 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 871 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 872 csBundle(7).uopIdx := 7.U 873 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 874 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 875 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 876 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 877 csBundle(8).uopIdx := 8.U 878 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 879 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 880 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 881 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 882 csBundle(9).uopIdx := 9.U 883 csBundle(10).lsrc(0) := src1 884 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 885 csBundle(10).ldest := dest 886 csBundle(10).uopIdx := 10.U 887 } 888 } 889 when(vlmul === VLmul.m4) { 890 for (i <- 0 until 2) { 891 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 892 csBundle(i).lsrc(1) := src2 + (i * 2).U 893 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 894 csBundle(i).uopIdx := i.U 895 } 896 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 897 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 898 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 899 csBundle(2).uopIdx := 2.U 900 when(vsew === VSew.e64) { 901 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 902 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 903 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 904 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 905 csBundle(3).uopIdx := 3.U 906 csBundle(4).lsrc(0) := src1 907 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 908 csBundle(4).ldest := dest 909 csBundle(4).uopIdx := 4.U 910 } 911 when(vsew === VSew.e32) { 912 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 913 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 914 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 915 csBundle(3).vpu.fpu.isFoldTo1_2 
:= true.B 916 csBundle(3).uopIdx := 3.U 917 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 918 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 919 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 920 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 921 csBundle(4).uopIdx := 4.U 922 csBundle(5).lsrc(0) := src1 923 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 924 csBundle(5).ldest := dest 925 csBundle(5).uopIdx := 5.U 926 } 927 when(vsew === VSew.e16) { 928 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 929 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 930 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 931 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 932 csBundle(3).uopIdx := 3.U 933 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 934 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 935 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 936 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 937 csBundle(4).uopIdx := 4.U 938 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 939 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 940 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 941 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 942 csBundle(5).uopIdx := 5.U 943 csBundle(6).lsrc(0) := src1 944 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 945 csBundle(6).ldest := dest 946 csBundle(6).uopIdx := 6.U 947 } 948 } 949 when(vlmul === VLmul.m2) { 950 csBundle(0).lsrc(0) := src2 + 1.U 951 csBundle(0).lsrc(1) := src2 + 0.U 952 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 953 csBundle(0).uopIdx := 0.U 954 when(vsew === VSew.e64) { 955 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 956 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 957 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 958 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 959 csBundle(1).uopIdx := 1.U 960 csBundle(2).lsrc(0) := src1 961 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 962 csBundle(2).ldest := dest 963 csBundle(2).uopIdx := 2.U 964 } 965 when(vsew === VSew.e32) { 966 
csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 967 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 968 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 969 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 970 csBundle(1).uopIdx := 1.U 971 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 972 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 973 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 974 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 975 csBundle(2).uopIdx := 2.U 976 csBundle(3).lsrc(0) := src1 977 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 978 csBundle(3).ldest := dest 979 csBundle(3).uopIdx := 3.U 980 } 981 when(vsew === VSew.e16) { 982 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 983 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 984 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 985 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 986 csBundle(1).uopIdx := 1.U 987 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 988 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 989 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 990 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 991 csBundle(2).uopIdx := 2.U 992 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 993 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 994 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 995 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 996 csBundle(3).uopIdx := 3.U 997 csBundle(4).lsrc(0) := src1 998 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 999 csBundle(4).ldest := dest 1000 csBundle(4).uopIdx := 4.U 1001 } 1002 } 1003 when(vlmul === VLmul.m1) { 1004 when(vsew === VSew.e64) { 1005 csBundle(0).lsrc(0) := src2 1006 csBundle(0).lsrc(1) := src2 1007 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1008 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1009 csBundle(0).uopIdx := 0.U 1010 csBundle(1).lsrc(0) := src1 1011 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1012 csBundle(1).ldest := dest 1013 csBundle(1).uopIdx := 1.U 1014 } 1015 when(vsew === VSew.e32) { 1016 
csBundle(0).lsrc(0) := src2 1017 csBundle(0).lsrc(1) := src2 1018 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1019 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1020 csBundle(0).uopIdx := 0.U 1021 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1022 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1023 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1024 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1025 csBundle(1).uopIdx := 1.U 1026 csBundle(2).lsrc(0) := src1 1027 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1028 csBundle(2).ldest := dest 1029 csBundle(2).uopIdx := 2.U 1030 } 1031 when(vsew === VSew.e16) { 1032 csBundle(0).lsrc(0) := src2 1033 csBundle(0).lsrc(1) := src2 1034 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1035 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1036 csBundle(0).uopIdx := 0.U 1037 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1038 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1039 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1040 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1041 csBundle(1).uopIdx := 1.U 1042 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1043 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1044 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1045 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 1046 csBundle(2).uopIdx := 2.U 1047 csBundle(3).lsrc(0) := src1 1048 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1049 csBundle(3).ldest := dest 1050 csBundle(3).uopIdx := 3.U 1051 } 1052 } 1053 when(vlmul === VLmul.mf2) { 1054 when(vsew === VSew.e32) { 1055 csBundle(0).lsrc(0) := src2 1056 csBundle(0).lsrc(1) := src2 1057 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1058 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1059 csBundle(0).uopIdx := 0.U 1060 csBundle(1).lsrc(0) := src1 1061 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1062 csBundle(1).ldest := dest 1063 csBundle(1).uopIdx := 1.U 1064 } 1065 when(vsew === VSew.e16) { 1066 csBundle(0).lsrc(0) := src2 1067 csBundle(0).lsrc(1) := src2 1068 
is(UopSplitType.VEC_VFREDOSUM) {
  import yunsuan.VfaluType
  val vlmul = vlmulReg
  val vsew = vsewReg
  val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

  // Writes csBundle(0) .. csBundle(numUops - 1) as one chain through VECTOR_TMP_REG_LMUL.
  //   numUops     - number of uops generated for this (vlmul, vsew) pair
  //   elemsPerReg - number of consecutive uops that belong to one register of the src2 group
  //   widenAware  - when true, the third source of the second uop of every group and the
  //                 fold flags additionally depend on isWiden
  def genOrderedSumUops(numUops: Int, elemsPerReg: Int, widenAware: Boolean): Unit = {
    for (i <- 0 until numUops) {
      val uop = csBundle(i)
      val readsVs2 = i % elemsPerReg == 0 // first uop of a group reads the src2 register itself
      val isLast = i == numUops - 1       // only the final uop writes the architectural dest
      def tmpReg = VECTOR_TMP_REG_LMUL.U
      def vs2Reg = src2 + (i / elemsPerReg).U

      uop.lsrc(0) := (if (i == 0) src1 else tmpReg)
      uop.lsrc(1) := (if (readsVs2) vs2Reg else tmpReg)
      uop.lsrc(2) := (
        if (readsVs2) vs2Reg
        else if (isLast) dest
        else if (widenAware && i % elemsPerReg == 1) Mux(isWiden, vs2Reg, tmpReg)
        else tmpReg
      )
      uop.ldest := (if (isLast) dest else tmpReg)

      // Every uop except the first of a group is a fold step. For the widen-aware
      // configurations the widening opcode uses the next-coarser fold flag.
      val folds = (!readsVs2).B
      val fpu = uop.vpu.fpu
      (elemsPerReg, widenAware) match {
        case (2, _) =>
          fpu.isFoldTo1_2 := folds
        case (4, false) =>
          fpu.isFoldTo1_4 := folds
        case (4, true) =>
          fpu.isFoldTo1_2 := isWiden && folds
          fpu.isFoldTo1_4 := !isWiden && folds
        case (8, false) =>
          fpu.isFoldTo1_8 := folds
        case (8, true) =>
          fpu.isFoldTo1_4 := isWiden && folds
          fpu.isFoldTo1_8 := !isWiden && folds
      }
      uop.uopIdx := i.U
    }
  }

  // (vlmul, vsew, uop count, uops per src2 register, widen-aware)
  val splitTable = Seq(
    (VLmul.m8,  VSew.e64, 16, 2, false),
    (VLmul.m8,  VSew.e32, 32, 4, false),
    (VLmul.m8,  VSew.e16, 64, 8, false),
    (VLmul.m4,  VSew.e64,  8, 2, false),
    (VLmul.m4,  VSew.e32, 16, 4, true),
    (VLmul.m4,  VSew.e16, 32, 8, true),
    (VLmul.m2,  VSew.e64,  4, 2, false),
    (VLmul.m2,  VSew.e32,  8, 4, true),
    (VLmul.m2,  VSew.e16, 16, 8, true),
    (VLmul.m1,  VSew.e64,  2, 2, false),
    (VLmul.m1,  VSew.e32,  4, 4, true),
    (VLmul.m1,  VSew.e16,  8, 8, true),
    (VLmul.mf2, VSew.e32,  2, 4, true),
    (VLmul.mf2, VSew.e16,  4, 8, true),
    (VLmul.mf4, VSew.e16,  2, 8, true)
  )
  for ((lmulEnc, sewEnc, numUops, elemsPerReg, widenAware) <- splitTable) {
    when(vlmul === lmulEnc && vsew === sewEnc) {
      genOrderedSumUops(numUops, elemsPerReg, widenAware)
    }
  }
}
is(UopSplitType.VEC_SLIDEUP) {
  // uop 0: scalar/immediate to vector move on the i2v unit, result kept in VECTOR_TMP_REG_LMUL
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // Destination register i is built by a chain of i + 1 uops (j = 0 .. i), numbered
  // triangularly. The chain starts from the old value of dest + i, passes through
  // temporaries VECTOR_TMP_REG_LMUL + j, and its last link writes dest + i.
  for (i <- 0 until MAX_VLMUL; j <- 0 to i) {
    val slot = i * (i + 1) / 2 + j
    val uop = csBundle(slot + 1) // +1: slot 0 is the move uop above
    val chainIn = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
    val chainOut = if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + j.U
    uop.lsrc(2) := chainIn
    uop.ldest := chainOut
    uop.uopIdx := slot.U
  }
}
is(UopSplitType.VEC_SLIDEDOWN) {
  // uop 0: scalar/immediate to vector move on the i2v unit, result kept in VECTOR_TMP_REG_LMUL
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // Chains are laid out backwards from the end of the uop list: chain i (only generated
  // while i < lmul) targets register dest + lmul - 1 - i and occupies the slots
  // numOfUop - (i*(i+1)/2 + i - j + 1) for j = i down to 0.
  for (i <- 0 until MAX_VLMUL; j <- i to 0 by -1) {
    when(i.U < lmul) {
      val fromEnd = i * (i + 1) / 2 + i - j + 1
      val uop = csBundle(numOfUop - fromEnd.U)
      val chainIn = if (j == 0) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j).U
      val chainOut = if (j == i) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + lmul - 1.U - j.U
      uop.lsrc(2) := chainIn
      uop.ldest := chainOut
      uop.uopIdx := numOfUop - (fromEnd + 1).U
    }
  }
}

is(UopSplitType.VEC_M0X) {
  // One uop per register index: each reads src2 plus the previous temporary and writes
  // temporary VECTOR_TMP_REG_LMUL + i to the vector file. The first uop has no previous
  // temporary, so its first source type is DC. lsrc(2) is deliberately left untouched.
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i)
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
  }
  // The uop at index lmul - 1 is redirected: it writes `dest` in the integer register
  // file instead of a vector temporary. These connections come last so they override
  // the loop above.
  val finalUop = csBundle(lmul - 1.U)
  finalUop.rfWen := true.B
  finalUop.fpWen := false.B
  finalUop.vecWen := false.B
  finalUop.ldest := dest
}
is(UopSplitType.VEC_MVV) {
  // Two uops per register index i, both reading src2 and the previous temporary:
  //   slot 2i     writes dest + i (and reads it as old destination),
  //   slot 2i + 1 writes temporary VECTOR_TMP_REG_LMUL + i (lsrc(2) left untouched).
  for (i <- 0 until MAX_VLMUL) {
    val firstSrcType = if (i == 0) SrcType.DC else SrcType.vp
    val prevTmp = (VECTOR_TMP_REG_LMUL + i - 1).U

    val toDest = csBundle(i * 2)
    toDest.srcType(0) := firstSrcType
    toDest.srcType(1) := SrcType.vp
    toDest.lsrc(0) := prevTmp
    toDest.lsrc(1) := src2
    toDest.lsrc(2) := dest + i.U
    toDest.ldest := dest + i.U
    toDest.uopIdx := (i * 2).U

    val toTmp = csBundle(i * 2 + 1)
    toTmp.srcType(0) := firstSrcType
    toTmp.srcType(1) := SrcType.vp
    toTmp.lsrc(0) := prevTmp
    toTmp.lsrc(1) := src2
    toTmp.ldest := (VECTOR_TMP_REG_LMUL + i).U
    toTmp.uopIdx := (i * 2 + 1).U
  }
}

is(UopSplitType.VEC_M0X_VFIRST) {
  // Single uop whose result goes to the integer register file.
  val uop = csBundle(0)
  uop.rfWen := true.B
  uop.fpWen := false.B
  uop.vecWen := false.B
  uop.ldest := dest
}

is(UopSplitType.VEC_VWW) {
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    // These three do not depend on which phase the uop is in.
    uop.srcType(2) := SrcType.DC
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    when(i.U < lmul) {
      // First phase: one uop per source register, using src2 + i for both operands.
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    }.otherwise {
      // Second phase: combine the pair of temporaries at offset 2 * (i - lmul).
      val pairBase = Cat(i.U - lmul, 0.U(1.W))
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U + pairBase + 1.U
      uop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + pairBase
    }
  }
  // The last uop takes src1 and the old destination and writes `dest`.
  // These connections do not depend on the loop index; they were previously re-emitted
  // on every iteration. Issuing them once after the loop yields the same hardware,
  // because the last connection wins, while elaborating far fewer dynamic-index writes.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(2) := SrcType.vp
  lastUop.lsrc(0) := src1
  lastUop.lsrc(2) := dest
  lastUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // len x len uops: for destination register i, uop j reads index register src1 + i and
  // table register src2 + j. Partial results are chained through temporaries; the chain
  // starts from the old dest + i and its last link writes dest + i.
  def genGatherUops(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + i.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }
  }
  // Only the vlmulReg encodings below are expanded; other encodings keep whatever was
  // connected before this arm.
  switch(vlmulReg) {
    is("b001".U) { genGatherUops(2) }
    is("b010".U) { genGatherUops(4) }
    is("b011".U) { genGatherUops(8) }
  }
}

is(UopSplitType.VEC_RGATHER_VX) {
  // Same chained len x len expansion as the vector-indexed form, shifted by one slot
  // because uop 0 is the scalar move; every uop reads the moved value from
  // VECTOR_TMP_REG_LMUL, so the chain temporaries start one register higher.
  def genGatherVxUops(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
      uop.uopIdx := slot.U
    }
  }

  // uop 0: scalar/immediate to vector move on the i2v unit
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B

  // Default expansion for a single register; the larger encodings below are connected
  // later and therefore override it.
  genGatherVxUops(1)
  switch(vlmulReg) {
    is("b001".U) { genGatherVxUops(2) }
    is("b010".U) { genGatherVxUops(4) }
    is("b011".U) { genGatherVxUops(8) }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  // Variant used when vsewReg is all zeros: every (i, j) pair needs two uops, reading
  // index registers src1 + 2i and src1 + 2i + 1, so the chain uses two temporaries per j.
  def genTwoIndexRegUops(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = (i * len + j) * 2

      val first = csBundle(slot)
      first.lsrc(0) := src1 + (i * 2).U
      first.lsrc(1) := src2 + j.U
      first.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
      first.ldest := (VECTOR_TMP_REG_LMUL + j * 2).U
      first.uopIdx := slot.U

      val second = csBundle(slot + 1)
      second.lsrc(0) := src1 + (i * 2 + 1).U
      second.lsrc(1) := src2 + j.U
      second.lsrc(2) := (VECTOR_TMP_REG_LMUL + j * 2).U
      second.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
      second.uopIdx := (slot + 1).U
    }
  }

  // One uop per (i, j) pair. `destRegsPerIndexReg` consecutive destination registers
  // share one index register: 1 for the default case, 2 for e32, 4 for e64.
  def genSharedIndexRegUops(len: Int, destRegsPerIndexReg: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + (i / destRegsPerIndexReg).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }
  }

  // Picks the generator for the current vsewReg. The two-index-register variant is
  // only offered when `withTwoIndexRegVariant` is set; otherwise that element width
  // falls through to the default generator.
  def genForSew(len: Int, withTwoIndexRegVariant: Boolean): Unit = {
    if (withTwoIndexRegVariant) {
      when(!vsewReg.orR) {
        genTwoIndexRegUops(len)
      }.elsewhen(vsewReg === VSew.e32) {
        genSharedIndexRegUops(len, 2)
      }.elsewhen(vsewReg === VSew.e64) {
        genSharedIndexRegUops(len, 4)
      }.otherwise {
        genSharedIndexRegUops(len, 1)
      }
    } else {
      when(vsewReg === VSew.e32) {
        genSharedIndexRegUops(len, 2)
      }.elsewhen(vsewReg === VSew.e64) {
        genSharedIndexRegUops(len, 4)
      }.otherwise {
        genSharedIndexRegUops(len, 1)
      }
    }
  }

  // Default expansion for a single register; larger vlmulReg encodings override it.
  genForSew(1, withTwoIndexRegVariant = true)
  switch(vlmulReg) {
    is("b001".U) { genForSew(2, withTwoIndexRegVariant = true) }
    is("b010".U) { genForSew(4, withTwoIndexRegVariant = true) }
    is("b011".U) { genForSew(8, withTwoIndexRegVariant = false) }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  // Row i (one per src2 register) holds i + 2 uops, except the last row which holds i + 1;
  // rows are packed back to back, so row i starts at slot i * (i + 3) / 2.
  def genCompressUops(len: Int): Unit = {
    for (i <- 0 until len) {
      val rowBase = i * (i + 3) / 2
      val lastRow = i == len - 1
      val rowLen = if (lastRow) i + 1 else i + 2
      for (j <- 0 until rowLen) {
        val uop = csBundle(rowBase + j)
        val extraUop = j == i + 1 // trailing uop of a non-final row; it writes VECTOR_TMP_REG_LMUL
        val oldVd = if (i == j) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
        val newVd =
          if (lastRow) dest + j.U
          else if (extraUop) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + j + 1).U

        uop.vecWen := true.B
        uop.v0Wen := false.B
        // The trailing uop leaves source types 0 and 2 as don't-care.
        val src13Type = if (j == i+1) DontCare else SrcType.vp
        uop.srcType(0) := src13Type
        uop.srcType(1) := SrcType.vp
        uop.srcType(2) := src13Type
        // Row 0 reads src1 directly; later rows read VECTOR_TMP_REG_LMUL instead.
        uop.lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := oldVd
        uop.ldest := newVd
        uop.uopIdx := (rowBase + j).U
      }
    }
  }
  switch(vlmulReg) {
    is("b001".U) { genCompressUops(2) }
    is("b010".U) { genCompressUops(4) }
    is("b011".U) { genCompressUops(8) }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register: operand and destination indices all advance together.
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i)
    uop.lsrc(0) := src1 + i.U
    uop.lsrc(1) := src2 + i.U
    uop.lsrc(2) := dest + i.U
    uop.ldest := dest + i.U
    uop.uopIdx := i.U
  }
}

is(UopSplitType.VEC_US_LDST) {
  // uop 0: integer-to-vector move (original note: FMV.D.X) into VECTOR_TMP_REG_LMUL
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  // uops 1 .. MAX_VLMUL: one memory uop per destination register
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i + 1)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(2) := dest + i.U // old vd
    uop.ldest := dest + i.U
    uop.uopIdx := i.U
    uop.vlsInstr := true.B
  }
  // For segment accesses the first uop is marked waitForward and the last blockBackward.
  csBundle.head.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}

is(UopSplitType.VEC_S_LDST) {
  // uop 0: integer-to-vector move (original note: FMV.D.X) into VECTOR_TMP_REG_LMUL
  val firstMove = csBundle(0)
  firstMove.srcType(0) := SrcType.reg
  firstMove.srcType(1) := SrcType.imm
  firstMove.lsrc(1) := 0.U
  firstMove.ldest := VECTOR_TMP_REG_LMUL.U
  firstMove.fuType := FuType.i2v.U
  firstMove.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  firstMove.rfWen := false.B
  firstMove.fpWen := false.B
  firstMove.vecWen := true.B
  firstMove.vlsInstr := true.B

  // uop 1: second move, taking the instruction's lsrc(1) into VECTOR_TMP_REG_LMUL + 1
  val secondMove = csBundle(1)
  secondMove.srcType(0) := SrcType.reg
  secondMove.srcType(1) := SrcType.imm
  secondMove.lsrc(0) := latchedInst.lsrc(1)
  secondMove.lsrc(1) := 0.U
  secondMove.ldest := (VECTOR_TMP_REG_LMUL + 1).U
  secondMove.fuType := FuType.i2v.U
  secondMove.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  secondMove.rfWen := false.B
  secondMove.fpWen := false.B
  secondMove.vecWen := true.B
  secondMove.vlsInstr := true.B

  // uops 2 .. MAX_VLMUL + 1: one memory uop per destination register, reading both moves
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i + 2)
    uop.srcType(0) := SrcType.vp
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
    uop.lsrc(2) := dest + i.U // old vd
    uop.ldest := dest + i.U
    uop.uopIdx := i.U
    uop.vlsInstr := true.B
  }
  // For segment accesses the first uop is marked waitForward and the last blockBackward.
  csBundle.head.waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
is(UopSplitType.VEC_I_LDST) {
  // Segment form, data side: the first `groupRegs * fields` memory uops write the vector
  // file and read the old destination; the remaining ones are disabled.
  def genSegmentUops(groupRegs: Int, fields: Int): Unit = {
    val activeUops = groupRegs * fields
    for (i <- 0 until MAX_VLMUL) {
      val uop = csBundle(i + 1)
      val active = i < activeUops
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.srcType(1) := SrcType.no
      uop.lsrc(1) := src2 + i.U
      uop.srcType(2) := (if (active) SrcType.vp else SrcType.no)
      uop.lsrc(2) := dest + i.U
      uop.ldest := dest + i.U
      uop.rfWen := false.B
      uop.fpWen := false.B
      uop.vecWen := active.B
      uop.uopIdx := i.U
      uop.vlsInstr := true.B
    }
  }
  // Segment form, index side: only the first `indexRegs` memory uops read an index register.
  def genSegmentIndexSrc(indexRegs: Int): Unit = {
    for (i <- 0 until MAX_VLMUL) {
      val uop = csBundle(i + 1)
      uop.srcType(1) := (if (i < indexRegs) SrcType.vp else SrcType.no)
      uop.lsrc(1) := src2 + i.U
    }
  }

  val vlmul = vlmulReg
  val vsew = Cat(0.U(1.W), vsewReg)
  val veew = Cat(0.U(1.W), width)
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
  // Encodings b001/b010/b011 map to 1/2/3; everything else collapses to 0.
  val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))
  val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // uop 0: integer-to-vector move into VECTOR_TMP_REG_LMUL
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  when(nf === 0.U) {
    // Non-segment form: the per-uop register offsets come from the indexedLSRegOffset
    // lookup modules, addressed by {simple_emul, simple_lmul}.
    for (i <- 0 until MAX_VLMUL) {
      val offsetTable = indexedLSRegOffset(i)
      offsetTable.src := Cat(simple_emul, simple_lmul)
      val offsetVs2 = offsetTable.outOffsetVs2
      val offsetVd = offsetTable.outOffsetVd
      val uop = csBundle(i + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
      uop.srcType(2) := SrcType.vp
      // lsrc(2) is the old vd, so it selects the same register as ldest
      uop.lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
      uop.ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
      uop.uopIdx := i.U
      uop.vlsInstr := true.B
    }
  }.otherwise {
    // Segment form (nf =/= 0). Data side first.
    // Each entry: (simple_lmul value, largest nf encoding expanded for it); the field
    // count passed on is the nf encoding plus one.
    for ((lmulLog2, maxNf) <- Seq(0 -> 7, 1 -> 3, 2 -> 1); nfEnc <- 1 to maxNf) {
      when(simple_lmul === lmulLog2.U && nf === nfEnc.U) {
        genSegmentUops(1 << lmulLog2, nfEnc + 1)
      }
    }

    // Index side, connected after the data side so it overrides srcType(1) / lsrc(1).
    for (emulLog2 <- 0 to 3) {
      when(simple_emul === emulLog2.U) {
        genSegmentIndexSrc(1 << emulLog2)
      }
    }

    // Stores never write the vector register file.
    when(isVstore) {
      for (i <- 0 until MAX_VLMUL) {
        csBundle(i + 1).vecWen := false.B
      }
    }
  }
  // For segment accesses the first uop is marked waitForward and the last blockBackward.
  csBundle.head.waitForward := isIxSegment
  csBundle(numOfUop - 1.U).blockBackward := isIxSegment
}
genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
              }
              is(5.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
              }
              is(6.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
              }
              is(7.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
              }
            }
          }
          is(1.U) {
            switch(nf) {
              is(1.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
              }
              is(2.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
              }
              is(3.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
              }
            }
          }
          is(2.U) {
            switch(nf) {
              is(1.U) {
                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
              }
            }
          }
        }

        // Generate src1: simple_emul codes 0..3 select 1, 2, 4 or 8 index registers.
        switch(simple_emul) {
          is(0.U) {
            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
          }
          is(1.U) {
            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
          }
          is(2.U) {
            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
          }
          is(3.U) {
            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
          }
        }

        // Vector store instructions must not set vecWen on any memory uop
        // (slots 1..MAX_VLMUL).
        when(isVstore) {
          (1 to MAX_VLMUL).foreach { slot =>
            csBundle(slot).vecWen := false.B
          }
        }
      }
      csBundle.head.waitForward := isIxSegment
      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
    }
  }

  // Ready counter derived from the rename-side ready signals: each port's
  // inverted ready bit is paired with its port index, and RenameWidth is the
  // default when no pair is selected.
  val readyCounter = {
    val notReady = outReadys.map(ready => !ready)
    val portIndex = (0 until RenameWidth).map(_.U)
    PriorityMuxDefault(notReady.zip(portIndex), RenameWidth.U)
  }

  // True when all remaining uops of the complex instruction can be sent out
  // this cycle.
  val thisAllOut = readyCounter >= uopRes

  switch(state) {
    is(s_idle) {
      // A new instruction arrives: load its uop count and become active.
      when(inValid) {
        uopResNext := inUopInfo.numOfUop
        stateNext := s_active
      }
    }
    is(s_active) {
      when(!thisAllOut) {
        // More uops remain than readyCounter allows: subtract and stay active.
        uopResNext := uopRes - readyCounter
        stateNext := s_active
      }.elsewhen(inValid) {
        // The current instruction finishes and the next one is already valid.
        uopResNext := inUopInfo.numOfUop
        stateNext := s_active
      }.otherwise {
        // The current instruction finishes and nothing is waiting.
        uopResNext := 0.U
        stateNext := s_idle
      }
    }
  }

  // A redirect overrides the computed next state and clears the remaining count.
  state := Mux(io.redirect, s_idle, stateNext)
  uopRes := Mux(io.redirect, 0.U, uopResNext)

  // Number of uops issued this cycle: the smaller of uopRes and readyCounter.
  val complexNum = Mux(thisAllOut, uopRes, readyCounter)

  (0 until RenameWidth).foreach { port =>
    // Index into csBundle of the uop presented on this port.
    val uopSel = port.U + numOfUop - uopRes
    outValids(port) := complexNum > port.U
    // Out-of-range selections fall back to the last csBundle entry.
    outDecodedInsts(port) := Mux(uopSel < maxUopSize.U, csBundle(uopSel), csBundle(maxUopSize - 1))
  }

  outComplexNum := Mux(state === s_active, complexNum, 0.U)
  inReady := (state === s_idle) || ((state === s_active) && thisAllOut)

  // NOTE(review): everything below is disabled code kept from an earlier
  // revision -- confirm whether it can be deleted.
//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
//  val notInf = Wire(Vec(DecodeWidth, Bool()))
//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
//
//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
//    0.U)
//  validToRename.zipWithIndex.foreach{
//    case(dst, i) =>
//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
//      dst := MuxCase(false.B, Seq(
//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
//      ).toSeq)
//  }
//
//  readyToIBuf.zipWithIndex.foreach {
//    case (dst, i) =>
//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
//      dst := MuxCase(true.B, Seq(
//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
//      ).toSeq)
//  }
//
//  io.deq.decodedInsts := decodedInsts
//  io.deq.complexNum := complexNum
//  io.deq.validToRename := validToRename
//  io.deq.readyToIBuf := readyToIBuf
}