/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Combinational lookup table giving, for one fixed uop index, the register
  * offsets that uop uses, for every (emul, lmul, nf) combination.
  *
  * The table is computed at elaboration time and lowered to logic with the
  * QMC minimizer.
  *
  * Ports:
  *  - `src`: 7-bit key laid out as `{emul[1:0], lmul[1:0], nf[2:0]}`. `emul` and
  *    `lmul` are used as log2 values (they only appear as shift amounts), and
  *    `nf` is the field count minus one (`nf + 1` is passed as `nfields`).
  *  - `outOffsetVs2`: bits 5..3 of the decoded entry.
  *  - `outOffsetVd`: bits 2..0 of the decoded entry.
  *  - `outIsFirstUopInVd`: bit 6 of the decoded entry.
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes `(offsetVs2, offsetVd, isFirstUopInVd)` for one uop.
    *
    * Each field owns `1 << max(lmul, emul)` consecutive uop indices, so the
    * field number and the position inside the field are the quotient and
    * remainder of `uopIdx` by that count.
    *
    * @param lmul    log2 of the data register group size
    * @param emul    log2 of the index register group size
    * @param nfields number of fields
    * @param uopIdx  uop index to look up
    * @return the offsets for that uop, or `(0, 0, 1)` when the configuration is
    *         rejected or `uopIdx` is outside the `nfields` groups
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val fallback = (0, 0, 1)
    val indexGroupIsLarger = lmul < emul
    // number of uops issued per field: the larger of the two register groups
    val uopsPerField = 1 << (if (indexGroupIsLarger) emul else lmul)

    // NOTE(review): this multiplies the log2 encoding of LMUL by nfields, not
    // (1 << lmul) * nfields -- confirm this is the intended legality check.
    val rejected = lmul * nfields > 8
    // A negative index or a non-positive group size can never match a table slot.
    if (rejected || uopIdx < 0 || uopsPerField <= 0) {
      fallback
    } else {
      val field = uopIdx / uopsPerField
      val step = uopIdx % uopsPerField
      if (field >= nfields) {
        fallback
      } else if (indexGroupIsLarger) {
        // lmul < emul: several index registers share one data register
        val ratio = 1 << (emul - lmul)
        (step, step / ratio + field * (1 << lmul), if (step % ratio == 0) 1 else 0)
      } else {
        // lmul >= emul: several data registers share one index register
        val ratio = 1 << (lmul - emul)
        (step / ratio, step + field * (1 << lmul), 1)
      }
    }
  }

  // One entry per (emul, lmul, nf), emul outermost and nf innermost.
  // Entry layout: (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd).
  // Kept as a `var` so the public member signature stays unchanged.
  var combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) =
        genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  // Pack every entry into a 7-bit key and a 7-bit value, then let the decoder
  // minimise the resulting truth table. Unlisted keys default to all zeros.
  val out = decoder(
    QMCMinimizer,
    src,
    TruthTable(
      combVemulNf.map { case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
        val key = (emul << 5) | (lmul << 3) | nf
        val value = (isFirstUopInVd << 6) | (offsetVs2 << 3) | offsetVd
        (BitPat(key.U(7.W)), BitPat(value.U(7.W)))
      },
      BitPat.N(7)
    )
  )

  outIsFirstUopInVd := out(6).asBool
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}

/** Numeric constants shared by the vector uop-splitting logic. */
trait VectorConstants {
  /** Upper bound used when unrolling per-register uop loops. */
  val MAX_VLMUL: Int = 8
  /** Logical register index used as the temporary destination of a GPR-to-FP move. */
  val FP_TMP_REG_MV: Int = 32
  /** First logical index of the vector temporaries; 33~47 -> 15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33
  /** Number of `indexedLSUopTable` instances created, one per uop index. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}

/**
  * IO bundle of [[DecodeUnitComp]].
  *
  * A complex instruction enters through `in` together with its uop information;
  * the uops it is split into leave through `out.complexDecodedInsts`.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): use of redirect is not visible in this part of the file -- presumably a flush request.
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // vtype forwarded from vtypeGen; connected into the uops of a vset instruction
  val vtypeBypass = Input(new VType)
  // When the first instruction in the decode vector is a complex one, it is passed in here
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    // one decoupled port per rename slot
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
  private val inUopInfo = io.in.bits.uopInfo
  private val outValids = io.out.complexDecodedInsts.map(_.valid)
  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
  private val outComplexNum = io.complexNum

  val maxUopSize = MaxUopSize
  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
    }.elsewhen(inInstFields.RS1 === 0.U) {
      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
    }
  }

  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
  //input bits
  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)

  val src1 = Cat(0.U(1.W), instFields.RS1)
  val src2 = Cat(0.U(1.W), instFields.RS2)
  val dest = Cat(0.U(1.W), instFields.RD)

  val nf = instFields.NF
  val width = instFields.WIDTH(1, 0)

  //output of DecodeUnit
  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
  val lmul = Wire(UInt(4.W))
  val isVsetSimple = Wire(Bool())

  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
  indexedLSRegOffset.map(_.src := 0.U)

  //pre decode
lmul := latchedUopInfo.lmul 158 isVsetSimple := latchedInst.isVset 159 val vlmulReg = latchedInst.vpu.vlmul 160 val vsewReg = latchedInst.vpu.vsew 161 162 //Type of uop Div 163 val typeOfSplit = latchedInst.uopSplitType 164 val src1Type = latchedInst.srcType(0) 165 val src1IsImm = src1Type === SrcType.imm 166 val src1IsFp = src1Type === SrcType.fp 167 168 numOfUop := latchedUopInfo.numOfUop 169 numOfWB := latchedUopInfo.numOfWB 170 171 //uops dispatch 172 val s_idle :: s_active :: Nil = Enum(2) 173 val state = RegInit(s_idle) 174 val stateNext = WireDefault(state) 175 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 176 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 177 val uopResNext = WireInit(uopRes) 178 179 //uop div up to maxUopSize 180 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 181 csBundle.foreach { case dst => 182 dst := latchedInst 183 dst.numUops := latchedUopInfo.numOfUop 184 dst.numWB := latchedUopInfo.numOfWB 185 dst.firstUop := false.B 186 dst.lastUop := false.B 187 dst.vlsInstr := false.B 188 } 189 190 csBundle(0).firstUop := true.B 191 csBundle(numOfUop - 1.U).lastUop := true.B 192 193 switch(typeOfSplit) { 194 is(UopSplitType.VSET) { 195 // In simple decoder, rfWen and vecWen are not set 196 when(isVsetSimple) { 197 // Default 198 // uop0 set rd, never flushPipe 199 csBundle(0).fuType := FuType.vsetiwi.U 200 csBundle(0).flushPipe := false.B 201 csBundle(0).rfWen := true.B 202 // uop1 set vl, vsetvl will flushPipe 203 csBundle(1).ldest := VCONFIG_IDX.U 204 csBundle(1).vecWen := true.B 205 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 206 csBundle(1).fuType := FuType.vsetfwf.U 207 csBundle(1).srcType(0) := SrcType.vp 208 csBundle(1).lsrc(0) := VCONFIG_IDX.U 209 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 210 // uop0: mv vtype gpr to vector region 211 csBundle(0).srcType(0) := SrcType.xp 212 csBundle(0).srcType(1) := SrcType.no 213 csBundle(0).lsrc(1) := 
0.U 214 csBundle(0).ldest := FP_TMP_REG_MV.U 215 csBundle(0).fuType := FuType.i2f.U 216 csBundle(0).fpWen := true.B 217 csBundle(0).fpu.isAddSub := false.B 218 csBundle(0).fpu.typeTagIn := FPU.D 219 csBundle(0).fpu.typeTagOut := FPU.D 220 csBundle(0).fpu.fromInt := true.B 221 csBundle(0).fpu.wflags := false.B 222 csBundle(0).fpu.fpWen := true.B 223 csBundle(0).fpu.div := false.B 224 csBundle(0).fpu.sqrt := false.B 225 csBundle(0).fpu.fcvt := false.B 226 csBundle(0).flushPipe := false.B 227 // uop1: uvsetvcfg_vv 228 csBundle(1).fuType := FuType.vsetfwf.U 229 // vl 230 csBundle(1).srcType(0) := SrcType.vp 231 csBundle(1).lsrc(0) := VCONFIG_IDX.U 232 // vtype 233 csBundle(1).srcType(1) := SrcType.fp 234 csBundle(1).lsrc(1) := FP_TMP_REG_MV.U 235 csBundle(1).vecWen := true.B 236 csBundle(1).ldest := VCONFIG_IDX.U 237 } 238 // use bypass vtype from vtypeGen 239 csBundle(0).vpu.connectVType(io.vtypeBypass) 240 csBundle(1).vpu.connectVType(io.vtypeBypass) 241 } 242 } 243 is(UopSplitType.VEC_VVV) { 244 for (i <- 0 until MAX_VLMUL) { 245 csBundle(i).lsrc(0) := src1 + i.U 246 csBundle(i).lsrc(1) := src2 + i.U 247 csBundle(i).lsrc(2) := dest + i.U 248 csBundle(i).ldest := dest + i.U 249 csBundle(i).uopIdx := i.U 250 } 251 } 252 is(UopSplitType.VEC_VFV) { 253 /* 254 i to vector move 255 */ 256 csBundle(0).srcType(0) := SrcType.fp 257 csBundle(0).srcType(1) := SrcType.imm 258 csBundle(0).lsrc(1) := 0.U 259 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 260 csBundle(0).fuType := FuType.f2v.U 261 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 262 csBundle(0).vecWen := true.B 263 csBundle(0).vpu.isReverse := false.B 264 /* 265 LMUL 266 */ 267 for (i <- 0 until MAX_VLMUL) { 268 csBundle(i + 1).srcType(0) := SrcType.vp 269 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 270 csBundle(i + 1).lsrc(1) := src2 + i.U 271 csBundle(i + 1).lsrc(2) := dest + i.U 272 csBundle(i + 1).ldest := dest + i.U 273 csBundle(i + 1).uopIdx := i.U 274 } 275 } 276 is(UopSplitType.VEC_EXT2) 
{ 277 for (i <- 0 until MAX_VLMUL / 2) { 278 csBundle(2 * i).lsrc(1) := src2 + i.U 279 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 280 csBundle(2 * i).ldest := dest + (2 * i).U 281 csBundle(2 * i).uopIdx := (2 * i).U 282 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 283 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 284 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 285 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 286 } 287 } 288 is(UopSplitType.VEC_EXT4) { 289 for (i <- 0 until MAX_VLMUL / 4) { 290 csBundle(4 * i).lsrc(1) := src2 + i.U 291 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 292 csBundle(4 * i).ldest := dest + (4 * i).U 293 csBundle(4 * i).uopIdx := (4 * i).U 294 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 295 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 296 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 297 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 298 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 299 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 300 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 301 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 302 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 303 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 304 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 305 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 306 } 307 } 308 is(UopSplitType.VEC_EXT8) { 309 for (i <- 0 until MAX_VLMUL) { 310 csBundle(i).lsrc(1) := src2 311 csBundle(i).lsrc(2) := dest + i.U 312 csBundle(i).ldest := dest + i.U 313 csBundle(i).uopIdx := i.U 314 } 315 } 316 is(UopSplitType.VEC_0XV) { 317 /* 318 i/f to vector move 319 */ 320 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 321 csBundle(0).srcType(1) := SrcType.imm 322 csBundle(0).lsrc(1) := 0.U 323 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 324 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 325 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 326 csBundle(0).rfWen := false.B 327 
csBundle(0).fpWen := false.B 328 csBundle(0).vecWen := true.B 329 /* 330 vmv.s.x 331 */ 332 csBundle(1).srcType(0) := SrcType.vp 333 csBundle(1).srcType(1) := SrcType.imm 334 csBundle(1).srcType(2) := SrcType.vp 335 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 336 csBundle(1).lsrc(1) := 0.U 337 csBundle(1).lsrc(2) := dest 338 csBundle(1).ldest := dest 339 csBundle(1).rfWen := false.B 340 csBundle(1).fpWen := false.B 341 csBundle(1).vecWen := true.B 342 csBundle(1).uopIdx := 0.U 343 } 344 is(UopSplitType.VEC_VXV) { 345 /* 346 i to vector move 347 */ 348 csBundle(0).srcType(0) := SrcType.reg 349 csBundle(0).srcType(1) := SrcType.imm 350 csBundle(0).lsrc(1) := 0.U 351 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 352 csBundle(0).fuType := FuType.i2v.U 353 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 354 csBundle(0).vecWen := true.B 355 csBundle(0).vpu.isReverse := false.B 356 /* 357 LMUL 358 */ 359 for (i <- 0 until MAX_VLMUL) { 360 csBundle(i + 1).srcType(0) := SrcType.vp 361 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 362 csBundle(i + 1).lsrc(1) := src2 + i.U 363 csBundle(i + 1).lsrc(2) := dest + i.U 364 csBundle(i + 1).ldest := dest + i.U 365 csBundle(i + 1).uopIdx := i.U 366 } 367 } 368 is(UopSplitType.VEC_VVW) { 369 for (i <- 0 until MAX_VLMUL / 2) { 370 csBundle(2 * i).lsrc(0) := src1 + i.U 371 csBundle(2 * i).lsrc(1) := src2 + i.U 372 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 373 csBundle(2 * i).ldest := dest + (2 * i).U 374 csBundle(2 * i).uopIdx := (2 * i).U 375 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 376 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 377 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 378 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 379 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 380 } 381 } 382 is(UopSplitType.VEC_VFW) { 383 /* 384 f to vector move 385 */ 386 csBundle(0).srcType(0) := SrcType.fp 387 csBundle(0).srcType(1) := SrcType.imm 388 csBundle(0).lsrc(1) 
:= 0.U 389 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 390 csBundle(0).fuType := FuType.f2v.U 391 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 392 csBundle(0).rfWen := false.B 393 csBundle(0).fpWen := false.B 394 csBundle(0).vecWen := true.B 395 396 for (i <- 0 until MAX_VLMUL / 2) { 397 csBundle(2 * i + 1).srcType(0) := SrcType.vp 398 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 399 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 400 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 401 csBundle(2 * i + 1).ldest := dest + (2 * i).U 402 csBundle(2 * i + 1).uopIdx := (2 * i).U 403 csBundle(2 * i + 2).srcType(0) := SrcType.vp 404 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 405 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 406 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 407 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 408 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 409 } 410 } 411 is(UopSplitType.VEC_WVW) { 412 for (i <- 0 until MAX_VLMUL / 2) { 413 csBundle(2 * i).lsrc(0) := src1 + i.U 414 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 415 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 416 csBundle(2 * i).ldest := dest + (2 * i).U 417 csBundle(2 * i).uopIdx := (2 * i).U 418 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 419 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 420 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 421 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 422 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 423 } 424 } 425 is(UopSplitType.VEC_VXW) { 426 /* 427 i to vector move 428 */ 429 csBundle(0).srcType(0) := SrcType.reg 430 csBundle(0).srcType(1) := SrcType.imm 431 csBundle(0).lsrc(1) := 0.U 432 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 433 csBundle(0).fuType := FuType.i2v.U 434 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 435 csBundle(0).vecWen := true.B 436 437 for (i <- 0 until MAX_VLMUL / 2) { 438 csBundle(2 * i + 1).srcType(0) := SrcType.vp 439 csBundle(2 * i + 1).lsrc(0) := 
VECTOR_TMP_REG_LMUL.U 440 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 441 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 442 csBundle(2 * i + 1).ldest := dest + (2 * i).U 443 csBundle(2 * i + 1).uopIdx := (2 * i).U 444 csBundle(2 * i + 2).srcType(0) := SrcType.vp 445 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 446 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 447 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 448 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 449 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 450 } 451 } 452 is(UopSplitType.VEC_WXW) { 453 /* 454 i to vector move 455 */ 456 csBundle(0).srcType(0) := SrcType.reg 457 csBundle(0).srcType(1) := SrcType.imm 458 csBundle(0).lsrc(1) := 0.U 459 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 460 csBundle(0).fuType := FuType.i2v.U 461 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 462 csBundle(0).vecWen := true.B 463 464 for (i <- 0 until MAX_VLMUL / 2) { 465 csBundle(2 * i + 1).srcType(0) := SrcType.vp 466 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 467 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 468 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 469 csBundle(2 * i + 1).ldest := dest + (2 * i).U 470 csBundle(2 * i + 1).uopIdx := (2 * i).U 471 csBundle(2 * i + 2).srcType(0) := SrcType.vp 472 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 473 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 474 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 475 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 476 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 477 } 478 } 479 is(UopSplitType.VEC_WVV) { 480 for (i <- 0 until MAX_VLMUL / 2) { 481 482 csBundle(2 * i).lsrc(0) := src1 + i.U 483 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 484 csBundle(2 * i).lsrc(2) := dest + i.U 485 csBundle(2 * i).ldest := dest + i.U 486 csBundle(2 * i).uopIdx := (2 * i).U 487 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 488 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 489 csBundle(2 * i + 
1).lsrc(2) := dest + i.U 490 csBundle(2 * i + 1).ldest := dest + i.U 491 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 492 } 493 } 494 is(UopSplitType.VEC_WFW) { 495 /* 496 f to vector move 497 */ 498 csBundle(0).srcType(0) := SrcType.fp 499 csBundle(0).srcType(1) := SrcType.imm 500 csBundle(0).lsrc(1) := 0.U 501 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 502 csBundle(0).fuType := FuType.f2v.U 503 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 504 csBundle(0).rfWen := false.B 505 csBundle(0).fpWen := false.B 506 csBundle(0).vecWen := true.B 507 508 for (i <- 0 until MAX_VLMUL / 2) { 509 csBundle(2 * i + 1).srcType(0) := SrcType.vp 510 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 511 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 512 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 513 csBundle(2 * i + 1).ldest := dest + (2 * i).U 514 csBundle(2 * i + 1).uopIdx := (2 * i).U 515 csBundle(2 * i + 2).srcType(0) := SrcType.vp 516 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 517 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 518 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 519 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 520 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 521 } 522 } 523 is(UopSplitType.VEC_WXV) { 524 /* 525 i to vector move 526 */ 527 csBundle(0).srcType(0) := SrcType.reg 528 csBundle(0).srcType(1) := SrcType.imm 529 csBundle(0).lsrc(1) := 0.U 530 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 531 csBundle(0).fuType := FuType.i2v.U 532 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 533 csBundle(0).vecWen := true.B 534 535 for (i <- 0 until MAX_VLMUL / 2) { 536 csBundle(2 * i + 1).srcType(0) := SrcType.vp 537 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 538 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 539 csBundle(2 * i + 1).lsrc(2) := dest + i.U 540 csBundle(2 * i + 1).ldest := dest + i.U 541 csBundle(2 * i + 1).uopIdx := (2 * i).U 
542 csBundle(2 * i + 2).srcType(0) := SrcType.vp 543 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 544 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 545 csBundle(2 * i + 2).lsrc(2) := dest + i.U 546 csBundle(2 * i + 2).ldest := dest + i.U 547 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 548 } 549 } 550 is(UopSplitType.VEC_VVM) { 551 csBundle(0).lsrc(2) := dest 552 csBundle(0).ldest := dest 553 csBundle(0).uopIdx := 0.U 554 for (i <- 1 until MAX_VLMUL) { 555 csBundle(i).lsrc(0) := src1 + i.U 556 csBundle(i).lsrc(1) := src2 + i.U 557 csBundle(i).lsrc(2) := dest 558 csBundle(i).ldest := dest 559 csBundle(i).uopIdx := i.U 560 } 561 } 562 is(UopSplitType.VEC_VFM) { 563 /* 564 f to vector move 565 */ 566 csBundle(0).srcType(0) := SrcType.fp 567 csBundle(0).srcType(1) := SrcType.imm 568 csBundle(0).lsrc(1) := 0.U 569 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 570 csBundle(0).fuType := FuType.f2v.U 571 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 572 csBundle(0).rfWen := false.B 573 csBundle(0).fpWen := false.B 574 csBundle(0).vecWen := true.B 575 //LMUL 576 csBundle(1).srcType(0) := SrcType.vp 577 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 578 csBundle(1).lsrc(2) := dest 579 csBundle(1).ldest := dest 580 csBundle(1).uopIdx := 0.U 581 for (i <- 1 until MAX_VLMUL) { 582 csBundle(i + 1).srcType(0) := SrcType.vp 583 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 584 csBundle(i + 1).lsrc(1) := src2 + i.U 585 csBundle(i + 1).lsrc(2) := dest 586 csBundle(i + 1).ldest := dest 587 csBundle(i + 1).uopIdx := i.U 588 } 589 csBundle(numOfUop - 1.U).ldest := dest 590 } 591 is(UopSplitType.VEC_VXM) { 592 /* 593 i to vector move 594 */ 595 csBundle(0).srcType(0) := SrcType.reg 596 csBundle(0).srcType(1) := SrcType.imm 597 csBundle(0).lsrc(1) := 0.U 598 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 599 csBundle(0).fuType := FuType.i2v.U 600 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 
601 csBundle(0).vecWen := true.B 602 //LMUL 603 csBundle(1).srcType(0) := SrcType.vp 604 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 605 csBundle(1).lsrc(2) := dest 606 csBundle(1).ldest := dest 607 csBundle(1).uopIdx := 0.U 608 for (i <- 1 until MAX_VLMUL) { 609 csBundle(i + 1).srcType(0) := SrcType.vp 610 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 611 csBundle(i + 1).lsrc(1) := src2 + i.U 612 csBundle(i + 1).lsrc(2) := dest 613 csBundle(i + 1).ldest := dest 614 csBundle(i + 1).uopIdx := i.U 615 } 616 csBundle(numOfUop - 1.U).ldest := dest 617 } 618 is(UopSplitType.VEC_SLIDE1UP) { 619 /* 620 i to vector move 621 */ 622 csBundle(0).srcType(0) := SrcType.reg 623 csBundle(0).srcType(1) := SrcType.imm 624 csBundle(0).lsrc(1) := 0.U 625 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 626 csBundle(0).fuType := FuType.i2v.U 627 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 628 csBundle(0).vecWen := true.B 629 //LMUL 630 csBundle(1).srcType(0) := SrcType.vp 631 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 632 csBundle(1).lsrc(2) := dest 633 csBundle(1).ldest := dest 634 csBundle(1).uopIdx := 0.U 635 for (i <- 1 until MAX_VLMUL) { 636 csBundle(i + 1).srcType(0) := SrcType.vp 637 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 638 csBundle(i + 1).lsrc(1) := src2 + i.U 639 csBundle(i + 1).lsrc(2) := dest + i.U 640 csBundle(i + 1).ldest := dest + i.U 641 csBundle(i + 1).uopIdx := i.U 642 } 643 } 644 is(UopSplitType.VEC_FSLIDE1UP) { 645 /* 646 i to vector move 647 */ 648 csBundle(0).srcType(0) := SrcType.fp 649 csBundle(0).srcType(1) := SrcType.imm 650 csBundle(0).lsrc(1) := 0.U 651 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 652 csBundle(0).fuType := FuType.f2v.U 653 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 654 csBundle(0).rfWen := false.B 655 csBundle(0).fpWen := false.B 656 csBundle(0).vecWen := true.B 657 //LMUL 658 csBundle(1).srcType(0) := SrcType.vp 659 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 660 csBundle(1).lsrc(1) := 
src2 661 csBundle(1).lsrc(2) := dest 662 csBundle(1).ldest := dest 663 csBundle(1).uopIdx := 0.U 664 for (i <- 1 until MAX_VLMUL) { 665 csBundle(i + 1).srcType(0) := SrcType.vp 666 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 667 csBundle(i + 1).lsrc(1) := src2 + i.U 668 csBundle(i + 1).lsrc(2) := dest + i.U 669 csBundle(i + 1).ldest := dest + i.U 670 csBundle(i + 1).uopIdx := i.U 671 } 672 } 673 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 674 /* 675 i to vector move 676 */ 677 csBundle(0).srcType(0) := SrcType.reg 678 csBundle(0).srcType(1) := SrcType.imm 679 csBundle(0).lsrc(1) := 0.U 680 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 681 csBundle(0).fuType := FuType.i2v.U 682 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 683 csBundle(0).vecWen := true.B 684 //LMUL 685 for (i <- 0 until MAX_VLMUL) { 686 csBundle(2 * i + 1).srcType(0) := SrcType.vp 687 csBundle(2 * i + 1).srcType(1) := SrcType.vp 688 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 689 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 690 csBundle(2 * i + 1).lsrc(2) := dest + i.U 691 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 692 csBundle(2 * i + 1).uopIdx := (2 * i).U 693 if (2 * i + 2 < MAX_VLMUL * 2) { 694 csBundle(2 * i + 2).srcType(0) := SrcType.vp 695 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 696 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 697 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 698 csBundle(2 * i + 2).ldest := dest + i.U 699 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 700 } 701 } 702 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 703 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 704 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 705 } 706 is(UopSplitType.VEC_FSLIDE1DOWN) { 707 /* 708 i to vector move 709 */ 710 csBundle(0).srcType(0) := SrcType.fp 711 csBundle(0).srcType(1) := SrcType.imm 712 csBundle(0).lsrc(1) := 0.U 713 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 714 csBundle(0).fuType := 
FuType.f2v.U 715 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 716 csBundle(0).rfWen := false.B 717 csBundle(0).fpWen := false.B 718 csBundle(0).vecWen := true.B 719 //LMUL 720 for (i <- 0 until MAX_VLMUL) { 721 csBundle(2 * i + 1).srcType(0) := SrcType.vp 722 csBundle(2 * i + 1).srcType(1) := SrcType.vp 723 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 724 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 725 csBundle(2 * i + 1).lsrc(2) := dest + i.U 726 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 727 csBundle(2 * i + 1).uopIdx := (2 * i).U 728 if (2 * i + 2 < MAX_VLMUL * 2) { 729 csBundle(2 * i + 2).srcType(0) := SrcType.vp 730 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 731 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 732 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 733 csBundle(2 * i + 2).ldest := dest + i.U 734 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 735 } 736 } 737 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 738 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 739 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 740 } 741 is(UopSplitType.VEC_VRED) { 742 when(vlmulReg === "b001".U) { 743 csBundle(0).srcType(2) := SrcType.DC 744 csBundle(0).lsrc(0) := src2 + 1.U 745 csBundle(0).lsrc(1) := src2 746 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 747 csBundle(0).uopIdx := 0.U 748 } 749 when(vlmulReg === "b010".U) { 750 csBundle(0).srcType(2) := SrcType.DC 751 csBundle(0).lsrc(0) := src2 + 1.U 752 csBundle(0).lsrc(1) := src2 753 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 754 csBundle(0).uopIdx := 0.U 755 756 csBundle(1).srcType(2) := SrcType.DC 757 csBundle(1).lsrc(0) := src2 + 3.U 758 csBundle(1).lsrc(1) := src2 + 2.U 759 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 760 csBundle(1).uopIdx := 1.U 761 762 csBundle(2).srcType(2) := SrcType.DC 763 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 764 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 765 csBundle(2).ldest := 
(VECTOR_TMP_REG_LMUL + 2).U 766 csBundle(2).uopIdx := 2.U 767 } 768 when(vlmulReg === "b011".U) { 769 for (i <- 0 until MAX_VLMUL) { 770 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 771 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 772 csBundle(i).lsrc(1) := src2 + (i * 2).U 773 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 774 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 775 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 776 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 777 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 778 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 779 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 780 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 781 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 782 } 783 csBundle(i).srcType(2) := SrcType.DC 784 csBundle(i).uopIdx := i.U 785 } 786 } 787 when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) { 788 /* 789 * 2 <= vlmul <= 8 790 */ 791 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 792 csBundle(numOfUop - 1.U).lsrc(0) := src1 793 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 794 csBundle(numOfUop - 1.U).lsrc(2) := dest 795 csBundle(numOfUop - 1.U).ldest := dest 796 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 797 } 798 } 799 is(UopSplitType.VEC_VFRED) { 800 val vlmul = vlmulReg 801 val vsew = vsewReg 802 when(vlmul === VLmul.m8){ 803 for (i <- 0 until 4) { 804 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 805 csBundle(i).lsrc(1) := src2 + (i * 2).U 806 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 807 csBundle(i).uopIdx := i.U 808 } 809 for (i <- 4 until 6) { 810 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 811 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 812 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 813 csBundle(i).uopIdx := i.U 814 } 815 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 816 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 817 
csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 818 csBundle(6).uopIdx := 6.U 819 when(vsew === VSew.e64) { 820 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 821 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 822 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 823 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 824 csBundle(7).uopIdx := 7.U 825 csBundle(8).lsrc(0) := src1 826 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 827 csBundle(8).ldest := dest 828 csBundle(8).uopIdx := 8.U 829 } 830 when(vsew === VSew.e32) { 831 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 832 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 833 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 834 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 835 csBundle(7).uopIdx := 7.U 836 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 837 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 838 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 839 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 840 csBundle(8).uopIdx := 8.U 841 csBundle(9).lsrc(0) := src1 842 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 843 csBundle(9).ldest := dest 844 csBundle(9).uopIdx := 9.U 845 } 846 when(vsew === VSew.e16) { 847 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 848 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 849 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 850 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 851 csBundle(7).uopIdx := 7.U 852 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 853 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 854 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 855 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 856 csBundle(8).uopIdx := 8.U 857 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 858 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 859 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 860 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 861 csBundle(9).uopIdx := 9.U 862 csBundle(10).lsrc(0) := src1 863 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 
864 csBundle(10).ldest := dest 865 csBundle(10).uopIdx := 10.U 866 } 867 } 868 when(vlmul === VLmul.m4) { 869 for (i <- 0 until 2) { 870 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 871 csBundle(i).lsrc(1) := src2 + (i * 2).U 872 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 873 csBundle(i).uopIdx := i.U 874 } 875 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 876 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 877 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 878 csBundle(2).uopIdx := 2.U 879 when(vsew === VSew.e64) { 880 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 881 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 882 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 883 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 884 csBundle(3).uopIdx := 3.U 885 csBundle(4).lsrc(0) := src1 886 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 887 csBundle(4).ldest := dest 888 csBundle(4).uopIdx := 4.U 889 } 890 when(vsew === VSew.e32) { 891 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 892 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 893 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 894 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 895 csBundle(3).uopIdx := 3.U 896 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 897 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 898 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 899 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 900 csBundle(4).uopIdx := 4.U 901 csBundle(5).lsrc(0) := src1 902 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 903 csBundle(5).ldest := dest 904 csBundle(5).uopIdx := 5.U 905 } 906 when(vsew === VSew.e16) { 907 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 908 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 909 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 910 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 911 csBundle(3).uopIdx := 3.U 912 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 913 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 914 csBundle(4).ldest := 
(VECTOR_TMP_REG_LMUL + 4).U 915 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 916 csBundle(4).uopIdx := 4.U 917 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 918 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 919 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 920 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 921 csBundle(5).uopIdx := 5.U 922 csBundle(6).lsrc(0) := src1 923 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 924 csBundle(6).ldest := dest 925 csBundle(6).uopIdx := 6.U 926 } 927 } 928 when(vlmul === VLmul.m2) { 929 csBundle(0).lsrc(0) := src2 + 1.U 930 csBundle(0).lsrc(1) := src2 + 0.U 931 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 932 csBundle(0).uopIdx := 0.U 933 when(vsew === VSew.e64) { 934 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 935 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 936 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 937 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 938 csBundle(1).uopIdx := 1.U 939 csBundle(2).lsrc(0) := src1 940 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 941 csBundle(2).ldest := dest 942 csBundle(2).uopIdx := 2.U 943 } 944 when(vsew === VSew.e32) { 945 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 946 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 947 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 948 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 949 csBundle(1).uopIdx := 1.U 950 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 951 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 952 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 953 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 954 csBundle(2).uopIdx := 2.U 955 csBundle(3).lsrc(0) := src1 956 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 957 csBundle(3).ldest := dest 958 csBundle(3).uopIdx := 3.U 959 } 960 when(vsew === VSew.e16) { 961 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 962 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 963 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 964 
csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 965 csBundle(1).uopIdx := 1.U 966 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 967 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 968 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 969 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 970 csBundle(2).uopIdx := 2.U 971 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 972 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 973 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 974 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 975 csBundle(3).uopIdx := 3.U 976 csBundle(4).lsrc(0) := src1 977 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 978 csBundle(4).ldest := dest 979 csBundle(4).uopIdx := 4.U 980 } 981 } 982 when(vlmul === VLmul.m1) { 983 when(vsew === VSew.e64) { 984 csBundle(0).lsrc(0) := src2 985 csBundle(0).lsrc(1) := src2 986 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 987 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 988 csBundle(0).uopIdx := 0.U 989 csBundle(1).lsrc(0) := src1 990 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 991 csBundle(1).ldest := dest 992 csBundle(1).uopIdx := 1.U 993 } 994 when(vsew === VSew.e32) { 995 csBundle(0).lsrc(0) := src2 996 csBundle(0).lsrc(1) := src2 997 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 998 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 999 csBundle(0).uopIdx := 0.U 1000 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1001 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1002 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1003 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1004 csBundle(1).uopIdx := 1.U 1005 csBundle(2).lsrc(0) := src1 1006 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1007 csBundle(2).ldest := dest 1008 csBundle(2).uopIdx := 2.U 1009 } 1010 when(vsew === VSew.e16) { 1011 csBundle(0).lsrc(0) := src2 1012 csBundle(0).lsrc(1) := src2 1013 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1014 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1015 csBundle(0).uopIdx := 0.U 1016 csBundle(1).lsrc(0) := 
(VECTOR_TMP_REG_LMUL + 0).U 1017 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1018 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1019 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1020 csBundle(1).uopIdx := 1.U 1021 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1022 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1023 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1024 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 1025 csBundle(2).uopIdx := 2.U 1026 csBundle(3).lsrc(0) := src1 1027 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1028 csBundle(3).ldest := dest 1029 csBundle(3).uopIdx := 3.U 1030 } 1031 } 1032 when(vlmul === VLmul.mf2) { 1033 when(vsew === VSew.e32) { 1034 csBundle(0).lsrc(0) := src2 1035 csBundle(0).lsrc(1) := src2 1036 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1037 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1038 csBundle(0).uopIdx := 0.U 1039 csBundle(1).lsrc(0) := src1 1040 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1041 csBundle(1).ldest := dest 1042 csBundle(1).uopIdx := 1.U 1043 } 1044 when(vsew === VSew.e16) { 1045 csBundle(0).lsrc(0) := src2 1046 csBundle(0).lsrc(1) := src2 1047 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1048 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1049 csBundle(0).uopIdx := 0.U 1050 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1051 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1052 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1053 csBundle(1).vpu.fpu.isFoldTo1_8 := true.B 1054 csBundle(1).uopIdx := 1.U 1055 csBundle(2).lsrc(0) := src1 1056 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1057 csBundle(2).ldest := dest 1058 csBundle(2).uopIdx := 2.U 1059 } 1060 } 1061 when(vlmul === VLmul.mf4) { 1062 when(vsew === VSew.e16) { 1063 csBundle(0).lsrc(0) := src2 1064 csBundle(0).lsrc(1) := src2 1065 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1066 csBundle(0).vpu.fpu.isFoldTo1_8 := true.B 1067 csBundle(0).uopIdx := 0.U 1068 csBundle(1).lsrc(0) := src1 1069 
// Continuation: the three connections and the closing braces below finish the split case opened above this chunk.
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := dest
          csBundle(1).uopIdx := 1.U
        }
      }
    }

    /*
     * VEC_VFREDOSUM: every (vlmul, vsew) pair listed in `splitTable` expands into a chain of uops.
     * Within the chain, uop 0 reads src1, the last uop writes dest, and every other link goes
     * through the single temporary register VECTOR_TMP_REG_LMUL.
     */
    is(UopSplitType.VEC_VFREDOSUM) {
      import yunsuan.VfaluType
      val vlmul = vlmulReg
      val vsew = vsewReg
      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

      // Picks the isFoldTo1_<ways> control bit of uop `idx`.
      def foldFlag(idx: Int, ways: Int) = ways match {
        case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2
        case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4
        case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8
      }

      /*
       * Emits uops 0 until uopNum.
       *   ways       - group size: a uop whose index is a multiple of `ways` reads a fresh vs2 register
       *                and clears the fold flag; every other uop sets it.
       *   widenAware - when true, the uop right after a group head reads vs2 again for vfwredosum, and the
       *                fold flag is steered to isFoldTo1_<ways/2> (widening) or isFoldTo1_<ways> (otherwise).
       */
      def genOrderedSum(uopNum: Int, ways: Int, widenAware: Boolean): Unit =
        for (idx <- 0 until uopNum) {
          val uop = csBundle(idx)
          val startsGroup = idx % ways == 0
          val isLastUop = idx == uopNum - 1
          def vs2Reg = src2 + (idx / ways).U
          def tmpReg = VECTOR_TMP_REG_LMUL.U

          uop.lsrc(0) := (if (idx == 0) src1 else tmpReg)
          uop.lsrc(1) := (if (startsGroup) vs2Reg else tmpReg)
          uop.lsrc(2) := (
            if (startsGroup) vs2Reg
            else if (isLastUop) dest
            else if (widenAware && idx % ways == 1) Mux(isWiden, vs2Reg, tmpReg)
            else tmpReg
          )
          uop.ldest := (if (isLastUop) dest else tmpReg)
          if (widenAware) {
            foldFlag(idx, ways / 2) := isWiden && (!startsGroup).B
            foldFlag(idx, ways) := !isWiden && (!startsGroup).B
          } else {
            foldFlag(idx, ways) := (!startsGroup).B
          }
          uop.uopIdx := idx.U
        }

      // (vlmul, vsew, number of uops, fold ways, widenAware)
      // NOTE(review): none of the e64 rows and none of the m8 rows consult isWiden, unlike the other
      // rows - confirm that vfwredosum at LMUL=8 is meant to share the non-widening wiring.
      val splitTable = Seq(
        (VLmul.m8,  VSew.e64, 16, 2, false),
        (VLmul.m8,  VSew.e32, 32, 4, false),
        (VLmul.m8,  VSew.e16, 64, 8, false),
        (VLmul.m4,  VSew.e64,  8, 2, false),
        (VLmul.m4,  VSew.e32, 16, 4, true),
        (VLmul.m4,  VSew.e16, 32, 8, true),
        (VLmul.m2,  VSew.e64,  4, 2, false),
        (VLmul.m2,  VSew.e32,  8, 4, true),
        (VLmul.m2,  VSew.e16, 16, 8, true),
        (VLmul.m1,  VSew.e64,  2, 2, false),
        (VLmul.m1,  VSew.e32,  4, 4, true),
        (VLmul.m1,  VSew.e16,  8, 8, true),
        (VLmul.mf2, VSew.e32,  2, 4, true),
        (VLmul.mf2, VSew.e16,  4, 8, true),
        (VLmul.mf4, VSew.e16,  2, 8, true)
      )
      splitTable.foreach { case (lmulSel, sewSel, uopNum, ways, widenAware) =>
        when(vlmul === lmulSel) {
          when(vsew === sewSel) {
            genOrderedSum(uopNum, ways, widenAware)
          }
        }
      }
    }

    /*
     * VEC_SLIDEUP: uop 0 moves the scalar/immediate operand into VECTOR_TMP_REG_LMUL; afterwards
     * destination register i gets a chain of i + 1 uops, one per source register vs2 + j (j = 0..i).
     */
    is(UopSplitType.VEC_SLIDEUP) {
      // i to vector move
      val moveUop = csBundle(0)
      moveUop.srcType(0) := SrcType.reg
      moveUop.srcType(1) := SrcType.imm
      moveUop.lsrc(1) := 0.U
      moveUop.ldest := VECTOR_TMP_REG_LMUL.U
      moveUop.fuType := FuType.i2v.U
      moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      moveUop.vecWen := true.B
      // LMUL
      for (i <- 0 until MAX_VLMUL; j <- 0 to i) {
        // Slot 0 holds the move, so the triangular index is shifted by one; uopIdx itself is not shifted.
        val slot = i * (i + 1) / 2 + j + 1
        val uop = csBundle(slot)
        uop.srcType(0) := SrcType.vp
        uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        uop.lsrc(1) := src2 + j.U
        // The first link of a chain reads the old destination, later links read the previous temporary.
        uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
        // Only the last link of a chain writes the architectural destination.
        uop.ldest := (if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
        uop.uopIdx := (slot - 1).U
      }
    }

    is(UopSplitType.VEC_SLIDEDOWN) {
      // i to vector move
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
// Continuation of the VEC_SLIDEDOWN case opened just above: finish uop 0 (the scalar/immediate-to-vector move).
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      csBundle(0).vecWen := true.B
      // LMUL
      // Slots are addressed backwards from numOfUop; only the chains with i < lmul are wired.
      for (i <- 0 until MAX_VLMUL)
        for (j <- (0 to i).reverse) {
          when(i.U < lmul) {
            val old_vd = if (j == 0) {
              dest + lmul - 1.U - i.U
            } else (VECTOR_TMP_REG_LMUL + j).U
            val vd = if (j == i) {
              dest + lmul - 1.U - i.U
            } else (VECTOR_TMP_REG_LMUL + j + 1).U
            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
          }
        }
    }

    /*
     * VEC_M0X: one uop per register. Each uop writes a temporary vector register and reads the
     * temporary written by the uop before it; the uop at index lmul - 1 is then overridden to write
     * the integer destination `dest` instead.
     */
    is(UopSplitType.VEC_M0X) {
      // LMUL
      for (i <- 0 until MAX_VLMUL) {
        // uop 0 has no predecessor, so its first source is marked don't-care.
        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
        val ldest = (VECTOR_TMP_REG_LMUL + i).U
        csBundle(i).srcType(0) := srcType0
        csBundle(i).srcType(1) := SrcType.vp
        csBundle(i).rfWen := false.B
        csBundle(i).fpWen := false.B
        csBundle(i).vecWen := true.B
        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        csBundle(i).lsrc(1) := src2
        // csBundle(i).lsrc(2) := dest + i.U  DontCare
        csBundle(i).ldest := ldest
        csBundle(i).uopIdx := i.U
      }
      // Must stay after the loop: these dynamically indexed connections override the loop's values.
      csBundle(lmul - 1.U).rfWen := true.B
      csBundle(lmul - 1.U).fpWen := false.B
      csBundle(lmul - 1.U).vecWen := false.B
      csBundle(lmul - 1.U).ldest := dest
    }

    /*
     * VEC_MVV: two uops per register i. The even uop writes dest + i, the odd uop writes the
     * temporary VECTOR_TMP_REG_LMUL + i; both read the temporary written for register i - 1.
     */
    is(UopSplitType.VEC_MVV) {
      // LMUL
      for (i <- 0 until MAX_VLMUL) {
        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
        csBundle(i * 2 + 0).srcType(0) := srcType0
        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        csBundle(i * 2 + 0).lsrc(1) := src2
        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
        csBundle(i * 2 + 0).ldest := dest + i.U
        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U

        csBundle(i * 2 + 1).srcType(0) := srcType0
        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        csBundle(i * 2 + 1).lsrc(1) := src2
        // csBundle(i).lsrc(2) := dest + i.U  DontCare
        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
      }
    }

    // VEC_M0X_VFIRST: a single uop that writes the integer register `dest`.
    is(UopSplitType.VEC_M0X_VFIRST) {
      // LMUL
      csBundle(0).rfWen := true.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := false.B
      csBundle(0).ldest := dest
    }

    /*
     * VEC_VWW: uops with index below lmul read vs2 + i on both source ports and write temporary i.
     * Later uops combine temporaries pairwise (2k + 1 and 2k, with k = i - lmul) into temporary i.
     * The last uop (index numOfUop - 1) additionally reads src1 and the old dest, and writes dest.
     */
    is(UopSplitType.VEC_VWW) {
      for (i <- 0 until MAX_VLMUL*2) {
        when(i.U < lmul){
          csBundle(i).srcType(2) := SrcType.DC
          csBundle(i).lsrc(0) := src2 + i.U
          csBundle(i).lsrc(1) := src2 + i.U
          // csBundle(i).lsrc(2) := dest + (2 * i).U
          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
          csBundle(i).uopIdx := i.U
        } otherwise {
          csBundle(i).srcType(2) := SrcType.DC
          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
          // csBundle(i).lsrc(2) := dest + (2 * i).U
          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
          csBundle(i).uopIdx := i.U
        }
      }
      // These overrides do not depend on the loop index, so they are emitted once after the loop.
      // With last-connect semantics they still win over every per-index connection made above.
      val lastUop = csBundle(numOfUop-1.U)
      lastUop.srcType(2) := SrcType.vp
      lastUop.lsrc(0) := src1
      lastUop.lsrc(2) := dest
      lastUop.ldest := dest
    }

    /*
     * VEC_RGATHER: for a register group of `len` registers, destination register i is produced by a
     * chain of `len` uops, one per source register vs2 + j, all indexed by vs1 + i.
     * vlmulReg values other than b001/b010/b011 are not wired here.
     */
    is(UopSplitType.VEC_RGATHER) {
      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
        for (i <- 0 until len)
          for (j <- 0 until len) {
            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
            // csBundle(i * len + j).srcType(1) := SrcType.vp
            // csBundle(i * len + j).srcType(2) := SrcType.vp
            csBundle(i * len + j).lsrc(0) := src1 + i.U
            csBundle(i * len + j).lsrc(1) := src2 + j.U
            // The first link reads the old destination, later links read the previous temporary.
            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
            csBundle(i * len + j).lsrc(2) := vd_old
            // Only the last link writes the architectural destination.
            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
            csBundle(i * len + j).ldest := vd
            csBundle(i * len + j).uopIdx := (i * len + j).U
          }
      }
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_RGATHER(2)
        }
        is("b010".U ){
          genCsBundle_VEC_RGATHER(4)
        }
        is("b011".U ){
          genCsBundle_VEC_RGATHER(8)
        }
      }
    }

    /*
     * VEC_RGATHER_VX: like VEC_RGATHER, but uop 0 first moves the scalar/immediate index into
     * VECTOR_TMP_REG_LMUL, so every gather uop sits one slot later and the chain temporaries start
     * at VECTOR_TMP_REG_LMUL + 1.
     */
    is(UopSplitType.VEC_RGATHER_VX) {
      def genCsBundle_RGATHER_VX(len:Int): Unit ={
        for (i <- 0 until len)
          for (j <- 0 until len) {
            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
            csBundle(i * len + j + 1).lsrc(2) := vd_old
            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
            csBundle(i * len + j + 1).ldest := vd
            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
          }
      }
      // i to vector move
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      // Default wiring for a single register; the switch below overrides it for larger groups.
      genCsBundle_RGATHER_VX(1)
      switch(vlmulReg) {
        is("b001".U ){
// Continuation of the VEC_RGATHER_VX case: body of its first LMUL arm, then the remaining arms of switch(vlmulReg).
          genCsBundle_RGATHER_VX(2)
        }
        is("b010".U ){
          genCsBundle_RGATHER_VX(4)
        }
        is("b011".U ){
          genCsBundle_RGATHER_VX(8)
        }
      }
    }

    /*
     * VEC_RGATHEREI16: gather with an index vector in vs1. How many vs1 registers feed one
     * destination register depends on vsewReg, hence one generator for SEW = 8 and one
     * parameterised generator for the other element widths.
     */
    is(UopSplitType.VEC_RGATHEREI16) {
      // SEW = 8: two uops per (i, j) pair, reading vs1 + 2i and vs1 + 2i + 1 respectively.
      def genSew8(len: Int): Unit =
        for (i <- 0 until len; j <- 0 until len) {
          val pair = (i * len + j) * 2
          val first = csBundle(pair)
          val second = csBundle(pair + 1)
          // The first uop of a pair hands its result to the second through this temporary.
          def midReg = (VECTOR_TMP_REG_LMUL + j * 2).U

          first.lsrc(0) := src1 + (i * 2 + 0).U
          first.lsrc(1) := src2 + j.U
          first.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
          first.ldest := midReg
          first.uopIdx := pair.U

          second.lsrc(0) := src1 + (i * 2 + 1).U
          second.lsrc(1) := src2 + j.U
          second.lsrc(2) := midReg
          second.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
          second.uopIdx := (pair + 1).U
        }

      // Other widths: one uop per (i, j); destination register i reads index register vs1 + i / idxShare.
      def genWide(len: Int, idxShare: Int): Unit =
        for (i <- 0 until len; j <- 0 until len) {
          val slot = i * len + j
          val uop = csBundle(slot)
          uop.lsrc(0) := src1 + (i / idxShare).U
          uop.lsrc(1) := src2 + j.U
          // The first link reads the old destination, later links read the previous temporary.
          uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
          // Only the last link writes the architectural destination.
          uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
          uop.uopIdx := slot.U
        }

      // Chooses the index-sharing factor from vsewReg: e32 -> 2, e64 -> 4, anything else -> 1.
      def genBySew(len: Int): Unit = {
        when(vsewReg === VSew.e32) {
          genWide(len, 2)
        }.elsewhen(vsewReg === VSew.e64) {
          genWide(len, 4)
        }.otherwise {
          genWide(len, 1)
        }
      }

      // Same as genBySew, but an all-zero vsewReg takes the two-uop SEW = 8 expansion.
      def genBySewOrSew8(len: Int): Unit = {
        when(!vsewReg.orR) {
          genSew8(len)
        }.otherwise {
          genBySew(len)
        }
      }

      // Default wiring for a single register; the switch below overrides it for larger groups.
      genBySewOrSew8(1)
      switch(vlmulReg) {
        is("b001".U) {
          genBySewOrSew8(2)
        }
        is("b010".U) {
          genBySewOrSew8(4)
        }
        is("b011".U) {
          // No SEW = 8 expansion for this arm: an all-zero vsewReg falls through to genWide(8, 1).
          genBySew(8)
        }
      }
    }

    /*
     * VEC_COMPRESS: row i handles source register vs2 + i. Every row except the last has i + 2
     * uops; the last row has i + 1. Rows are laid out back to back, so row i starts at slot
     * i * (i + 3) / 2.
     */
    is(UopSplitType.VEC_COMPRESS) {
      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
        for (i <- 0 until len) {
          val uopsInRow = if (i == len - 1) i + 1 else i + 2
          for (j <- 0 until uopsInRow) {
            val slot = i * (i + 3) / 2 + j
            val uop = csBundle(slot)
            // The extra uop at j == i + 1 writes VECTOR_TMP_REG_LMUL and leaves sources 0 and 2 as DontCare.
            val isExtraUop = j == i + 1
            val prevVd = if (i == j) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
            val nextVd =
              if (i == len - 1) dest + j.U
              else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
              else (VECTOR_TMP_REG_LMUL + j + 1).U
            val src13Type = if (isExtraUop) DontCare else SrcType.vp

            uop.srcType(0) := src13Type
            uop.srcType(1) := SrcType.vp
            uop.srcType(2) := src13Type
            uop.srcType(3) := SrcType.vp
            uop.lsrc(0) := src1
            uop.lsrc(1) := src2 + i.U
            uop.lsrc(2) := prevVd
            uop.lsrc(3) := VECTOR_TMP_REG_LMUL.U
            uop.ldest := nextVd
            uop.uopIdx := slot.U
          }
        }
      }
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_COMPRESS(2)
        }
        is("b010".U ){
          genCsBundle_VEC_COMPRESS(4)
        }
        is("b011".U ){
          genCsBundle_VEC_COMPRESS(8)
        }
      }
    }

    // VEC_MVNR: uop i works on the i-th register of each operand group and of the destination group.
    is(UopSplitType.VEC_MVNR) {
      (0 until MAX_VLMUL).foreach { i =>
        val uop = csBundle(i)
        uop.lsrc(0) := src1 + i.U
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := dest + i.U
        uop.ldest := dest + i.U
        uop.uopIdx := i.U
      }
    }

    is(UopSplitType.VEC_US_LDST) {
      /*
      FMV.D.X
       */
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := FP_TMP_REG_MV.U
      csBundle(0).fuType := FuType.i2f.U
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := true.B
      csBundle(0).vecWen := false.B
      csBundle(0).fpu.isAddSub := false.B
      csBundle(0).fpu.typeTagIn := FPU.D
      csBundle(0).fpu.typeTagOut := FPU.D
      csBundle(0).fpu.fromInt := true.B
      csBundle(0).fpu.wflags := false.B
      csBundle(0).fpu.fpWen := true.B
// Continuation of the VEC_US_LDST case: finish uop 0 (the integer-to-FP move of the base address).
      csBundle(0).fpu.div := false.B
      csBundle(0).fpu.sqrt := false.B
      csBundle(0).fpu.fcvt := false.B
      csBundle(0).vlsInstr := true.B
      //LMUL
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i + 1)
        uop.srcType(0) := SrcType.fp
        uop.lsrc(0) := FP_TMP_REG_MV.U
        uop.lsrc(2) := dest + i.U // old vd
        uop.ldest := dest + i.U
        uop.uopIdx := i.U
        uop.vlsInstr := true.B
      }
    }

    /*
     * VEC_S_LDST: uop 0 and uop 1 are both integer-to-FP moves. uop 0 targets FP_TMP_REG_MV,
     * uop 1 targets VECTOR_TMP_REG_LMUL and reads latchedInst.lsrc(1). The memory uops follow
     * from slot 2 and read both temporaries.
     */
    is(UopSplitType.VEC_S_LDST) {
      /*
      FMV.D.X
       */
      for ((idx, tmpDest) <- Seq(0 -> FP_TMP_REG_MV, 1 -> VECTOR_TMP_REG_LMUL)) {
        val mv = csBundle(idx)
        mv.srcType(0) := SrcType.reg
        mv.srcType(1) := SrcType.imm
        mv.lsrc(1) := 0.U
        mv.ldest := tmpDest.U
        mv.fuType := FuType.i2f.U
        mv.rfWen := false.B
        mv.fpWen := true.B
        mv.vecWen := false.B
        mv.fpu.isAddSub := false.B
        mv.fpu.typeTagIn := FPU.D
        mv.fpu.typeTagOut := FPU.D
        mv.fpu.fromInt := true.B
        mv.fpu.wflags := false.B
        mv.fpu.fpWen := true.B
        mv.fpu.div := false.B
        mv.fpu.sqrt := false.B
        mv.fpu.fcvt := false.B
        mv.vlsInstr := true.B
      }
      // Only the second move names its source register explicitly.
      csBundle(1).lsrc(0) := latchedInst.lsrc(1)

      //LMUL
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i + 2)
        uop.srcType(0) := SrcType.fp
        uop.srcType(1) := SrcType.fp
        uop.lsrc(0) := FP_TMP_REG_MV.U
        uop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
        uop.lsrc(2) := dest + i.U // old vd
        uop.ldest := dest + i.U
        uop.uopIdx := i.U
        uop.vlsInstr := true.B
      }
    }

    /*
     * VEC_I_LDST: uop 0 is the integer-to-FP move; each following uop asks its indexedLSRegOffset
     * table which vs2 register and which vd register it works on.
     */
    is(UopSplitType.VEC_I_LDST) {
      /*
      FMV.D.X
       */
      val vlmul = vlmulReg
      val vsew = Cat(0.U(1.W), vsewReg)
      val veew = Cat(0.U(1.W), width)
      // veew + 1 + ~vsew is the two's-complement form of veew - vsew, so this is veew - vsew + vlmul.
      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
      // Encodings b001/b010/b011 map to 1/2/3; every other encoding maps to 0.
      val mulToIndex = Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      )
      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), mulToIndex)
      val simple_emul = MuxLookup(vemul, 0.U(2.W), mulToIndex)

      val mv = csBundle(0)
      mv.srcType(0) := SrcType.reg
      mv.srcType(1) := SrcType.imm
      mv.lsrc(1) := 0.U
      mv.ldest := FP_TMP_REG_MV.U
      mv.fuType := FuType.i2f.U
      mv.rfWen := false.B
      mv.fpWen := true.B
      mv.vecWen := false.B
      mv.fpu.isAddSub := false.B
      mv.fpu.typeTagIn := FPU.D
      mv.fpu.typeTagOut := FPU.D
      mv.fpu.fromInt := true.B
      mv.fpu.wflags := false.B
      mv.fpu.fpWen := true.B
      mv.fpu.div := false.B
      mv.fpu.sqrt := false.B
      mv.fpu.fcvt := false.B
      mv.vlsInstr := true.B

      //LMUL
      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
        val offsetTable = indexedLSRegOffset(i)
        offsetTable.src := Cat(simple_emul, simple_lmul, nf)
        val offsetVs2 = offsetTable.outOffsetVs2
        val offsetVd = offsetTable.outOffsetVd
        val isFirstUopInVd = offsetTable.outIsFirstUopInVd
        // One-hot selection of the register at the given offset inside the vs2 / vd group.
        def vs2Reg = Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
        def vdReg = Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))

        val uop = csBundle(i + 1)
        uop.srcType(0) := SrcType.fp
        uop.lsrc(0) := FP_TMP_REG_MV.U
        uop.lsrc(1) := vs2Reg
        /**
          * For indexed instructions, VLSU concatenates all the uops that write the same logical vd register and
          * writes back only once for all of them. If these uops all shared the same lsrc(2)/old vd and the same
          * ldest/vd (equal to old vd), there would be a data dependence between them, and indexed instructions
          * with emul > lmul would deadlock.
          *
          * Assume N = emul/lmul. To break the deadlock, only the first uop reads old vd as lsrc(2); the
          * remaining N-1 uops read the temporary vector register instead.
          */
        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
        uop.lsrc(2) := Mux(
          isFirstUopInVd,
          vdReg,
          VECTOR_TMP_REG_LMUL.U
        )
        uop.ldest := vdReg
        uop.uopIdx := i.U
        uop.vlsInstr := true.B
      }
    }
  }

  // readyFromRename counter: index of the first output whose ready is low, or RenameWidth when all are ready
  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)

  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
  val thisAllOut = uopRes <= readyCounter

  switch(state) {
    is(s_idle) {
      when (inValid) {
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }
    }
    is(s_active) {
      when (!thisAllOut) {
        // Uops are left over: stay active and account for the ones sent this cycle.
        stateNext := s_active
        uopResNext := uopRes - readyCounter
      }.elsewhen (inValid) {
        // Everything went out and a new complex instruction is waiting: start on it immediately.
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }.otherwise {
        stateNext := s_idle
        uopResNext := 0.U
      }
    }
  }

  // A redirect drops the instruction being split.
  state := Mux(io.redirect, s_idle, stateNext)
  uopRes := Mux(io.redirect, 0.U, uopResNext)

  // Number of uops presented this cycle: the smaller of uopRes and readyCounter.
  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)

  for (i <- 0 until RenameWidth) {
    // Output i carries the i-th uop that has not been sent yet; out-of-range selections fall back to the last slot.
    val uopSel = i.U + numOfUop - uopRes
    outValids(i) := complexNum > i.U
    outDecodedInsts(i) := Mux(uopSel < maxUopSize.U, csBundle(uopSel), csBundle(maxUopSize - 1))
  }

  outComplexNum := Mux(state === s_active, complexNum, 0.U)
  inReady := (state === s_idle) || ((state === s_active) && thisAllOut)

//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
//  val notInf = Wire(Vec(DecodeWidth, Bool()))
//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
//
//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
//    0.U)
//  validToRename.zipWithIndex.foreach{
//    case(dst, i) =>
//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
//      dst := MuxCase(false.B, Seq(
//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
//      ).toSeq)
//  }
//
//  readyToIBuf.zipWithIndex.foreach {
//    case (dst, i) =>
//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
//      dst := MuxCase(true.B, Seq(
//        (io.validFromIBuf(0) && uopRes0 >
readyCounter || !readyCounter.orR) -> false.B, 1831// (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B)) 1832// ).toSeq) 1833// } 1834// 1835// io.deq.decodedInsts := decodedInsts 1836// io.deq.complexNum := complexNum 1837// io.deq.validToRename := validToRename 1838// io.deq.readyToIBuf := readyToIBuf 1839} 1840