/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 * http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
 * Lookup table for one fixed uop index of a non-segment indexed vector load/store.
 *
 * The 4-bit input `src` is `(emul << 2) | lmul`, where both fields are 2-bit log2 values
 * (0..3, i.e. register-group sizes 1/2/4/8). The two 3-bit outputs are the register offsets
 * computed by [[genCsBundle_VEC_INDEXED_LDST]] for this module's `uopIdx`.
 * NOTE(review): presumably these offsets are added to the vs2 and vd register numbers of
 * the uop - confirm at the use site.
 *
 * @param uopIdx index of the uop this table instance is generated for (elaboration-time constant)
 */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
   * Computes the (vs2 offset, vd offset) pair of one uop.
   *
   * The larger of the two register groups advances by one register per uop; the smaller one
   * advances once every `2^|emul - lmul|` uops.
   *
   * @param lmul   log2 of the LMUL register-group size
   * @param emul   log2 of the EMUL register-group size
   * @param uopIdx index of the uop inside the instruction
   * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is outside
   *         `0 until 2^max(lmul, emul)`
   */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    // uop num depends on the larger of emul and lmul
    val uopNum = 1 << math.max(lmul, emul)
    if (uopIdx < 0 || uopIdx >= uopNum) {
      // this uop does not exist for the given (lmul, emul) combination
      (0, 0)
    } else if (lmul < emul) {
      // lmul < emul: vs2 moves every uop, vd moves once per 2^(emul - lmul) uops
      (uopIdx, uopIdx / (1 << (emul - lmul)))
    } else {
      // lmul >= emul: vd moves every uop, vs2 moves once per 2^(lmul - emul) uops
      (uopIdx / (1 << (lmul - emul)), uopIdx)
    }
  }

  // indexed load/store: one table row per (emul, lmul) combination, emul-major order
  val combVemulNf: Seq[(Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
  } yield {
    val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
    (emul, lmul, offsetVs2, offsetVd)
  }

  // key = {emul, lmul} (4 bits), value = {offsetVs2, offsetVd} (6 bits); default is all zeros
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}

/** Elaboration-time constants shared by the vector uop-splitting logic. */
trait VectorConstants {
  // upper bound used for the per-register loops when splitting a vector instruction into uops
  val MAX_VLMUL = 8
  // first logical register number used for temporaries created during uop splitting
  val VECTOR_TMP_REG_LMUL = 33 // 33~47 -> 15
  val VECTOR_COMPRESS = 1 // in v0 regfile
  val MAX_INDEXED_LS_UOPNUM = 64
}

/**
 * IO bundle of [[DecodeUnitComp]]: takes one simple-decoded complex instruction plus its uop
 * info, and exposes up to `RenameWidth` decoded uops.
 */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // vtype value that is connected into the vset uops' vpu fields
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  // alias
  private val inReady = io.in.ready
  private val inValid = io.in.valid
  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 112 private val inUopInfo = io.in.bits.uopInfo 113 private val outValids = io.out.complexDecodedInsts.map(_.valid) 114 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 115 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 116 private val outComplexNum = io.complexNum 117 118 val maxUopSize = MaxUopSize 119 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 120 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 121 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 122 }.elsewhen(inInstFields.RS1 === 0.U) { 123 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 124 } 125 } 126 127 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 128 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 129 //input bits 130 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 131 132 val src1 = Cat(0.U(1.W), instFields.RS1) 133 val src2 = Cat(0.U(1.W), instFields.RS2) 134 val dest = Cat(0.U(1.W), instFields.RD) 135 136 val nf = instFields.NF 137 val width = instFields.WIDTH(1, 0) 138 139 //output of DecodeUnit 140 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 141 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 142 val lmul = Wire(UInt(4.W)) 143 val isVsetSimple = Wire(Bool()) 144 145 val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i))) 146 indexedLSRegOffset.map(_.src := 0.U) 147 148 //pre decode 149 lmul := latchedUopInfo.lmul 150 isVsetSimple := latchedInst.isVset 151 val vlmulReg = latchedInst.vpu.vlmul 152 val vsewReg = latchedInst.vpu.vsew 153 154 //Type of uop Div 155 val typeOfSplit = latchedInst.uopSplitType 156 val src1Type = latchedInst.srcType(0) 157 val src1IsImm = src1Type === SrcType.imm 158 val src1IsFp = src1Type === SrcType.fp 159 160 val isVstore = 
FuType.isVStore(latchedInst.fuType) 161 162 numOfUop := latchedUopInfo.numOfUop 163 numOfWB := latchedUopInfo.numOfWB 164 165 //uops dispatch 166 val s_idle :: s_active :: Nil = Enum(2) 167 val state = RegInit(s_idle) 168 val stateNext = WireDefault(state) 169 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 170 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 171 val uopResNext = WireInit(uopRes) 172 val e64 = 3.U(2.W) 173 val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U) 174 val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U 175 val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U 176 177 //uop div up to maxUopSize 178 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 179 csBundle.foreach { case dst => 180 dst := latchedInst 181 dst.numUops := latchedUopInfo.numOfUop 182 dst.numWB := latchedUopInfo.numOfWB 183 dst.firstUop := false.B 184 dst.lastUop := false.B 185 dst.vlsInstr := false.B 186 } 187 188 csBundle(0).firstUop := true.B 189 csBundle(numOfUop - 1.U).lastUop := true.B 190 191 switch(typeOfSplit) { 192 is(UopSplitType.VSET) { 193 // In simple decoder, rfWen and vecWen are not set 194 when(isVsetSimple) { 195 // Default 196 // uop0 set rd, never flushPipe 197 csBundle(0).fuType := FuType.vsetiwi.U 198 csBundle(0).flushPipe := false.B 199 csBundle(0).rfWen := true.B 200 // uop1 set vl, vsetvl will flushPipe 201 csBundle(1).ldest := Vl_IDX.U 202 csBundle(1).vecWen := false.B 203 csBundle(1).vlWen := true.B 204 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 205 // write nothing, uop0 is a nop instruction 206 csBundle(0).rfWen := false.B 207 csBundle(0).fpWen := false.B 208 csBundle(0).vecWen := false.B 209 csBundle(0).vlWen := false.B 210 csBundle(1).fuType := FuType.vsetfwf.U 211 csBundle(1).srcType(0) := SrcType.no 212 csBundle(1).srcType(2) := SrcType.no 213 csBundle(1).srcType(3) := SrcType.no 214 csBundle(1).srcType(4) := 
SrcType.vp 215 csBundle(1).lsrc(4) := Vl_IDX.U 216 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 217 // uop0: mv vtype gpr to vector region 218 csBundle(0).srcType(0) := SrcType.xp 219 csBundle(0).srcType(1) := SrcType.no 220 csBundle(0).lsrc(0) := src2 221 csBundle(0).lsrc(1) := 0.U 222 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 223 csBundle(0).fuType := FuType.i2v.U 224 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 225 csBundle(0).rfWen := false.B 226 csBundle(0).fpWen := false.B 227 csBundle(0).vecWen := true.B 228 csBundle(0).vlWen := false.B 229 csBundle(0).flushPipe := false.B 230 // uop1: uvsetvcfg_vv 231 csBundle(1).fuType := FuType.vsetfwf.U 232 // vl 233 csBundle(1).srcType(0) := SrcType.no 234 csBundle(1).srcType(2) := SrcType.no 235 csBundle(1).srcType(3) := SrcType.no 236 csBundle(1).srcType(4) := SrcType.vp 237 csBundle(1).lsrc(4) := Vl_IDX.U 238 // vtype 239 csBundle(1).srcType(1) := SrcType.vp 240 csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U 241 csBundle(1).vecWen := false.B 242 csBundle(1).vlWen := true.B 243 csBundle(1).ldest := Vl_IDX.U 244 }.elsewhen(dest === 0.U) { 245 // write nothing, uop0 is a nop instruction 246 csBundle(0).rfWen := false.B 247 csBundle(0).fpWen := false.B 248 csBundle(0).vecWen := false.B 249 csBundle(0).vlWen := false.B 250 } 251 // use bypass vtype from vtypeGen 252 csBundle(0).vpu.connectVType(io.vtypeBypass) 253 csBundle(1).vpu.connectVType(io.vtypeBypass) 254 } 255 } 256 is(UopSplitType.VEC_VVV) { 257 for (i <- 0 until MAX_VLMUL) { 258 csBundle(i).lsrc(0) := src1 + i.U 259 csBundle(i).lsrc(1) := src2 + i.U 260 csBundle(i).lsrc(2) := dest + i.U 261 csBundle(i).ldest := dest + i.U 262 csBundle(i).uopIdx := i.U 263 } 264 } 265 is(UopSplitType.VEC_VFV) { 266 /* 267 f to vector move 268 */ 269 csBundle(0).srcType(0) := SrcType.fp 270 csBundle(0).srcType(1) := SrcType.imm 271 csBundle(0).srcType(2) := SrcType.imm 272 csBundle(0).lsrc(1) := 0.U 273 csBundle(0).ldest := 
VECTOR_TMP_REG_LMUL.U 274 csBundle(0).fuType := FuType.f2v.U 275 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 276 csBundle(0).vecWen := true.B 277 csBundle(0).vpu.isReverse := false.B 278 /* 279 LMUL 280 */ 281 for (i <- 0 until MAX_VLMUL) { 282 csBundle(i + 1).srcType(0) := SrcType.vp 283 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 284 csBundle(i + 1).lsrc(1) := src2 + i.U 285 csBundle(i + 1).lsrc(2) := dest + i.U 286 csBundle(i + 1).ldest := dest + i.U 287 csBundle(i + 1).uopIdx := i.U 288 } 289 } 290 is(UopSplitType.VEC_EXT2) { 291 for (i <- 0 until MAX_VLMUL / 2) { 292 csBundle(2 * i).lsrc(1) := src2 + i.U 293 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 294 csBundle(2 * i).ldest := dest + (2 * i).U 295 csBundle(2 * i).uopIdx := (2 * i).U 296 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 297 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 298 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 299 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 300 } 301 } 302 is(UopSplitType.VEC_EXT4) { 303 for (i <- 0 until MAX_VLMUL / 4) { 304 csBundle(4 * i).lsrc(1) := src2 + i.U 305 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 306 csBundle(4 * i).ldest := dest + (4 * i).U 307 csBundle(4 * i).uopIdx := (4 * i).U 308 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 309 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 310 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 311 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 312 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 313 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 314 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 315 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 316 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 317 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 318 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 319 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 320 } 321 } 322 is(UopSplitType.VEC_EXT8) { 323 for (i <- 0 until MAX_VLMUL) { 324 csBundle(i).lsrc(1) := src2 325 csBundle(i).lsrc(2) := dest + 
i.U 326 csBundle(i).ldest := dest + i.U 327 csBundle(i).uopIdx := i.U 328 } 329 } 330 is(UopSplitType.VEC_0XV) { 331 /* 332 i/f to vector move 333 */ 334 csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg) 335 csBundle(0).srcType(1) := SrcType.imm 336 csBundle(0).srcType(2) := SrcType.imm 337 csBundle(0).lsrc(1) := 0.U 338 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 339 csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U) 340 csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 341 csBundle(0).rfWen := false.B 342 csBundle(0).fpWen := false.B 343 csBundle(0).vecWen := true.B 344 /* 345 vmv.s.x 346 */ 347 csBundle(1).srcType(0) := SrcType.vp 348 csBundle(1).srcType(1) := SrcType.imm 349 csBundle(1).srcType(2) := SrcType.vp 350 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 351 csBundle(1).lsrc(1) := 0.U 352 csBundle(1).lsrc(2) := dest 353 csBundle(1).ldest := dest 354 csBundle(1).rfWen := false.B 355 csBundle(1).fpWen := false.B 356 csBundle(1).vecWen := true.B 357 csBundle(1).uopIdx := 0.U 358 } 359 is(UopSplitType.VEC_VXV) { 360 /* 361 i to vector move 362 */ 363 csBundle(0).srcType(0) := SrcType.reg 364 csBundle(0).srcType(1) := SrcType.imm 365 csBundle(0).srcType(2) := SrcType.imm 366 csBundle(0).lsrc(1) := 0.U 367 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 368 csBundle(0).fuType := FuType.i2v.U 369 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 370 csBundle(0).vecWen := true.B 371 csBundle(0).vpu.isReverse := false.B 372 /* 373 LMUL 374 */ 375 for (i <- 0 until MAX_VLMUL) { 376 csBundle(i + 1).srcType(0) := SrcType.vp 377 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 378 csBundle(i + 1).lsrc(1) := src2 + i.U 379 csBundle(i + 1).lsrc(2) := dest + i.U 380 csBundle(i + 1).ldest := dest + i.U 381 csBundle(i + 1).uopIdx := i.U 382 } 383 } 384 is(UopSplitType.VEC_VVW) { 385 for (i <- 0 until MAX_VLMUL / 2) { 386 
csBundle(2 * i).lsrc(0) := src1 + i.U 387 csBundle(2 * i).lsrc(1) := src2 + i.U 388 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 389 csBundle(2 * i).ldest := dest + (2 * i).U 390 csBundle(2 * i).uopIdx := (2 * i).U 391 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 392 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 393 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 394 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 395 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 396 } 397 } 398 is(UopSplitType.VEC_VFW) { 399 /* 400 f to vector move 401 */ 402 csBundle(0).srcType(0) := SrcType.fp 403 csBundle(0).srcType(1) := SrcType.imm 404 csBundle(0).srcType(2) := SrcType.imm 405 csBundle(0).lsrc(1) := 0.U 406 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 407 csBundle(0).fuType := FuType.f2v.U 408 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 409 csBundle(0).rfWen := false.B 410 csBundle(0).fpWen := false.B 411 csBundle(0).vecWen := true.B 412 413 for (i <- 0 until MAX_VLMUL / 2) { 414 csBundle(2 * i + 1).srcType(0) := SrcType.vp 415 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 416 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 417 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 418 csBundle(2 * i + 1).ldest := dest + (2 * i).U 419 csBundle(2 * i + 1).uopIdx := (2 * i).U 420 csBundle(2 * i + 2).srcType(0) := SrcType.vp 421 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 422 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 423 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 424 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 425 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 426 } 427 } 428 is(UopSplitType.VEC_WVW) { 429 for (i <- 0 until MAX_VLMUL / 2) { 430 csBundle(2 * i).lsrc(0) := src1 + i.U 431 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 432 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 433 csBundle(2 * i).ldest := dest + (2 * i).U 434 csBundle(2 * i).uopIdx := (2 * i).U 435 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 436 csBundle(2 * i + 1).lsrc(1) := src2 + 
(2 * i + 1).U 437 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 438 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 439 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 440 } 441 } 442 is(UopSplitType.VEC_VXW) { 443 /* 444 i to vector move 445 */ 446 csBundle(0).srcType(0) := SrcType.reg 447 csBundle(0).srcType(1) := SrcType.imm 448 csBundle(0).srcType(2) := SrcType.imm 449 csBundle(0).lsrc(1) := 0.U 450 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 451 csBundle(0).fuType := FuType.i2v.U 452 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 453 csBundle(0).vecWen := true.B 454 455 for (i <- 0 until MAX_VLMUL / 2) { 456 csBundle(2 * i + 1).srcType(0) := SrcType.vp 457 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 458 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 459 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 460 csBundle(2 * i + 1).ldest := dest + (2 * i).U 461 csBundle(2 * i + 1).uopIdx := (2 * i).U 462 csBundle(2 * i + 2).srcType(0) := SrcType.vp 463 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 464 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 465 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 466 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 467 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 468 } 469 } 470 is(UopSplitType.VEC_WXW) { 471 /* 472 i to vector move 473 */ 474 csBundle(0).srcType(0) := SrcType.reg 475 csBundle(0).srcType(1) := SrcType.imm 476 csBundle(0).srcType(2) := SrcType.imm 477 csBundle(0).lsrc(1) := 0.U 478 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 479 csBundle(0).fuType := FuType.i2v.U 480 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 481 csBundle(0).vecWen := true.B 482 483 for (i <- 0 until MAX_VLMUL / 2) { 484 csBundle(2 * i + 1).srcType(0) := SrcType.vp 485 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 486 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 487 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 488 csBundle(2 * i + 1).ldest := dest + (2 * i).U 489 csBundle(2 * i + 
1).uopIdx := (2 * i).U 490 csBundle(2 * i + 2).srcType(0) := SrcType.vp 491 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 492 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 493 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 494 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 495 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 496 } 497 } 498 is(UopSplitType.VEC_WVV) { 499 for (i <- 0 until MAX_VLMUL / 2) { 500 501 csBundle(2 * i).lsrc(0) := src1 + i.U 502 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 503 csBundle(2 * i).lsrc(2) := dest + i.U 504 csBundle(2 * i).ldest := dest + i.U 505 csBundle(2 * i).uopIdx := (2 * i).U 506 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 507 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 508 csBundle(2 * i + 1).lsrc(2) := dest + i.U 509 csBundle(2 * i + 1).ldest := dest + i.U 510 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 511 } 512 } 513 is(UopSplitType.VEC_WFW) { 514 /* 515 f to vector move 516 */ 517 csBundle(0).srcType(0) := SrcType.fp 518 csBundle(0).srcType(1) := SrcType.imm 519 csBundle(0).srcType(2) := SrcType.imm 520 csBundle(0).lsrc(1) := 0.U 521 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 522 csBundle(0).fuType := FuType.f2v.U 523 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 524 csBundle(0).rfWen := false.B 525 csBundle(0).fpWen := false.B 526 csBundle(0).vecWen := true.B 527 528 for (i <- 0 until MAX_VLMUL / 2) { 529 csBundle(2 * i + 1).srcType(0) := SrcType.vp 530 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 531 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 532 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 533 csBundle(2 * i + 1).ldest := dest + (2 * i).U 534 csBundle(2 * i + 1).uopIdx := (2 * i).U 535 csBundle(2 * i + 2).srcType(0) := SrcType.vp 536 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 537 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 538 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 539 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 540 
csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 541 } 542 } 543 is(UopSplitType.VEC_WXV) { 544 /* 545 i to vector move 546 */ 547 csBundle(0).srcType(0) := SrcType.reg 548 csBundle(0).srcType(1) := SrcType.imm 549 csBundle(0).srcType(2) := SrcType.imm 550 csBundle(0).lsrc(1) := 0.U 551 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 552 csBundle(0).fuType := FuType.i2v.U 553 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 554 csBundle(0).vecWen := true.B 555 556 for (i <- 0 until MAX_VLMUL / 2) { 557 csBundle(2 * i + 1).srcType(0) := SrcType.vp 558 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 559 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 560 csBundle(2 * i + 1).lsrc(2) := dest + i.U 561 csBundle(2 * i + 1).ldest := dest + i.U 562 csBundle(2 * i + 1).uopIdx := (2 * i).U 563 csBundle(2 * i + 2).srcType(0) := SrcType.vp 564 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 565 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 566 csBundle(2 * i + 2).lsrc(2) := dest + i.U 567 csBundle(2 * i + 2).ldest := dest + i.U 568 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 569 } 570 } 571 is(UopSplitType.VEC_VVM) { 572 csBundle(0).lsrc(2) := dest 573 csBundle(0).ldest := dest 574 csBundle(0).uopIdx := 0.U 575 for (i <- 1 until MAX_VLMUL) { 576 csBundle(i).lsrc(0) := src1 + i.U 577 csBundle(i).lsrc(1) := src2 + i.U 578 csBundle(i).lsrc(2) := dest 579 csBundle(i).ldest := dest 580 csBundle(i).uopIdx := i.U 581 } 582 } 583 is(UopSplitType.VEC_VFM) { 584 /* 585 f to vector move 586 */ 587 csBundle(0).srcType(0) := SrcType.fp 588 csBundle(0).srcType(1) := SrcType.imm 589 csBundle(0).srcType(2) := SrcType.imm 590 csBundle(0).lsrc(1) := 0.U 591 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 592 csBundle(0).fuType := FuType.f2v.U 593 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 594 csBundle(0).rfWen := false.B 595 csBundle(0).fpWen := false.B 596 csBundle(0).vecWen := true.B 597 //LMUL 598 
csBundle(1).srcType(0) := SrcType.vp 599 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 600 csBundle(1).lsrc(2) := dest 601 csBundle(1).ldest := dest 602 csBundle(1).uopIdx := 0.U 603 for (i <- 1 until MAX_VLMUL) { 604 csBundle(i + 1).srcType(0) := SrcType.vp 605 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 606 csBundle(i + 1).lsrc(1) := src2 + i.U 607 csBundle(i + 1).lsrc(2) := dest 608 csBundle(i + 1).ldest := dest 609 csBundle(i + 1).uopIdx := i.U 610 } 611 csBundle(numOfUop - 1.U).ldest := dest 612 } 613 is(UopSplitType.VEC_VXM) { 614 /* 615 i to vector move 616 */ 617 csBundle(0).srcType(0) := SrcType.reg 618 csBundle(0).srcType(1) := SrcType.imm 619 csBundle(0).srcType(2) := SrcType.imm 620 csBundle(0).lsrc(1) := 0.U 621 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 622 csBundle(0).fuType := FuType.i2v.U 623 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 624 csBundle(0).vecWen := true.B 625 //LMUL 626 csBundle(1).srcType(0) := SrcType.vp 627 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 628 csBundle(1).lsrc(2) := dest 629 csBundle(1).ldest := dest 630 csBundle(1).uopIdx := 0.U 631 for (i <- 1 until MAX_VLMUL) { 632 csBundle(i + 1).srcType(0) := SrcType.vp 633 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 634 csBundle(i + 1).lsrc(1) := src2 + i.U 635 csBundle(i + 1).lsrc(2) := dest 636 csBundle(i + 1).ldest := dest 637 csBundle(i + 1).uopIdx := i.U 638 } 639 csBundle(numOfUop - 1.U).ldest := dest 640 } 641 is(UopSplitType.VEC_SLIDE1UP) { 642 /* 643 i to vector move 644 */ 645 csBundle(0).srcType(0) := SrcType.reg 646 csBundle(0).srcType(1) := SrcType.imm 647 csBundle(0).srcType(2) := SrcType.imm 648 csBundle(0).lsrc(1) := 0.U 649 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 650 csBundle(0).fuType := FuType.i2v.U 651 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 652 csBundle(0).vecWen := true.B 653 //LMUL 654 csBundle(1).srcType(0) := SrcType.vp 655 csBundle(1).lsrc(0) := 
VECTOR_TMP_REG_LMUL.U 656 csBundle(1).lsrc(2) := dest 657 csBundle(1).ldest := dest 658 csBundle(1).uopIdx := 0.U 659 for (i <- 1 until MAX_VLMUL) { 660 csBundle(i + 1).srcType(0) := SrcType.vp 661 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 662 csBundle(i + 1).lsrc(1) := src2 + i.U 663 csBundle(i + 1).lsrc(2) := dest + i.U 664 csBundle(i + 1).ldest := dest + i.U 665 csBundle(i + 1).uopIdx := i.U 666 } 667 } 668 is(UopSplitType.VEC_FSLIDE1UP) { 669 /* 670 f to vector move 671 */ 672 csBundle(0).srcType(0) := SrcType.fp 673 csBundle(0).srcType(1) := SrcType.imm 674 csBundle(0).srcType(2) := SrcType.imm 675 csBundle(0).lsrc(1) := 0.U 676 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 677 csBundle(0).fuType := FuType.f2v.U 678 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 679 csBundle(0).rfWen := false.B 680 csBundle(0).fpWen := false.B 681 csBundle(0).vecWen := true.B 682 //LMUL 683 csBundle(1).srcType(0) := SrcType.vp 684 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 685 csBundle(1).lsrc(1) := src2 686 csBundle(1).lsrc(2) := dest 687 csBundle(1).ldest := dest 688 csBundle(1).uopIdx := 0.U 689 for (i <- 1 until MAX_VLMUL) { 690 csBundle(i + 1).srcType(0) := SrcType.vp 691 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 692 csBundle(i + 1).lsrc(1) := src2 + i.U 693 csBundle(i + 1).lsrc(2) := dest + i.U 694 csBundle(i + 1).ldest := dest + i.U 695 csBundle(i + 1).uopIdx := i.U 696 } 697 } 698 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 699 /* 700 i to vector move 701 */ 702 csBundle(0).srcType(0) := SrcType.reg 703 csBundle(0).srcType(1) := SrcType.imm 704 csBundle(0).srcType(2) := SrcType.imm 705 csBundle(0).lsrc(1) := 0.U 706 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 707 csBundle(0).fuType := FuType.i2v.U 708 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 709 csBundle(0).vecWen := true.B 710 //LMUL 711 for (i <- 0 until MAX_VLMUL) { 712 csBundle(2 * i + 1).srcType(0) := SrcType.vp 713 csBundle(2 * i + 1).srcType(1) := 
SrcType.vp 714 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 715 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 716 csBundle(2 * i + 1).lsrc(2) := dest + i.U 717 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 718 csBundle(2 * i + 1).uopIdx := (2 * i).U 719 if (2 * i + 2 < MAX_VLMUL * 2) { 720 csBundle(2 * i + 2).srcType(0) := SrcType.vp 721 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 722 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 723 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 724 csBundle(2 * i + 2).ldest := dest + i.U 725 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 726 } 727 } 728 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 729 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 730 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 731 } 732 is(UopSplitType.VEC_FSLIDE1DOWN) { 733 /* 734 f to vector move 735 */ 736 csBundle(0).srcType(0) := SrcType.fp 737 csBundle(0).srcType(1) := SrcType.imm 738 csBundle(0).srcType(2) := SrcType.imm 739 csBundle(0).lsrc(1) := 0.U 740 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 741 csBundle(0).fuType := FuType.f2v.U 742 csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg) 743 csBundle(0).rfWen := false.B 744 csBundle(0).fpWen := false.B 745 csBundle(0).vecWen := true.B 746 //LMUL 747 for (i <- 0 until MAX_VLMUL) { 748 csBundle(2 * i + 1).srcType(0) := SrcType.vp 749 csBundle(2 * i + 1).srcType(1) := SrcType.vp 750 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 751 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 752 csBundle(2 * i + 1).lsrc(2) := dest + i.U 753 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 754 csBundle(2 * i + 1).uopIdx := (2 * i).U 755 if (2 * i + 2 < MAX_VLMUL * 2) { 756 csBundle(2 * i + 2).srcType(0) := SrcType.vp 757 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 758 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 759 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 760 csBundle(2 * i + 2).ldest := dest + i.U 
761 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 762 } 763 } 764 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 765 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 766 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 767 } 768 is(UopSplitType.VEC_VRED) { 769 when(vlmulReg === "b001".U) { 770 csBundle(0).srcType(2) := SrcType.DC 771 csBundle(0).lsrc(0) := src2 + 1.U 772 csBundle(0).lsrc(1) := src2 773 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 774 csBundle(0).uopIdx := 0.U 775 } 776 when(vlmulReg === "b010".U) { 777 csBundle(0).srcType(2) := SrcType.DC 778 csBundle(0).lsrc(0) := src2 + 1.U 779 csBundle(0).lsrc(1) := src2 780 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 781 csBundle(0).uopIdx := 0.U 782 783 csBundle(1).srcType(2) := SrcType.DC 784 csBundle(1).lsrc(0) := src2 + 3.U 785 csBundle(1).lsrc(1) := src2 + 2.U 786 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 787 csBundle(1).uopIdx := 1.U 788 789 csBundle(2).srcType(2) := SrcType.DC 790 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 791 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 792 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 793 csBundle(2).uopIdx := 2.U 794 } 795 when(vlmulReg === "b011".U) { 796 for (i <- 0 until MAX_VLMUL) { 797 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 798 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 799 csBundle(i).lsrc(1) := src2 + (i * 2).U 800 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 801 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 802 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 803 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 804 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 805 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 806 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 807 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 808 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 809 } 810 csBundle(i).srcType(2) := SrcType.DC 811 csBundle(i).uopIdx := i.U 812 } 813 } 814 when(vlmulReg(2) 
=== 0.U && vlmulReg(1, 0).orR) { 815 /* 816 * 2 <= vlmul <= 8 817 */ 818 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 819 csBundle(numOfUop - 1.U).lsrc(0) := src1 820 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 821 csBundle(numOfUop - 1.U).lsrc(2) := dest 822 csBundle(numOfUop - 1.U).ldest := dest 823 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 824 } 825 } 826 is(UopSplitType.VEC_VFRED) { 827 val vlmul = vlmulReg 828 val vsew = vsewReg 829 when(vlmul === VLmul.m8){ 830 for (i <- 0 until 4) { 831 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 832 csBundle(i).lsrc(1) := src2 + (i * 2).U 833 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 834 csBundle(i).uopIdx := i.U 835 } 836 for (i <- 4 until 6) { 837 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 838 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 839 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 840 csBundle(i).uopIdx := i.U 841 } 842 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 843 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 844 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 845 csBundle(6).uopIdx := 6.U 846 when(vsew === VSew.e64) { 847 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 848 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 849 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 850 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 851 csBundle(7).uopIdx := 7.U 852 csBundle(8).lsrc(0) := src1 853 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 854 csBundle(8).ldest := dest 855 csBundle(8).uopIdx := 8.U 856 } 857 when(vsew === VSew.e32) { 858 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 859 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 860 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 861 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 862 csBundle(7).uopIdx := 7.U 863 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 864 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 865 csBundle(8).ldest := 
(VECTOR_TMP_REG_LMUL + 8).U 866 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 867 csBundle(8).uopIdx := 8.U 868 csBundle(9).lsrc(0) := src1 869 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 870 csBundle(9).ldest := dest 871 csBundle(9).uopIdx := 9.U 872 } 873 when(vsew === VSew.e16) { 874 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 875 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 876 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 877 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 878 csBundle(7).uopIdx := 7.U 879 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 880 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 881 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 882 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 883 csBundle(8).uopIdx := 8.U 884 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 885 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 886 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 887 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 888 csBundle(9).uopIdx := 9.U 889 csBundle(10).lsrc(0) := src1 890 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 891 csBundle(10).ldest := dest 892 csBundle(10).uopIdx := 10.U 893 } 894 } 895 when(vlmul === VLmul.m4) { 896 for (i <- 0 until 2) { 897 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 898 csBundle(i).lsrc(1) := src2 + (i * 2).U 899 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 900 csBundle(i).uopIdx := i.U 901 } 902 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 903 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 904 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 905 csBundle(2).uopIdx := 2.U 906 when(vsew === VSew.e64) { 907 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 908 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 909 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 910 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 911 csBundle(3).uopIdx := 3.U 912 csBundle(4).lsrc(0) := src1 913 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 914 csBundle(4).ldest := dest 915 
csBundle(4).uopIdx := 4.U 916 } 917 when(vsew === VSew.e32) { 918 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 919 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 920 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 921 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 922 csBundle(3).uopIdx := 3.U 923 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 924 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 925 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 926 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 927 csBundle(4).uopIdx := 4.U 928 csBundle(5).lsrc(0) := src1 929 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 930 csBundle(5).ldest := dest 931 csBundle(5).uopIdx := 5.U 932 } 933 when(vsew === VSew.e16) { 934 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 935 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 936 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 937 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 938 csBundle(3).uopIdx := 3.U 939 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 940 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 941 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 942 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 943 csBundle(4).uopIdx := 4.U 944 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 945 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 946 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 947 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 948 csBundle(5).uopIdx := 5.U 949 csBundle(6).lsrc(0) := src1 950 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 951 csBundle(6).ldest := dest 952 csBundle(6).uopIdx := 6.U 953 } 954 } 955 when(vlmul === VLmul.m2) { 956 csBundle(0).lsrc(0) := src2 + 1.U 957 csBundle(0).lsrc(1) := src2 + 0.U 958 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 959 csBundle(0).uopIdx := 0.U 960 when(vsew === VSew.e64) { 961 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 962 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 963 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 964 
csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 965 csBundle(1).uopIdx := 1.U 966 csBundle(2).lsrc(0) := src1 967 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 968 csBundle(2).ldest := dest 969 csBundle(2).uopIdx := 2.U 970 } 971 when(vsew === VSew.e32) { 972 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 973 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 974 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 975 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 976 csBundle(1).uopIdx := 1.U 977 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 978 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 979 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 980 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 981 csBundle(2).uopIdx := 2.U 982 csBundle(3).lsrc(0) := src1 983 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 984 csBundle(3).ldest := dest 985 csBundle(3).uopIdx := 3.U 986 } 987 when(vsew === VSew.e16) { 988 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 989 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 990 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 991 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 992 csBundle(1).uopIdx := 1.U 993 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 994 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 995 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 996 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 997 csBundle(2).uopIdx := 2.U 998 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 999 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1000 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 1001 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 1002 csBundle(3).uopIdx := 3.U 1003 csBundle(4).lsrc(0) := src1 1004 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 1005 csBundle(4).ldest := dest 1006 csBundle(4).uopIdx := 4.U 1007 } 1008 } 1009 when(vlmul === VLmul.m1) { 1010 when(vsew === VSew.e64) { 1011 csBundle(0).lsrc(0) := src2 1012 csBundle(0).lsrc(1) := src2 1013 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1014 
csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1015 csBundle(0).uopIdx := 0.U 1016 csBundle(1).lsrc(0) := src1 1017 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1018 csBundle(1).ldest := dest 1019 csBundle(1).uopIdx := 1.U 1020 } 1021 when(vsew === VSew.e32) { 1022 csBundle(0).lsrc(0) := src2 1023 csBundle(0).lsrc(1) := src2 1024 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1025 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1026 csBundle(0).uopIdx := 0.U 1027 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1028 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1029 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1030 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1031 csBundle(1).uopIdx := 1.U 1032 csBundle(2).lsrc(0) := src1 1033 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1034 csBundle(2).ldest := dest 1035 csBundle(2).uopIdx := 2.U 1036 } 1037 when(vsew === VSew.e16) { 1038 csBundle(0).lsrc(0) := src2 1039 csBundle(0).lsrc(1) := src2 1040 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1041 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 1042 csBundle(0).uopIdx := 0.U 1043 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1044 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1045 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1046 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 1047 csBundle(1).uopIdx := 1.U 1048 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 1049 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1050 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 1051 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 1052 csBundle(2).uopIdx := 2.U 1053 csBundle(3).lsrc(0) := src1 1054 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 1055 csBundle(3).ldest := dest 1056 csBundle(3).uopIdx := 3.U 1057 } 1058 } 1059 when(vlmul === VLmul.mf2) { 1060 when(vsew === VSew.e32) { 1061 csBundle(0).lsrc(0) := src2 1062 csBundle(0).lsrc(1) := src2 1063 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1064 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1065 csBundle(0).uopIdx 
:= 0.U 1066 csBundle(1).lsrc(0) := src1 1067 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1068 csBundle(1).ldest := dest 1069 csBundle(1).uopIdx := 1.U 1070 } 1071 when(vsew === VSew.e16) { 1072 csBundle(0).lsrc(0) := src2 1073 csBundle(0).lsrc(1) := src2 1074 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1075 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 1076 csBundle(0).uopIdx := 0.U 1077 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 1078 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1079 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 1080 csBundle(1).vpu.fpu.isFoldTo1_8 := true.B 1081 csBundle(1).uopIdx := 1.U 1082 csBundle(2).lsrc(0) := src1 1083 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 1084 csBundle(2).ldest := dest 1085 csBundle(2).uopIdx := 2.U 1086 } 1087 } 1088 when(vlmul === VLmul.mf4) { 1089 when(vsew === VSew.e16) { 1090 csBundle(0).lsrc(0) := src2 1091 csBundle(0).lsrc(1) := src2 1092 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 1093 csBundle(0).vpu.fpu.isFoldTo1_8 := true.B 1094 csBundle(0).uopIdx := 0.U 1095 csBundle(1).lsrc(0) := src1 1096 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 1097 csBundle(1).ldest := dest 1098 csBundle(1).uopIdx := 1.U 1099 } 1100 } 1101 } 1102 1103 is(UopSplitType.VEC_VFREDOSUM) { 1104 import yunsuan.VfaluType 1105 val vlmul = vlmulReg 1106 val vsew = vsewReg 1107 val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum 1108 when(vlmul === VLmul.m8) { 1109 when(vsew === VSew.e64) { 1110 val vlmax = 16 1111 for (i <- 0 until vlmax) { 1112 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1113 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1114 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1115 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1116 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1117 
csBundle(i).uopIdx := i.U 1118 } 1119 } 1120 when(vsew === VSew.e32) { 1121 val vlmax = 32 1122 for (i <- 0 until vlmax) { 1123 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1124 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1125 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1126 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1127 csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B) 1128 csBundle(i).uopIdx := i.U 1129 } 1130 } 1131 when(vsew === VSew.e16) { 1132 val vlmax = 64 1133 for (i <- 0 until vlmax) { 1134 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1135 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1136 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1137 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1138 csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B) 1139 csBundle(i).uopIdx := i.U 1140 } 1141 } 1142 } 1143 when(vlmul === VLmul.m4) { 1144 when(vsew === VSew.e64) { 1145 val vlmax = 8 1146 for (i <- 0 until vlmax) { 1147 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1148 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1149 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1150 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1151 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1152 csBundle(i).uopIdx := i.U 1153 } 1154 } 1155 when(vsew === VSew.e32) { 1156 val vlmax = 16 1157 for (i <- 0 until vlmax) { 1158 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1159 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else 
VECTOR_TMP_REG_LMUL.U) 1160 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1161 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1162 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1163 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1164 csBundle(i).uopIdx := i.U 1165 } 1166 } 1167 when(vsew === VSew.e16) { 1168 val vlmax = 32 1169 for (i <- 0 until vlmax) { 1170 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1171 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1172 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1173 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1174 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1175 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1176 csBundle(i).uopIdx := i.U 1177 } 1178 } 1179 } 1180 when(vlmul === VLmul.m2) { 1181 when(vsew === VSew.e64) { 1182 val vlmax = 4 1183 for (i <- 0 until vlmax) { 1184 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1185 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1186 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1187 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1188 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1189 csBundle(i).uopIdx := i.U 1190 } 1191 } 1192 when(vsew === VSew.e32) { 1193 val vlmax = 8 1194 for (i <- 0 until vlmax) { 1195 csBundle(i).lsrc(0) := (if (i == 0) src1 else 
VECTOR_TMP_REG_LMUL.U) 1196 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1197 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1198 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1199 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1200 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1201 csBundle(i).uopIdx := i.U 1202 } 1203 } 1204 when(vsew === VSew.e16) { 1205 val vlmax = 16 1206 for (i <- 0 until vlmax) { 1207 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1208 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1209 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1210 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1211 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1212 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1213 csBundle(i).uopIdx := i.U 1214 } 1215 } 1216 } 1217 when(vlmul === VLmul.m1) { 1218 when(vsew === VSew.e64) { 1219 val vlmax = 2 1220 for (i <- 0 until vlmax) { 1221 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1222 csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U) 1223 csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1224 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1225 csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B) 1226 csBundle(i).uopIdx := i.U 1227 } 1228 } 1229 when(vsew === VSew.e32) { 1230 val vlmax = 4 
1231 for (i <- 0 until vlmax) { 1232 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1233 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1234 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1235 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1236 csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1237 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1238 csBundle(i).uopIdx := i.U 1239 } 1240 } 1241 when(vsew === VSew.e16) { 1242 val vlmax = 8 1243 for (i <- 0 until vlmax) { 1244 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1245 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1246 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1247 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1248 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1249 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1250 csBundle(i).uopIdx := i.U 1251 } 1252 } 1253 } 1254 when(vlmul === VLmul.mf2) { 1255 when(vsew === VSew.e32) { 1256 val vlmax = 2 1257 for (i <- 0 until vlmax) { 1258 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1259 csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U) 1260 csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1261 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1262 
csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B) 1263 csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B) 1264 csBundle(i).uopIdx := i.U 1265 } 1266 } 1267 when(vsew === VSew.e16) { 1268 val vlmax = 4 1269 for (i <- 0 until vlmax) { 1270 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1271 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1272 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1273 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1274 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1275 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1276 csBundle(i).uopIdx := i.U 1277 } 1278 } 1279 } 1280 when(vlmul === VLmul.mf4) { 1281 when(vsew === VSew.e16) { 1282 val vlmax = 2 1283 for (i <- 0 until vlmax) { 1284 csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U) 1285 csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U) 1286 csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U) 1287 csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U) 1288 csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B) 1289 csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B) 1290 csBundle(i).uopIdx := i.U 1291 } 1292 } 1293 } 1294 } 1295 1296 is(UopSplitType.VEC_SLIDEUP) { 1297 // i to vector move 1298 csBundle(0).srcType(0) := SrcType.reg 1299 csBundle(0).srcType(1) := SrcType.imm 1300 csBundle(0).srcType(2) := SrcType.imm 1301 csBundle(0).lsrc(1) := 0.U 1302 
csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1303 csBundle(0).fuType := FuType.i2v.U 1304 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 1305 csBundle(0).vecWen := true.B 1306 // LMUL 1307 for (i <- 0 until MAX_VLMUL) 1308 for (j <- 0 to i) { 1309 val old_vd = if (j == 0) { 1310 dest + i.U 1311 } else (VECTOR_TMP_REG_LMUL + j).U 1312 val vd = if (j == i) { 1313 dest + i.U 1314 } else (VECTOR_TMP_REG_LMUL + j + 1).U 1315 csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp 1316 csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1317 csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U 1318 csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd 1319 csBundle(i * (i + 1) / 2 + j + 1).ldest := vd 1320 csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U 1321 } 1322 } 1323 1324 is(UopSplitType.VEC_SLIDEDOWN) { 1325 // i to vector move 1326 csBundle(0).srcType(0) := SrcType.reg 1327 csBundle(0).srcType(1) := SrcType.imm 1328 csBundle(0).srcType(2) := SrcType.imm 1329 csBundle(0).lsrc(1) := 0.U 1330 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1331 csBundle(0).fuType := FuType.i2v.U 1332 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 1333 csBundle(0).vecWen := true.B 1334 // LMUL 1335 for (i <- 0 until MAX_VLMUL) 1336 for (j <- (0 to i).reverse) { 1337 when(i.U < lmul) { 1338 val old_vd = if (j == 0) { 1339 dest + lmul - 1.U - i.U 1340 } else (VECTOR_TMP_REG_LMUL + j).U 1341 val vd = if (j == i) { 1342 dest + lmul - 1.U - i.U 1343 } else (VECTOR_TMP_REG_LMUL + j + 1).U 1344 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp 1345 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1346 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U 1347 csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd 1348 
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
          }
        }
    }

    // VEC_M0X: a chain of MAX_VLMUL uops. Uop k reads temp register
    // (VECTOR_TMP_REG_LMUL + k - 1) on lsrc(0) and src2 on lsrc(1), and writes
    // temp register (VECTOR_TMP_REG_LMUL + k). For k == 0 the first source is
    // marked SrcType.DC. Afterwards the uop at index (lmul - 1) is redirected so
    // that it writes `dest` through the integer write port instead.
    is(UopSplitType.VEC_M0X) {
      (0 until MAX_VLMUL).foreach { step =>
        val uop = csBundle(step)
        uop.srcType(0) := (if (step == 0) SrcType.DC else SrcType.vp)
        uop.srcType(1) := SrcType.vp
        uop.rfWen := false.B
        uop.fpWen := false.B
        uop.vecWen := true.B
        uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + step - 1).U
        uop.lsrc(1) := src2
        // lsrc(2) is not assigned in this clause (the original marks it DontCare).
        uop.ldest := (VECTOR_TMP_REG_LMUL + step).U
        uop.uopIdx := step.U
      }
      // These connections come after the loop, so they take priority for the
      // dynamically selected final uop.
      val finalUop = csBundle(lmul - 1.U)
      finalUop.rfWen := true.B
      finalUop.fpWen := false.B
      finalUop.vecWen := false.B
      finalUop.ldest := dest
    }

    // VEC_MVV: two uops per register index `pair`. Both read the previous temp
    // register (VECTOR_TMP_REG_LMUL + pair - 1) and src2. The even uop writes
    // dest + pair (and reads it back as lsrc(2)); the odd uop writes temp
    // register (VECTOR_TMP_REG_LMUL + pair).
    is(UopSplitType.VEC_MVV) {
      for (pair <- 0 until MAX_VLMUL) {
        val firstSrcType = if (pair == 0) SrcType.DC else SrcType.vp
        val evenIdx = pair * 2
        val oddIdx = evenIdx + 1

        // Connections shared by both uops of the pair.
        Seq(evenIdx, oddIdx).foreach { idx =>
          csBundle(idx).srcType(0) := firstSrcType
          csBundle(idx).srcType(1) := SrcType.vp
          csBundle(idx).lsrc(0) := (VECTOR_TMP_REG_LMUL + pair - 1).U
          csBundle(idx).lsrc(1) := src2
          csBundle(idx).uopIdx := idx.U
        }

        // Even uop: old destination value in, architectural destination out.
        csBundle(evenIdx).lsrc(2) := dest + pair.U
        csBundle(evenIdx).ldest := dest + pair.U

        // Odd uop: lsrc(2) is not assigned here; result goes to a temp register.
        csBundle(oddIdx).ldest := (VECTOR_TMP_REG_LMUL + pair).U
      }
    }

    is(UopSplitType.VEC_M0X_VFIRST) {
      // LMUL
      csBundle(0).rfWen := true.B
      csBundle(0).fpWen := false.B
      csBundle(0).vecWen :=
false.B
      csBundle(0).ldest := dest
    }
    // VEC_VWW: uops with index i < lmul read src2 + i on both lsrc(0) and
    // lsrc(1); the remaining uops read the temp-register pair
    // (VECTOR_TMP_REG_LMUL + 2*(i - lmul) + 1, VECTOR_TMP_REG_LMUL + 2*(i - lmul)).
    // Every uop writes temp register (VECTOR_TMP_REG_LMUL + i). The uop at index
    // (numOfUop - 1) is finally redirected to read src1 / dest and write dest.
    is(UopSplitType.VEC_VWW) {
      for (i <- 0 until MAX_VLMUL * 2) {
        // Identical in both arms of the original when/otherwise, so connected
        // unconditionally here.
        csBundle(i).srcType(2) := SrcType.DC
        csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
        csBundle(i).uopIdx := i.U
        when(i.U < lmul) {
          csBundle(i).lsrc(0) := src2 + i.U
          csBundle(i).lsrc(1) := src2 + i.U
        }.otherwise {
          // Cat(x, 0.U(1.W)) is x shifted left by one bit, i.e. 2 * (i - lmul).
          val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
          csBundle(i).lsrc(0) := pairBase + 1.U
          csBundle(i).lsrc(1) := pairBase
        }
      }
      // Final-uop overrides. They do not depend on the loop index, so they are
      // emitted once after the loop rather than on every iteration; being the
      // last connections, they still take priority for the selected uop.
      val lastUop = csBundle(numOfUop - 1.U)
      lastUop.srcType(2) := SrcType.vp
      lastUop.lsrc(0) := src1
      lastUop.lsrc(2) := dest
      lastUop.ldest := dest
    }
    is(UopSplitType.VEC_RGATHER) {
      /**
       * Emits len * len uops, laid out row-major as slot = vdIdx * len + vs2Idx.
       *
       * Each uop reads src1 + vdIdx and src2 + vs2Idx. Within one row the result
       * is threaded through temp registers: the first uop of the row reads
       * dest + vdIdx as its old value, the last uop of the row writes dest + vdIdx,
       * and intermediate uops use VECTOR_TMP_REG_LMUL + vs2Idx (written) and
       * VECTOR_TMP_REG_LMUL + vs2Idx - 1 (read). srcType is not assigned here.
       *
       * @param len number of registers per operand group (2, 4 or 8 below)
       */
      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
        for {
          vdIdx <- 0 until len
          vs2Idx <- 0 until len
        } {
          val slot = vdIdx * len + vs2Idx
          val uop = csBundle(slot)
          uop.lsrc(0) := src1 + vdIdx.U
          uop.lsrc(1) := src2 + vs2Idx.U
          uop.lsrc(2) := (if (vs2Idx == 0) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + vs2Idx - 1).U)
          uop.ldest := (if (vs2Idx == len - 1) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + vs2Idx).U)
          uop.uopIdx := slot.U
        }
      }
      // Only the vlmul encodings b001, b010 and b011 are handled here; any other
      // encoding leaves csBundle untouched by this clause.
      switch(vlmulReg) {
        is("b001".U) {
          genCsBundle_VEC_RGATHER(2)
        }
        is("b010".U) {
          genCsBundle_VEC_RGATHER(4)
        }
        is("b011".U) {
          genCsBundle_VEC_RGATHER(8)
        }
      }
    }
    is(UopSplitType.VEC_RGATHER_VX) {
      def genCsBundle_RGATHER_VX(len:Int): Unit ={
        for (i <- 0
until len) 1459 for (j <- 0 until len) { 1460 csBundle(i * len + j + 1).srcType(0) := SrcType.vp 1461 // csBundle(i * len + j + 1).srcType(1) := SrcType.vp 1462 // csBundle(i * len + j + 1).srcType(2) := SrcType.vp 1463 csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1464 csBundle(i * len + j + 1).lsrc(1) := src2 + j.U 1465 val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U 1466 csBundle(i * len + j + 1).lsrc(2) := vd_old 1467 val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U 1468 csBundle(i * len + j + 1).ldest := vd 1469 csBundle(i * len + j + 1).uopIdx := (i * len + j).U 1470 } 1471 } 1472 // i to vector move 1473 csBundle(0).srcType(0) := SrcType.reg 1474 csBundle(0).srcType(1) := SrcType.imm 1475 csBundle(0).srcType(2) := SrcType.imm 1476 csBundle(0).lsrc(1) := 0.U 1477 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1478 csBundle(0).fuType := FuType.i2v.U 1479 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg) 1480 csBundle(0).rfWen := false.B 1481 csBundle(0).fpWen := false.B 1482 csBundle(0).vecWen := true.B 1483 genCsBundle_RGATHER_VX(1) 1484 switch(vlmulReg) { 1485 is("b001".U ){ 1486 genCsBundle_RGATHER_VX(2) 1487 } 1488 is("b010".U ){ 1489 genCsBundle_RGATHER_VX(4) 1490 } 1491 is("b011".U ){ 1492 genCsBundle_RGATHER_VX(8) 1493 } 1494 } 1495 } 1496 is(UopSplitType.VEC_RGATHEREI16) { 1497 def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={ 1498 for (i <- 0 until len) 1499 for (j <- 0 until len) { 1500 val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U 1501 val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U 1502 csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U 1503 csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U 1504 csBundle((i * len + j)*2+0).lsrc(2) := vd_old0 1505 csBundle((i * len + j)*2+0).ldest := vd0 1506 csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U 1507 val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U 1508 val 
vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
            csBundle((i * len + j)*2+1).ldest := vd1
            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
          }
      }
      /**
       * Shared generator behind the three non-SEW8 variants below.
       *
       * Emits len * len uops in row-major order (slot = row * len + col). Each uop
       * reads src2 + col, and src1 + (row / rowsPerIdxReg) as its index register.
       * The old-destination / destination chain within a row goes through temp
       * registers: first column reads dest + row, last column writes dest + row,
       * other columns use VECTOR_TMP_REG_LMUL + col - 1 / VECTOR_TMP_REG_LMUL + col.
       *
       * @param len           rows and columns of the uop grid
       * @param rowsPerIdxReg how many consecutive rows share one src1 register
       */
      def genCsBundle_VEC_RGATHEREI16_common(len: Int, rowsPerIdxReg: Int): Unit = {
        for (row <- 0 until len; col <- 0 until len) {
          val slot = row * len + col
          val prevVd = if (col == 0) dest + row.U else (VECTOR_TMP_REG_LMUL + col - 1).U
          val nextVd = if (col == len - 1) dest + row.U else (VECTOR_TMP_REG_LMUL + col).U
          val uop = csBundle(slot)
          uop.lsrc(0) := src1 + (row / rowsPerIdxReg).U
          uop.lsrc(1) := src2 + col.U
          uop.lsrc(2) := prevVd
          uop.ldest := nextVd
          uop.uopIdx := slot.U
        }
      }
      // One src1 register per row.
      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit =
        genCsBundle_VEC_RGATHEREI16_common(len, 1)
      // One src1 register per two rows (selected below when vsewReg === VSew.e32).
      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit =
        genCsBundle_VEC_RGATHEREI16_common(len, 2)
      // One src1 register per four rows (selected below when vsewReg === VSew.e64).
      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit =
        genCsBundle_VEC_RGATHEREI16_common(len, 4)
      when(!vsewReg.orR){
        genCsBundle_VEC_RGATHEREI16_SEW8(1)
}.elsewhen(vsewReg === VSew.e32){ 1555 genCsBundle_VEC_RGATHEREI16_SEW32(1) 1556 }.elsewhen(vsewReg === VSew.e64){ 1557 genCsBundle_VEC_RGATHEREI16_SEW64(1) 1558 }.otherwise{ 1559 genCsBundle_VEC_RGATHEREI16(1) 1560 } 1561 switch(vlmulReg) { 1562 is("b001".U) { 1563 when(!vsewReg.orR) { 1564 genCsBundle_VEC_RGATHEREI16_SEW8(2) 1565 }.elsewhen(vsewReg === VSew.e32){ 1566 genCsBundle_VEC_RGATHEREI16_SEW32(2) 1567 }.elsewhen(vsewReg === VSew.e64){ 1568 genCsBundle_VEC_RGATHEREI16_SEW64(2) 1569 }.otherwise{ 1570 genCsBundle_VEC_RGATHEREI16(2) 1571 } 1572 } 1573 is("b010".U) { 1574 when(!vsewReg.orR) { 1575 genCsBundle_VEC_RGATHEREI16_SEW8(4) 1576 }.elsewhen(vsewReg === VSew.e32){ 1577 genCsBundle_VEC_RGATHEREI16_SEW32(4) 1578 }.elsewhen(vsewReg === VSew.e64){ 1579 genCsBundle_VEC_RGATHEREI16_SEW64(4) 1580 }.otherwise{ 1581 genCsBundle_VEC_RGATHEREI16(4) 1582 } 1583 } 1584 is("b011".U) { 1585 when(vsewReg === VSew.e32){ 1586 genCsBundle_VEC_RGATHEREI16_SEW32(8) 1587 }.elsewhen(vsewReg === VSew.e64){ 1588 genCsBundle_VEC_RGATHEREI16_SEW64(8) 1589 }.otherwise{ 1590 genCsBundle_VEC_RGATHEREI16(8) 1591 } 1592 } 1593 } 1594 } 1595 is(UopSplitType.VEC_COMPRESS) { 1596 def genCsBundle_VEC_COMPRESS(len:Int): Unit = { 1597 for (i <- 0 until len) { 1598 val jlen = if (i == len-1) i+1 else i+2 1599 for (j <- 0 until jlen) { 1600 val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U 1601 val vd = if(i==len-1) (dest + j.U) else { 1602 if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U 1603 } 1604 csBundle(i*(i+3)/2 + j).vecWen := true.B 1605 csBundle(i*(i+3)/2 + j).v0Wen := false.B 1606 val src13Type = if (j == i+1) DontCare else SrcType.vp 1607 csBundle(i*(i+3)/2 + j).srcType(0) := src13Type 1608 csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp 1609 csBundle(i*(i+3)/2 + j).srcType(2) := src13Type 1610 if (i == 0) { 1611 csBundle(i*(i+3)/2 + j).lsrc(0) := src1 1612 } else { 1613 csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1614 } 
csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
            csBundle(i*(i+3)/2 + j).ldest := vd
            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
          }
        }
      }
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_COMPRESS(2)
        }
        is("b010".U ){
          genCsBundle_VEC_COMPRESS(4)
        }
        is("b011".U ){
          genCsBundle_VEC_COMPRESS(8)
        }
      }
    }
    // VEC_MVNR: one uop per register index; uop k reads src1 + k, src2 + k and
    // dest + k, and writes dest + k.
    is(UopSplitType.VEC_MVNR) {
      (0 until MAX_VLMUL).foreach { reg =>
        val uop = csBundle(reg)
        uop.lsrc(0) := src1 + reg.U
        uop.lsrc(1) := src2 + reg.U
        uop.lsrc(2) := dest + reg.U
        uop.ldest := dest + reg.U
        uop.uopIdx := reg.U
      }
    }
    // VEC_US_LDST: uop 0 is an i2v move into temp register VECTOR_TMP_REG_LMUL;
    // uops 1..MAX_VLMUL are the memory uops, each reading that temp register and
    // targeting dest + k.
    is(UopSplitType.VEC_US_LDST) {
      /*
      FMV.D.X
       */
      val baseMove = csBundle(0)
      baseMove.srcType(0) := SrcType.reg
      baseMove.srcType(1) := SrcType.imm
      baseMove.lsrc(1) := 0.U
      baseMove.ldest := VECTOR_TMP_REG_LMUL.U
      baseMove.fuType := FuType.i2v.U
      baseMove.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      baseMove.rfWen := false.B
      baseMove.fpWen := false.B
      baseMove.vecWen := true.B
      baseMove.vlsInstr := true.B
      // LMUL: memory uops occupy slots 1.., while their uopIdx restarts at 0.
      for (reg <- 0 until MAX_VLMUL) {
        val memUop = csBundle(reg + 1)
        memUop.srcType(0) := SrcType.vp
        memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        memUop.lsrc(2) := dest + reg.U // old vd
        memUop.ldest := dest + reg.U
        memUop.uopIdx := reg.U
        memUop.vlsInstr := true.B
      }
      // Ordering flags are driven by isUsSegment on the first and the last uop.
      csBundle.head.waitForward := isUsSegment
      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
    }
    is(UopSplitType.VEC_S_LDST) {
      /*
      FMV.D.X
       */
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      csBundle(0).rfWen := false.B
csBundle(0).fpWen := false.B
      csBundle(0).vecWen := true.B
      csBundle(0).vlsInstr := true.B

      // uop 1: second i2v move, taking latchedInst.lsrc(1) into temp register
      // VECTOR_TMP_REG_LMUL + 1.
      val secondMove = csBundle(1)
      secondMove.srcType(0) := SrcType.reg
      secondMove.srcType(1) := SrcType.imm
      secondMove.lsrc(0) := latchedInst.lsrc(1)
      secondMove.lsrc(1) := 0.U
      secondMove.ldest := (VECTOR_TMP_REG_LMUL + 1).U
      secondMove.fuType := FuType.i2v.U
      secondMove.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
      secondMove.rfWen := false.B
      secondMove.fpWen := false.B
      secondMove.vecWen := true.B
      secondMove.vlsInstr := true.B

      // LMUL: memory uops occupy slots 2.., reading both temp registers; their
      // uopIdx restarts at 0.
      (0 until MAX_VLMUL).foreach { reg =>
        val memUop = csBundle(reg + 2)
        memUop.srcType(0) := SrcType.vp
        memUop.srcType(1) := SrcType.vp
        memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        memUop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
        memUop.lsrc(2) := dest + reg.U // old vd
        memUop.ldest := dest + reg.U
        memUop.uopIdx := reg.U
        memUop.vlsInstr := true.B
      }
      // Ordering flags are driven by isSdSegment on the first and the last uop.
      csBundle.head.waitForward := isSdSegment
      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
    }
    is(UopSplitType.VEC_I_LDST) {
      /**
       * Configures memory-uop slots 1..MAX_VLMUL.
       *
       * Slot k + 1 reads temp register VECTOR_TMP_REG_LMUL on lsrc(0), has
       * lsrc(1) = src2 + k with srcType(1) set to SrcType.no, and uses dest + k
       * both as lsrc(2) and as ldest. Only the first lmul * nf slots get
       * vecWen = true and srcType(2) = SrcType.vp; the rest get vecWen = false
       * and srcType(2) = SrcType.no.
       *
       * @param lmul elaboration-time register-group factor (shadows the outer `lmul`)
       * @param nf   elaboration-time multiplier applied to lmul (shadows the outer `nf`)
       */
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
        val activeRegs = lmul * nf
        (0 until MAX_VLMUL).foreach { reg =>
          val isActive = reg < activeRegs
          val uop = csBundle(reg + 1)
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.srcType(1) := SrcType.no
          uop.lsrc(1) := src2 + reg.U
          uop.srcType(2) := (if (isActive) SrcType.vp else SrcType.no)
          uop.lsrc(2) := dest + reg.U
          uop.ldest := dest + reg.U
          uop.rfWen := false.B
          uop.fpWen := false.B
          uop.vecWen := isActive.B
          uop.uopIdx := reg.U
          uop.vlsInstr := true.B
        }
      }
      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
        for (i <- 0 until
MAX_VLMUL) { 1731 val src1Type = if (i < emul) SrcType.vp else SrcType.no 1732 csBundle(i + 1).srcType(1) := src1Type 1733 csBundle(i + 1).lsrc(1) := src2 + i.U 1734 } 1735 } 1736 1737 val vlmul = vlmulReg 1738 val vsew = Cat(0.U(1.W), vsewReg) 1739 val veew = Cat(0.U(1.W), width) 1740 val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt 1741 val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array( 1742 "b001".U -> 1.U, 1743 "b010".U -> 2.U, 1744 "b011".U -> 3.U 1745 )) 1746 val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array( 1747 "b001".U -> 1.U, 1748 "b010".U -> 2.U, 1749 "b011".U -> 3.U 1750 )) 1751 csBundle(0).srcType(0) := SrcType.reg 1752 csBundle(0).srcType(1) := SrcType.imm 1753 csBundle(0).lsrc(1) := 0.U 1754 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 1755 csBundle(0).fuType := FuType.i2v.U 1756 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64) 1757 csBundle(0).rfWen := false.B 1758 csBundle(0).fpWen := false.B 1759 csBundle(0).vecWen := true.B 1760 csBundle(0).vlsInstr := true.B 1761 1762 //LMUL 1763 when(nf === 0.U) { 1764 for (i <- 0 until MAX_VLMUL) { 1765 indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul) 1766 val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2 1767 val offsetVd = indexedLSRegOffset(i).outOffsetVd 1768 csBundle(i + 1).srcType(0) := SrcType.vp 1769 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 1770 csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U)) 1771 csBundle(i + 1).srcType(2) := SrcType.vp 1772 // lsrc2 is old vd 1773 csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)) 1774 csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)) 1775 csBundle(i + 1).uopIdx := i.U 1776 csBundle(i + 1).vlsInstr := true.B 1777 } 1778 }.otherwise{ 1779 // nf > 1, is segment indexed load/store 1780 // gen src0, vd 1781 switch(simple_lmul) { 1782 is(0.U) { 1783 switch(nf) 
{ 1784 is(1.U) { 1785 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2) 1786 } 1787 is(2.U) { 1788 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3) 1789 } 1790 is(3.U) { 1791 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4) 1792 } 1793 is(4.U) { 1794 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5) 1795 } 1796 is(5.U) { 1797 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6) 1798 } 1799 is(6.U) { 1800 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7) 1801 } 1802 is(7.U) { 1803 genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8) 1804 } 1805 } 1806 } 1807 is(1.U) { 1808 switch(nf) { 1809 is(1.U) { 1810 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2) 1811 } 1812 is(2.U) { 1813 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3) 1814 } 1815 is(3.U) { 1816 genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4) 1817 } 1818 } 1819 } 1820 is(2.U) { 1821 switch(nf) { 1822 is(1.U) { 1823 genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2) 1824 } 1825 } 1826 } 1827 } 1828 1829 // gen src1 1830 switch(simple_emul) { 1831 is(0.U) { 1832 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1) 1833 } 1834 is(1.U) { 1835 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2) 1836 } 1837 is(2.U) { 1838 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4) 1839 } 1840 is(3.U) { 1841 genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8) 1842 } 1843 } 1844 1845 // when is vstore instructions, not set vecwen 1846 when(isVstore) { 1847 for (i <- 0 until MAX_VLMUL) { 1848 csBundle(i + 1).vecWen := false.B 1849 } 1850 } 1851 } 1852 csBundle.head.waitForward := isIxSegment 1853 csBundle(numOfUop - 1.U).blockBackward := isIxSegment 1854 } 1855 } 1856 1857 //readyFromRename Counter 1858 val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U) 1859 1860 // The left uops of the complex inst in ComplexDecoder can be send out this cycle 1861 val thisAllOut = uopRes <= readyCounter 1862 1863 switch(state) { 1864 is(s_idle) { 1865 when (inValid) { 1866 stateNext := s_active 1867 uopResNext := inUopInfo.numOfUop 1868 } 1869 } 1870 is(s_active) { 
1871 when (thisAllOut) { 1872 when (inValid) { 1873 stateNext := s_active 1874 uopResNext := inUopInfo.numOfUop 1875 }.otherwise { 1876 stateNext := s_idle 1877 uopResNext := 0.U 1878 } 1879 }.otherwise { 1880 stateNext := s_active 1881 uopResNext := uopRes - readyCounter 1882 } 1883 } 1884 } 1885 1886 state := Mux(io.redirect, s_idle, stateNext) 1887 uopRes := Mux(io.redirect, 0.U, uopResNext) 1888 1889 val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes) 1890 1891 for(i <- 0 until RenameWidth) { 1892 outValids(i) := complexNum > i.U 1893 outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1)) 1894 } 1895 1896 outComplexNum := Mux(state === s_active, complexNum, 0.U) 1897 inReady := state === s_idle || state === s_active && thisAllOut 1898 1899// val validSimple = Wire(Vec(DecodeWidth, Bool())) 1900// validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 } 1901// val notInf = Wire(Vec(DecodeWidth, Bool())) 1902// notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 } 1903// notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc) 1904// val notInfVec = Wire(Vec(DecodeWidth, Bool())) 1905// notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR} 1906// 1907// complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR , 1908// Mux(uopRes0 > readyCounter, readyCounter, uopRes0), 1909// 0.U) 1910// validToRename.zipWithIndex.foreach{ 1911// case(dst, i) => 1912// val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i)) 1913// dst := MuxCase(false.B, Seq( 1914// (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B), 1915// (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> 
Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)), 1916// ).toSeq) 1917// } 1918// 1919// readyToIBuf.zipWithIndex.foreach { 1920// case (dst, i) => 1921// val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B) 1922// dst := MuxCase(true.B, Seq( 1923// (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B, 1924// (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B)) 1925// ).toSeq) 1926// } 1927// 1928// io.deq.decodedInsts := decodedInsts 1929// io.deq.complexNum := complexNum 1930// io.deq.validToRename := validToRename 1931// io.deq.readyToIBuf := readyToIBuf 1932} 1933