/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Combinational lookup table giving the register offsets of one uop of an indexed
  * vector load/store.
  *
  * One instance is built per uop position. The table is computed at elaboration time
  * for every (emul, lmul, nf) combination and minimized into a decoder.
  *
  * Input `src` is 7 bits packed as `{emul[1:0], lmul[1:0], nf[2:0]}`. `emul` and `lmul`
  * are used as shift amounts (log2 values) and `nf` is the field count minus one.
  * The decoder output is 7 bits packed as `{isFirstUopInVd, offsetVs2[2:0], offsetVd[2:0]}`
  * and is split onto the three output ports.
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the table entry for one (lmul, emul, nfields) combination.
    *
    * Each field occupies `1 << max(lmul, emul)` consecutive uops, so the field number
    * and the position inside the field follow directly from `uopIdx` by division and
    * remainder. This is equivalent to searching every (field, position) pair for the
    * one that matches `uopIdx`.
    *
    * @param lmul    log2 of the data register group size (used as a shift amount)
    * @param emul    log2 of the index register group size (used as a shift amount)
    * @param nfields number of fields (the caller passes `nf + 1`)
    * @param uopIdx  index of the uop being described
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when `uopIdx` lies
    *         outside the uops generated for this combination
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val uopsPerField = 1 << math.max(lmul, emul)
    val field = uopIdx / uopsPerField
    val i = uopIdx % uopsPerField
    // NOTE(review): `lmul` is a log2 value everywhere else in this method, so this guard
    // may have been intended as `(1 << lmul) * nfields <= 8`. It is kept unchanged so the
    // generated table is identical. As written, some accepted combinations (for example
    // lmul = 2 with nfields = 3) produce an offsetVd above 7, which overlaps the offsetVs2
    // bits when the entry is packed below. Confirm whether those encodings can occur.
    val covered = uopIdx >= 0 && lmul * nfields <= 8 && field < nfields
    if (!covered) {
      (0, 0, 1)
    } else if (lmul < emul) {
      // lmul < emul: the uop count is set by emul * nf, and several uops share one vd register
      val uopsPerVd = 1 << (emul - lmul)
      (i, i / uopsPerVd + field * (1 << lmul), if (i % uopsPerVd == 0) 1 else 0)
    } else {
      // lmul >= emul: the uop count is set by lmul * nf, and several uops share one vs2 register
      val uopsPerVs2 = 1 << (lmul - emul)
      (i / uopsPerVs2, i + field * (1 << lmul), 1)
    }
  }

  // indexed load/store: one row per (emul, lmul, nf) combination, in src encoding order
  val combVemulNf: Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}

/**
  * Constants shared by the vector uop-splitting logic.
  */
trait VectorConstants {
  val MAX_VLMUL = 8
  // logical register index used as the destination of the int-to-fp move uop
  val FP_TMP_REG_MV = 32
  // first logical register index used for vector temporaries
  val VECTOR_TMP_REG_LMUL = 33 // 33~47  ->  15
  // number of indexedLSUopTable instances, one per uop position
  val MAX_INDEXED_LS_UOPNUM = 64
}

/**
  * IO bundle of [[DecodeUnitComp]].
  *
  * `simple` carries the decoded instruction, complex flag and uop information taken as
  * inputs; `deq` carries the split uops and the handshake signals produced as outputs.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val simple = new Bundle {
    val decodedInst = Input(new DecodedInst)
    val isComplex = Input(Bool())
    val uopInfo = Input(new UopInfo)
  }
  val vtype = Input(new VType)
  val in0pc = Input(UInt(VAddrBits.W))
  val isComplex = Input(Vec(DecodeWidth, Bool()))
  val validFromIBuf = Input(Vec(DecodeWidth, Bool()))
  val readyFromRename = Input(Vec(RenameWidth, Bool()))
  val deq = new Bundle {
    val decodedInsts = Output(Vec(RenameWidth, new DecodedInst))
    val isVset = Output(Bool())
    val readyToIBuf = Output(Vec(DecodeWidth, Bool()))
    val validToRename = Output(Vec(RenameWidth, Bool()))
    val complexNum = Output(UInt(3.W))
  }
  val csrCtrl = Input(new CustomCSRCtrlIO)
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  val maxUopSize = MaxUopSize
  //input bits
  private val inst: XSInstBitFields = io.simple.decodedInst.instr.asTypeOf(new XSInstBitFields)

  val src1 = Cat(0.U(1.W), inst.RS1)
  val src2 = Cat(0.U(1.W), inst.RS2)
  val dest = Cat(0.U(1.W), inst.RD)

  val nf = inst.NF
  val width = inst.WIDTH(1, 0)

  //output bits
  val decodedInsts = Wire(Vec(RenameWidth, new DecodedInst))
  val validToRename = Wire(Vec(RenameWidth, Bool()))
  val readyToIBuf = Wire(Vec(DecodeWidth, Bool()))
  val complexNum = Wire(UInt(3.W))

  //output of DecodeUnit
  val decodedInstsSimple = Wire(new DecodedInst)
  val numOfUop = Wire(UInt(log2Up(maxUopSize+1).W))
  val lmul = Wire(UInt(4.W))
  val isVsetSimple = Wire(Bool())

  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
  indexedLSRegOffset.map(_.src := 0.U)

  //pre decode
  decodedInstsSimple := io.simple.decodedInst
  lmul := io.simple.uopInfo.lmul
  isVsetSimple := io.simple.decodedInst.isVset
  val vlmulReg = io.simple.decodedInst.vpu.vlmul
  val vsewReg = io.simple.decodedInst.vpu.vsew
  when(isVsetSimple) {
    when(dest === 0.U && src1 === 0.U) {
      decodedInstsSimple.fuOpType := VSETOpType.keepVl(io.simple.decodedInst.fuOpType)
    }.elsewhen(src1 === 0.U) {
      decodedInstsSimple.fuOpType := VSETOpType.setVlmax(io.simple.decodedInst.fuOpType)
    }
    when(io.vtype.illegal){
      decodedInstsSimple.flushPipe := true.B
    }
  }

//Type of uop Div 164 val typeOfSplit = decodedInstsSimple.uopSplitType 165 val src1Type = decodedInstsSimple.srcType(0) 166 val src1IsImm = src1Type === SrcType.imm 167 168 when(typeOfSplit === UopSplitType.DIR) { 169 numOfUop := Mux(dest =/= 0.U, 2.U, 170 Mux(src1 =/= 0.U, 1.U, 171 Mux(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType), 2.U, 1.U))) 172 } .otherwise { 173 numOfUop := io.simple.uopInfo.numOfUop 174 } 175 176 177 //uop div up to maxUopSize 178 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 179 csBundle.map { case dst => 180 dst := decodedInstsSimple 181 dst.firstUop := false.B 182 dst.lastUop := false.B 183 } 184 185 csBundle(0).numUops := numOfUop 186 csBundle(0).firstUop := true.B 187 csBundle(numOfUop - 1.U).lastUop := true.B 188 189 switch(typeOfSplit) { 190 is(UopSplitType.DIR) { 191 when(isVsetSimple) { 192 when(dest =/= 0.U) { 193 csBundle(0).fuType := FuType.vsetiwi.U 194 csBundle(0).fuOpType := VSETOpType.switchDest(decodedInstsSimple.fuOpType) 195 csBundle(0).flushPipe := false.B 196 csBundle(0).rfWen := true.B 197 csBundle(0).vecWen := false.B 198 csBundle(1).ldest := VCONFIG_IDX.U 199 csBundle(1).rfWen := false.B 200 csBundle(1).vecWen := true.B 201 }.elsewhen(src1 =/= 0.U) { 202 csBundle(0).ldest := VCONFIG_IDX.U 203 }.elsewhen(VSETOpType.isVsetvli(decodedInstsSimple.fuOpType)) { 204 csBundle(0).fuType := FuType.vsetfwf.U 205 csBundle(0).srcType(0) := SrcType.vp 206 csBundle(0).lsrc(0) := VCONFIG_IDX.U 207 }.elsewhen(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType)) { 208 csBundle(0).srcType(0) := SrcType.reg 209 csBundle(0).srcType(1) := SrcType.imm 210 csBundle(0).lsrc(1) := 0.U 211 csBundle(0).ldest := FP_TMP_REG_MV.U 212 csBundle(0).fuType := FuType.i2f.U 213 csBundle(0).rfWen := false.B 214 csBundle(0).fpWen := true.B 215 csBundle(0).vecWen := false.B 216 csBundle(0).fpu.isAddSub := false.B 217 csBundle(0).fpu.typeTagIn := FPU.D 218 csBundle(0).fpu.typeTagOut := FPU.D 219 csBundle(0).fpu.fromInt := true.B 220 
csBundle(0).fpu.wflags := false.B 221 csBundle(0).fpu.fpWen := true.B 222 csBundle(0).fpu.div := false.B 223 csBundle(0).fpu.sqrt := false.B 224 csBundle(0).fpu.fcvt := false.B 225 csBundle(0).flushPipe := false.B 226 csBundle(1).fuType := FuType.vsetfwf.U 227 csBundle(1).srcType(0) := SrcType.vp 228 csBundle(1).lsrc(0) := VCONFIG_IDX.U 229 csBundle(1).srcType(1) := SrcType.fp 230 csBundle(1).lsrc(1) := FP_TMP_REG_MV.U 231 csBundle(1).ldest := VCONFIG_IDX.U 232 } 233 } 234 } 235 is(UopSplitType.VEC_VVV) { 236 for (i <- 0 until MAX_VLMUL) { 237 csBundle(i).lsrc(0) := src1 + i.U 238 csBundle(i).lsrc(1) := src2 + i.U 239 csBundle(i).lsrc(2) := dest + i.U 240 csBundle(i).ldest := dest + i.U 241 csBundle(i).uopIdx := i.U 242 } 243 } 244 is(UopSplitType.VEC_VFV) { 245 for (i <- 0 until MAX_VLMUL) { 246 csBundle(i).lsrc(1) := src2 + i.U 247 csBundle(i).lsrc(2) := dest + i.U 248 csBundle(i).ldest := dest + i.U 249 csBundle(i).uopIdx := i.U 250 } 251 } 252 is(UopSplitType.VEC_EXT2) { 253 for (i <- 0 until MAX_VLMUL / 2) { 254 csBundle(2 * i).lsrc(1) := src2 + i.U 255 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 256 csBundle(2 * i).ldest := dest + (2 * i).U 257 csBundle(2 * i).uopIdx := (2 * i).U 258 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 259 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 260 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 261 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 262 } 263 } 264 is(UopSplitType.VEC_EXT4) { 265 for (i <- 0 until MAX_VLMUL / 4) { 266 csBundle(4 * i).lsrc(1) := src2 + i.U 267 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 268 csBundle(4 * i).ldest := dest + (4 * i).U 269 csBundle(4 * i).uopIdx := (4 * i).U 270 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 271 csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U 272 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 273 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 274 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 275 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 276 csBundle(4 * i + 
2).ldest := dest + (4 * i + 2).U 277 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 278 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 279 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 280 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 281 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 282 } 283 } 284 is(UopSplitType.VEC_EXT8) { 285 for (i <- 0 until MAX_VLMUL) { 286 csBundle(i).lsrc(1) := src2 287 csBundle(i).lsrc(2) := dest + i.U 288 csBundle(i).ldest := dest + i.U 289 csBundle(i).uopIdx := i.U 290 } 291 } 292 is(UopSplitType.VEC_0XV) { 293 /* 294 FMV.D.X 295 */ 296 csBundle(0).srcType(0) := SrcType.reg 297 csBundle(0).srcType(1) := SrcType.imm 298 csBundle(0).lsrc(1) := 0.U 299 csBundle(0).ldest := FP_TMP_REG_MV.U 300 csBundle(0).fuType := FuType.i2f.U 301 csBundle(0).rfWen := false.B 302 csBundle(0).fpWen := true.B 303 csBundle(0).vecWen := false.B 304 csBundle(0).fpu.isAddSub := false.B 305 csBundle(0).fpu.typeTagIn := FPU.D 306 csBundle(0).fpu.typeTagOut := FPU.D 307 csBundle(0).fpu.fromInt := true.B 308 csBundle(0).fpu.wflags := false.B 309 csBundle(0).fpu.fpWen := true.B 310 csBundle(0).fpu.div := false.B 311 csBundle(0).fpu.sqrt := false.B 312 csBundle(0).fpu.fcvt := false.B 313 /* 314 vfmv.s.f 315 */ 316 csBundle(1).srcType(0) := SrcType.fp 317 csBundle(1).srcType(1) := SrcType.vp 318 csBundle(1).srcType(2) := SrcType.vp 319 csBundle(1).lsrc(0) := FP_TMP_REG_MV.U 320 csBundle(1).lsrc(1) := 0.U 321 csBundle(1).lsrc(2) := dest 322 csBundle(1).ldest := dest 323 csBundle(1).fuType := FuType.vppu.U 324 csBundle(1).fuOpType := VpermType.dummy 325 csBundle(1).rfWen := false.B 326 csBundle(1).fpWen := false.B 327 csBundle(1).vecWen := true.B 328 } 329 is(UopSplitType.VEC_VXV) { 330 /* 331 i to vector move 332 */ 333 csBundle(0).srcType(0) := SrcType.reg 334 csBundle(0).srcType(1) := SrcType.imm 335 csBundle(0).lsrc(1) := 0.U 336 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 337 csBundle(0).fuType := FuType.i2v.U 338 csBundle(0).fuOpType := Cat(Mux(src1IsImm, 
IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg) 339 csBundle(0).vecWen := true.B 340 /* 341 LMUL 342 */ 343 for (i <- 0 until MAX_VLMUL) { 344 csBundle(i + 1).srcType(0) := SrcType.vp 345 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 346 csBundle(i + 1).lsrc(1) := src2 + i.U 347 csBundle(i + 1).lsrc(2) := dest + i.U 348 csBundle(i + 1).ldest := dest + i.U 349 csBundle(i + 1).uopIdx := i.U 350 } 351 } 352 is(UopSplitType.VEC_VVW) { 353 for (i <- 0 until MAX_VLMUL / 2) { 354 csBundle(2 * i).lsrc(0) := src1 + i.U 355 csBundle(2 * i).lsrc(1) := src2 + i.U 356 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 357 csBundle(2 * i).ldest := dest + (2 * i).U 358 csBundle(2 * i).uopIdx := (2 * i).U 359 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 360 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 361 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 362 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 363 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 364 } 365 } 366 is(UopSplitType.VEC_VFW) { 367 for (i <- 0 until MAX_VLMUL / 2) { 368 csBundle(2 * i).lsrc(0) := src1 369 csBundle(2 * i).lsrc(1) := src2 + i.U 370 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 371 csBundle(2 * i).ldest := dest + (2 * i).U 372 csBundle(2 * i).uopIdx := (2 * i).U 373 csBundle(2 * i + 1).lsrc(0) := src1 374 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 375 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 376 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 377 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 378 } 379 } 380 is(UopSplitType.VEC_WVW) { 381 for (i <- 0 until MAX_VLMUL / 2) { 382 csBundle(2 * i).lsrc(0) := src1 + i.U 383 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 384 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 385 csBundle(2 * i).ldest := dest + (2 * i).U 386 csBundle(2 * i).uopIdx := (2 * i).U 387 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 388 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 389 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 390 csBundle(2 * i + 
1).ldest := dest + (2 * i + 1).U 391 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 392 } 393 } 394 is(UopSplitType.VEC_VXW) { 395 /* 396 i to vector move 397 */ 398 csBundle(0).srcType(0) := SrcType.reg 399 csBundle(0).srcType(1) := SrcType.imm 400 csBundle(0).lsrc(1) := 0.U 401 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 402 csBundle(0).fuType := FuType.i2v.U 403 csBundle(0).fuOpType := vsewReg 404 csBundle(0).vecWen := true.B 405 406 for (i <- 0 until MAX_VLMUL / 2) { 407 csBundle(2 * i + 1).srcType(0) := SrcType.vp 408 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 409 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 410 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 411 csBundle(2 * i + 1).ldest := dest + (2 * i).U 412 csBundle(2 * i + 1).uopIdx := (2 * i).U 413 csBundle(2 * i + 2).srcType(0) := SrcType.vp 414 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 415 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 416 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 417 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 418 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 419 } 420 } 421 is(UopSplitType.VEC_WXW) { 422 /* 423 i to vector move 424 */ 425 csBundle(0).srcType(0) := SrcType.reg 426 csBundle(0).srcType(1) := SrcType.imm 427 csBundle(0).lsrc(1) := 0.U 428 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 429 csBundle(0).fuType := FuType.i2v.U 430 csBundle(0).fuOpType := vsewReg 431 csBundle(0).vecWen := true.B 432 433 for (i <- 0 until MAX_VLMUL / 2) { 434 csBundle(2 * i + 1).srcType(0) := SrcType.vp 435 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 436 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 437 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 438 csBundle(2 * i + 1).ldest := dest + (2 * i).U 439 csBundle(2 * i + 1).uopIdx := (2 * i).U 440 csBundle(2 * i + 2).srcType(0) := SrcType.vp 441 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 442 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 443 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 444 
csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 445 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 446 } 447 } 448 is(UopSplitType.VEC_WVV) { 449 for (i <- 0 until MAX_VLMUL / 2) { 450 451 csBundle(2 * i).lsrc(0) := src1 + i.U 452 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 453 csBundle(2 * i).lsrc(2) := dest + i.U 454 csBundle(2 * i).ldest := dest + i.U 455 csBundle(2 * i).uopIdx := (2 * i).U 456 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 457 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 458 csBundle(2 * i + 1).lsrc(2) := dest + i.U 459 csBundle(2 * i + 1).ldest := dest + i.U 460 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 461 } 462 } 463 is(UopSplitType.VEC_WFW) { 464 for (i <- 0 until MAX_VLMUL / 2) { 465 csBundle(2 * i).lsrc(0) := src1 466 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 467 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 468 csBundle(2 * i).ldest := dest + (2 * i).U 469 csBundle(2 * i).uopIdx := (2 * i).U 470 csBundle(2 * i + 1).lsrc(0) := src1 471 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 472 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 473 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 474 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 475 } 476 } 477 is(UopSplitType.VEC_WXV) { 478 /* 479 i to vector move 480 */ 481 csBundle(0).srcType(0) := SrcType.reg 482 csBundle(0).srcType(1) := SrcType.imm 483 csBundle(0).lsrc(1) := 0.U 484 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 485 csBundle(0).fuType := FuType.i2v.U 486 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg) 487 csBundle(0).vecWen := true.B 488 489 for (i <- 0 until MAX_VLMUL / 2) { 490 csBundle(2 * i + 1).srcType(0) := SrcType.vp 491 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 492 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 493 csBundle(2 * i + 1).lsrc(2) := dest + i.U 494 csBundle(2 * i + 1).ldest := dest + i.U 495 csBundle(2 * i + 1).uopIdx := (2 * i).U 496 csBundle(2 * i + 2).srcType(0) := 
SrcType.vp 497 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 498 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 499 csBundle(2 * i + 2).lsrc(2) := dest + i.U 500 csBundle(2 * i + 2).ldest := dest + i.U 501 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 502 } 503 } 504 is(UopSplitType.VEC_VVM) { 505 csBundle(0).lsrc(2) := dest 506 csBundle(0).ldest := dest 507 csBundle(0).uopIdx := 0.U 508 for (i <- 1 until MAX_VLMUL) { 509 csBundle(i).lsrc(0) := src1 + i.U 510 csBundle(i).lsrc(1) := src2 + i.U 511 csBundle(i).lsrc(2) := dest 512 csBundle(i).ldest := dest 513 csBundle(i).uopIdx := i.U 514 } 515 } 516 is(UopSplitType.VEC_VFM) { 517 csBundle(0).lsrc(2) := dest 518 csBundle(0).ldest := dest 519 csBundle(0).uopIdx := 0.U 520 for (i <- 1 until MAX_VLMUL) { 521 csBundle(i).lsrc(0) := src1 522 csBundle(i).lsrc(1) := src2 + i.U 523 csBundle(i).lsrc(2) := dest 524 csBundle(i).ldest := dest 525 csBundle(i).uopIdx := i.U 526 } 527 csBundle(numOfUop - 1.U).ldest := dest 528 } 529 is(UopSplitType.VEC_VXM) { 530 /* 531 i to vector move 532 */ 533 csBundle(0).srcType(0) := SrcType.reg 534 csBundle(0).srcType(1) := SrcType.imm 535 csBundle(0).lsrc(1) := 0.U 536 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 537 csBundle(0).fuType := FuType.i2v.U 538 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg) 539 csBundle(0).vecWen := true.B 540 //LMUL 541 csBundle(1).srcType(0) := SrcType.vp 542 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 543 csBundle(1).lsrc(2) := dest 544 csBundle(1).ldest := dest 545 csBundle(1).uopIdx := 0.U 546 for (i <- 1 until MAX_VLMUL) { 547 csBundle(i + 1).srcType(0) := SrcType.vp 548 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 549 csBundle(i + 1).lsrc(1) := src2 + i.U 550 csBundle(i + 1).lsrc(2) := dest 551 csBundle(i + 1).ldest := dest 552 csBundle(i + 1).uopIdx := i.U 553 } 554 csBundle(numOfUop - 1.U).ldest := dest 555 } 556 is(UopSplitType.VEC_SLIDE1UP) { 557 /* 558 i to vector move 
559 */ 560 csBundle(0).srcType(0) := SrcType.reg 561 csBundle(0).srcType(1) := SrcType.imm 562 csBundle(0).lsrc(1) := 0.U 563 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 564 csBundle(0).fuType := FuType.i2v.U 565 csBundle(0).fuOpType := vsewReg 566 csBundle(0).vecWen := true.B 567 //LMUL 568 csBundle(1).srcType(0) := SrcType.vp 569 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 570 csBundle(1).lsrc(2) := dest 571 csBundle(1).ldest := dest 572 csBundle(1).uopIdx := 0.U 573 for (i <- 1 until MAX_VLMUL) { 574 csBundle(i + 1).srcType(0) := SrcType.vp 575 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 576 csBundle(i + 1).lsrc(1) := src2 + i.U 577 csBundle(i + 1).lsrc(2) := dest + i.U 578 csBundle(i + 1).ldest := dest + i.U 579 csBundle(i + 1).uopIdx := i.U 580 } 581 } 582 is(UopSplitType.VEC_FSLIDE1UP) { 583 //LMUL 584 csBundle(0).srcType(0) := SrcType.fp 585 csBundle(0).lsrc(0) := src1 586 csBundle(0).lsrc(1) := src2 587 csBundle(0).lsrc(2) := dest 588 csBundle(0).ldest := dest 589 csBundle(0).uopIdx := 0.U 590 for (i <- 1 until MAX_VLMUL) { 591 csBundle(i).srcType(0) := SrcType.vp 592 csBundle(i).lsrc(0) := src2 + (i - 1).U 593 csBundle(i).lsrc(1) := src2 + i.U 594 csBundle(i).lsrc(2) := dest + i.U 595 csBundle(i).ldest := dest + i.U 596 csBundle(i).uopIdx := i.U 597 } 598 } 599 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 600 /* 601 i to vector move 602 */ 603 csBundle(0).srcType(0) := SrcType.reg 604 csBundle(0).srcType(1) := SrcType.imm 605 csBundle(0).lsrc(1) := 0.U 606 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 607 csBundle(0).fuType := FuType.i2v.U 608 csBundle(0).fuOpType := vsewReg 609 csBundle(0).vecWen := true.B 610 //LMUL 611 for (i <- 0 until MAX_VLMUL) { 612 csBundle(2 * i + 1).srcType(0) := SrcType.vp 613 csBundle(2 * i + 1).srcType(1) := SrcType.vp 614 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 615 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 616 csBundle(2 * i + 1).lsrc(2) := dest + i.U 617 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 
618 csBundle(2 * i + 1).uopIdx := (2 * i).U 619 if (2 * i + 2 < MAX_VLMUL * 2) { 620 csBundle(2 * i + 2).srcType(0) := SrcType.vp 621 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 622 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 623 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 624 csBundle(2 * i + 2).ldest := dest + i.U 625 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 626 } 627 } 628 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 629 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 630 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 631 } 632 is(UopSplitType.VEC_FSLIDE1DOWN) { 633 //LMUL 634 for (i <- 0 until MAX_VLMUL) { 635 csBundle(2 * i).srcType(0) := SrcType.vp 636 csBundle(2 * i).srcType(1) := SrcType.vp 637 csBundle(2 * i).lsrc(0) := src2 + (i + 1).U 638 csBundle(2 * i).lsrc(1) := src2 + i.U 639 csBundle(2 * i).lsrc(2) := dest + i.U 640 csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U 641 csBundle(2 * i).uopIdx := (2 * i).U 642 csBundle(2 * i + 1).srcType(0) := SrcType.fp 643 csBundle(2 * i + 1).lsrc(0) := src1 644 csBundle(2 * i + 1).lsrc(2) := VECTOR_TMP_REG_LMUL.U 645 csBundle(2 * i + 1).ldest := dest + i.U 646 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 647 } 648 csBundle(numOfUop - 1.U).srcType(0) := SrcType.fp 649 csBundle(numOfUop - 1.U).lsrc(0) := src1 650 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 651 } 652 is(UopSplitType.VEC_VRED) { 653 when(vlmulReg === "b001".U) { 654 csBundle(0).srcType(2) := SrcType.DC 655 csBundle(0).lsrc(0) := src2 + 1.U 656 csBundle(0).lsrc(1) := src2 657 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 658 csBundle(0).uopIdx := 0.U 659 } 660 when(vlmulReg === "b010".U) { 661 csBundle(0).srcType(2) := SrcType.DC 662 csBundle(0).lsrc(0) := src2 + 1.U 663 csBundle(0).lsrc(1) := src2 664 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 665 csBundle(0).uopIdx := 0.U 666 667 csBundle(1).srcType(2) := SrcType.DC 668 csBundle(1).lsrc(0) := src2 + 3.U 669 csBundle(1).lsrc(1) := src2 + 2.U 
670 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 671 csBundle(1).uopIdx := 1.U 672 673 csBundle(2).srcType(2) := SrcType.DC 674 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 675 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 676 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 677 csBundle(2).uopIdx := 2.U 678 } 679 when(vlmulReg === "b011".U) { 680 for (i <- 0 until MAX_VLMUL) { 681 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 682 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 683 csBundle(i).lsrc(1) := src2 + (i * 2).U 684 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 685 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 686 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 687 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 688 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 689 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 690 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 691 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 692 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 693 } 694 csBundle(i).srcType(2) := SrcType.DC 695 csBundle(i).uopIdx := i.U 696 } 697 } 698 when(vlmulReg.orR) { 699 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 700 csBundle(numOfUop - 1.U).lsrc(0) := src1 701 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 702 csBundle(numOfUop - 1.U).lsrc(2) := dest 703 csBundle(numOfUop - 1.U).ldest := dest 704 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 705 } 706 } 707 is(UopSplitType.VEC_VFRED) { 708 val vlmul = vlmulReg 709 val vsew = vsewReg 710 when(vlmul === VLmul.m8){ 711 for (i <- 0 until 4) { 712 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 713 csBundle(i).lsrc(1) := src2 + (i * 2).U 714 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 715 csBundle(i).uopIdx := i.U 716 } 717 for (i <- 4 until 6) { 718 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 719 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 720 csBundle(i).ldest := 
(VECTOR_TMP_REG_LMUL + i).U 721 csBundle(i).uopIdx := i.U 722 } 723 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 724 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 725 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 726 csBundle(6).uopIdx := 6.U 727 when(vsew === VSew.e64) { 728 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 729 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 730 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 731 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 732 csBundle(7).uopIdx := 7.U 733 csBundle(8).lsrc(0) := src1 734 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 735 csBundle(8).ldest := dest 736 csBundle(8).uopIdx := 8.U 737 } 738 when(vsew === VSew.e32) { 739 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 740 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 741 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 742 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 743 csBundle(7).uopIdx := 7.U 744 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 745 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 746 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 747 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 748 csBundle(8).uopIdx := 8.U 749 csBundle(9).lsrc(0) := src1 750 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 751 csBundle(9).ldest := dest 752 csBundle(9).uopIdx := 9.U 753 } 754 when(vsew === VSew.e16) { 755 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 756 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 757 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 758 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 759 csBundle(7).uopIdx := 7.U 760 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 761 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 762 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 763 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 764 csBundle(8).uopIdx := 8.U 765 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 766 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 767 csBundle(9).ldest := 
(VECTOR_TMP_REG_LMUL + 9).U 768 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 769 csBundle(9).uopIdx := 9.U 770 csBundle(10).lsrc(0) := src1 771 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 772 csBundle(10).ldest := dest 773 csBundle(10).uopIdx := 10.U 774 } 775 } 776 when(vlmul === VLmul.m4) { 777 for (i <- 0 until 2) { 778 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 779 csBundle(i).lsrc(1) := src2 + (i * 2).U 780 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 781 csBundle(i).uopIdx := i.U 782 } 783 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 784 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 785 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 786 csBundle(2).uopIdx := 2.U 787 when(vsew === VSew.e64) { 788 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 789 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 790 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 791 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 792 csBundle(3).uopIdx := 3.U 793 csBundle(4).lsrc(0) := src1 794 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 795 csBundle(4).ldest := dest 796 csBundle(4).uopIdx := 4.U 797 } 798 when(vsew === VSew.e32) { 799 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 800 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 801 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 802 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 803 csBundle(3).uopIdx := 3.U 804 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 805 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 806 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 807 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 808 csBundle(4).uopIdx := 4.U 809 csBundle(5).lsrc(0) := src1 810 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 811 csBundle(5).ldest := dest 812 csBundle(5).uopIdx := 5.U 813 } 814 when(vsew === VSew.e16) { 815 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 816 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 817 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 818 
csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 819 csBundle(3).uopIdx := 3.U 820 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 821 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 822 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 823 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 824 csBundle(4).uopIdx := 4.U 825 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 826 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 827 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 828 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 829 csBundle(5).uopIdx := 5.U 830 csBundle(6).lsrc(0) := src1 831 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 832 csBundle(6).ldest := dest 833 csBundle(6).uopIdx := 6.U 834 } 835 } 836 when(vlmul === VLmul.m2) { 837 csBundle(0).lsrc(0) := src2 + 1.U 838 csBundle(0).lsrc(1) := src2 + 0.U 839 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 840 csBundle(0).uopIdx := 0.U 841 when(vsew === VSew.e64) { 842 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 843 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 844 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 845 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 846 csBundle(1).uopIdx := 1.U 847 csBundle(2).lsrc(0) := src1 848 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 849 csBundle(2).ldest := dest 850 csBundle(2).uopIdx := 2.U 851 } 852 when(vsew === VSew.e32) { 853 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 854 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 855 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 856 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 857 csBundle(1).uopIdx := 1.U 858 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 859 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 860 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 861 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 862 csBundle(2).uopIdx := 2.U 863 csBundle(3).lsrc(0) := src1 864 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 865 csBundle(3).ldest := dest 866 csBundle(3).uopIdx := 3.U 867 } 868 when(vsew === 
VSew.e16) { 869 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 870 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 871 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 872 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 873 csBundle(1).uopIdx := 1.U 874 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 875 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 876 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 877 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 878 csBundle(2).uopIdx := 2.U 879 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 880 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 881 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 882 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 883 csBundle(3).uopIdx := 3.U 884 csBundle(4).lsrc(0) := src1 885 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 886 csBundle(4).ldest := dest 887 csBundle(4).uopIdx := 4.U 888 } 889 } 890 when(vlmul === VLmul.m1) { 891 when(vsew === VSew.e64) { 892 csBundle(0).lsrc(0) := src2 893 csBundle(0).lsrc(1) := src2 894 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 895 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 896 csBundle(0).uopIdx := 0.U 897 csBundle(1).lsrc(0) := src1 898 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 899 csBundle(1).ldest := dest 900 csBundle(1).uopIdx := 1.U 901 } 902 when(vsew === VSew.e32) { 903 csBundle(0).lsrc(0) := src2 904 csBundle(0).lsrc(1) := src2 905 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 906 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 907 csBundle(0).uopIdx := 0.U 908 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 909 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 910 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 911 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 912 csBundle(1).uopIdx := 1.U 913 csBundle(2).lsrc(0) := src1 914 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 915 csBundle(2).ldest := dest 916 csBundle(2).uopIdx := 2.U 917 } 918 when(vsew === VSew.e16) { 919 csBundle(0).lsrc(0) := src2 920 csBundle(0).lsrc(1) := 
src2 921 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 922 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 923 csBundle(0).uopIdx := 0.U 924 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 925 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 926 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 927 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 928 csBundle(1).uopIdx := 1.U 929 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 930 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 931 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 932 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 933 csBundle(2).uopIdx := 2.U 934 csBundle(3).lsrc(0) := src1 935 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 936 csBundle(3).ldest := dest 937 csBundle(3).uopIdx := 3.U 938 } 939 } 940 when(vlmul === VLmul.mf2) { 941 when(vsew === VSew.e32) { 942 csBundle(0).lsrc(0) := src2 943 csBundle(0).lsrc(1) := src2 944 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 945 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 946 csBundle(0).uopIdx := 0.U 947 csBundle(1).lsrc(0) := src1 948 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 949 csBundle(1).ldest := dest 950 csBundle(1).uopIdx := 1.U 951 } 952 when(vsew === VSew.e16) { 953 csBundle(0).lsrc(0) := src2 954 csBundle(0).lsrc(1) := src2 955 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 956 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 957 csBundle(0).uopIdx := 0.U 958 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 959 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 960 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 961 csBundle(1).vpu.fpu.isFoldTo1_8 := true.B 962 csBundle(1).uopIdx := 1.U 963 csBundle(2).lsrc(0) := src1 964 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 965 csBundle(2).ldest := dest 966 csBundle(2).uopIdx := 2.U 967 } 968 } 969 when(vlmul === VLmul.mf4) { 970 when(vsew === VSew.e16) { 971 csBundle(0).lsrc(0) := src2 972 csBundle(0).lsrc(1) := src2 973 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 974 
csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := src1
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := dest
          csBundle(1).uopIdx := 1.U
        }
      }
    }

    is(UopSplitType.VEC_VFREDOSUM) {
      import yunsuan.VfaluType
      val vlmul = vlmulReg
      val vsew = vsewReg
      // True when the decoded operation is the widening ordered sum (vfwredosum).
      val isWiden = decodedInstsSimple.fuOpType === VfaluType.vfwredosum

      // Returns the isFoldTo1_<fold> flag of uop `idx`; only 2, 4 and 8 exist.
      def foldFlag(idx: Int, fold: Int) = fold match {
        case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2
        case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4
        case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8
        case _ => throw new IllegalArgumentException(s"unsupported fold width $fold")
      }

      /**
        * Emits the `vlmax` chained uops of one (LMUL, SEW) configuration.
        *
        * Uops are grouped `fold` at a time. The first uop of a group reads `src2 + group`
        * on lsrc(1) and lsrc(2) and raises no fold flag; every other uop reads the temporary
        * register and raises a fold flag. Uop 0 reads `src1` on lsrc(0); the last uop writes
        * `dest` and, unless it is a group head, also reads `dest` on lsrc(2).
        *
        * @param vlmax      number of uops to emit
        * @param fold       group size; also selects the flag isFoldTo1_<fold>
        * @param widenAware when true, the second uop of each group re-reads `src2 + group` on
        *                   lsrc(2) if `isWiden`, and the raised flag is isFoldTo1_<fold/2> for
        *                   the widening operation and isFoldTo1_<fold> otherwise
        */
      def genOrderedSumUops(vlmax: Int, fold: Int, widenAware: Boolean): Unit = {
        for (i <- 0 until vlmax) {
          val isGroupHead = i % fold == 0
          val isLast = i == vlmax - 1
          // `def`s so that each use elaborates its own node, exactly as the unrolled code did.
          def groupSrc = src2 + (i / fold).U
          def tmpReg = VECTOR_TMP_REG_LMUL.U
          csBundle(i).lsrc(0) := (if (i == 0) src1 else tmpReg)
          csBundle(i).lsrc(1) := (if (isGroupHead) groupSrc else tmpReg)
          csBundle(i).lsrc(2) := (
            if (isGroupHead) groupSrc
            else if (isLast) dest
            else if (widenAware && i % fold == 1) Mux(isWiden, groupSrc, tmpReg)
            else tmpReg
          )
          csBundle(i).ldest := (if (isLast) dest else tmpReg)
          if (widenAware) {
            foldFlag(i, fold / 2) := isWiden && (!isGroupHead).B
            foldFlag(i, fold) := !isWiden && (!isGroupHead).B
          } else {
            foldFlag(i, fold) := (!isGroupHead).B
          }
          csBundle(i).uopIdx := i.U
        }
      }

      // (LMUL encoding, uop count when fold == 8); other folds scale proportionally.
      val lmulCases = Seq(
        VLmul.m8 -> 64, VLmul.m4 -> 32, VLmul.m2 -> 16,
        VLmul.m1 -> 8, VLmul.mf2 -> 4, VLmul.mf4 -> 2
      )
      // (SEW encoding, fold / group size).
      val sewCases = Seq(VSew.e64 -> 2, VSew.e32 -> 4, VSew.e16 -> 8)

      for ((lmulEnc, uopsAtFold8) <- lmulCases) {
        when(vlmul === lmulEnc) {
          for ((sewEnc, fold) <- sewCases) {
            val vlmax = uopsAtFold8 * fold / 8
            // Configurations that would yield fewer than two uops get no split table entry.
            if (vlmax >= 2) {
              when(vsew === sewEnc) {
                // The m8 entries and all e64 entries never take the widening path.
                genOrderedSumUops(vlmax, fold, widenAware = uopsAtFold8 < 64 && fold > 2)
              }
            }
          }
        }
      }
    }

    is(UopSplitType.VEC_SLIDEUP) {
      // i to vector move: uop 0 places the scalar/immediate operand in the temporary vector register
      val mvUop = csBundle(0)
      mvUop.srcType(0) := SrcType.reg
      mvUop.srcType(1) := SrcType.imm
      mvUop.lsrc(1) := 0.U
      mvUop.ldest := VECTOR_TMP_REG_LMUL.U
      mvUop.fuType := FuType.i2v.U
      mvUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
      mvUop.vecWen := true.B
      // LMUL: destination register i is built by a chain of i + 1 uops, one per source register j <= i
      for (i <- 0 until MAX_VLMUL; j <- 0 to i) {
        // Slot 0 is the move above, so the triangular index is shifted by one.
        val slot = i * (i + 1) / 2 + j + 1
        // The chain starts from the old destination value and ends in the destination itself;
        // intermediate links go through temporary registers.
        val oldVd = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
        val newVd = if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
        csBundle(slot).srcType(0) := SrcType.vp
        csBundle(slot).lsrc(0) := VECTOR_TMP_REG_LMUL.U
        csBundle(slot).lsrc(1) := src2 + j.U
        csBundle(slot).lsrc(2) := oldVd
        csBundle(slot).ldest := newVd
        csBundle(slot).uopIdx := (slot - 1).U
      }
    }

    is(UopSplitType.VEC_SLIDEDOWN) {
      // i to vector move
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
      csBundle(0).vecWen := true.B
      // LMUL: uops are addressed backwards from numOfUop, and only rows with i < lmul are emitted
      for (i <- 0 until MAX_VLMUL; j <- (0 to i).reverse) {
        when(i.U < lmul) {
          // Destination register handled by row i, counted down from the top of the group.
          def rowVd = dest + lmul - 1.U - i.U
          val oldVd = if (j == 0) rowVd else (VECTOR_TMP_REG_LMUL + j).U
          val newVd = if (j == i) rowVd else (VECTOR_TMP_REG_LMUL + j + 1).U
          // Distance of this uop from numOfUop.
          val back = i * (i + 1) / 2 + i - j + 1
          val uop = csBundle(numOfUop - back.U)
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := src2 + lmul - 1.U - j.U
          uop.lsrc(2) := oldVd
          uop.ldest := newVd
          uop.uopIdx := numOfUop - (back + 1).U
        }
      }
    }

    is(UopSplitType.VEC_M0X) {
      // LMUL: a chain of vector uops, each reading the previous temporary and src2
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i)
        // The first uop has no predecessor, so its first source is don't-care.
        uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
        uop.srcType(1) := SrcType.vp
        uop.rfWen := false.B
        uop.vecWen := true.B
        uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        uop.lsrc(1) := src2
        // csBundle(i).lsrc(2) := dest + i.U  DontCare
        uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
        uop.uopIdx := i.U
      }
      // The last vector uop (index lmul - 1) writes the fp temporary instead of a vector register.
      val lastVecUop = csBundle(lmul - 1.U)
      lastVecUop.vecWen := false.B
      lastVecUop.fpWen := true.B
      lastVecUop.ldest := FP_TMP_REG_MV.U
      // FMV_X_D: uop `lmul` moves the fp temporary into the integer destination
      val mvUop = csBundle(lmul)
      mvUop.srcType(0) := SrcType.fp
      mvUop.srcType(1) := SrcType.imm
      mvUop.lsrc(0) := FP_TMP_REG_MV.U
      mvUop.lsrc(1) := 0.U
      mvUop.ldest := dest
      mvUop.fuType := FuType.fmisc.U
      mvUop.rfWen := true.B
      mvUop.fpWen := false.B
      mvUop.vecWen := false.B
      mvUop.fpu.isAddSub := false.B
      mvUop.fpu.typeTagIn := FPU.D
      mvUop.fpu.typeTagOut := FPU.D
      mvUop.fpu.fromInt := false.B
      mvUop.fpu.wflags := false.B
      mvUop.fpu.fpWen := false.B
      mvUop.fpu.div := false.B
      mvUop.fpu.sqrt := false.B
      mvUop.fpu.fcvt := false.B
    }

    is(UopSplitType.VEC_MVV) {
      // LMUL: two uops per register index; the first writes dest + i, the second a temporary
      for (i <- 0 until MAX_VLMUL) {
        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
        val toDest = csBundle(i * 2 + 0)
        val toTmp = csBundle(i * 2 + 1)

        toDest.srcType(0) := srcType0
        toDest.srcType(1) := SrcType.vp
        toDest.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        toDest.lsrc(1) := src2
        toDest.lsrc(2) := dest + i.U
        toDest.ldest := dest + i.U
        toDest.uopIdx := (i * 2 + 0).U

        toTmp.srcType(0) := srcType0
        toTmp.srcType(1) := SrcType.vp
        toTmp.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        toTmp.lsrc(1) := src2
        // csBundle(i).lsrc(2) := dest + i.U  DontCare
        toTmp.ldest := (VECTOR_TMP_REG_LMUL + i).U
        toTmp.uopIdx := (i * 2 + 1).U
      }
    }

    is(UopSplitType.VEC_M0X_VFIRST) {
      // LMUL
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := true.B
      csBundle(0).ldest := FP_TMP_REG_MV.U
      // FMV_X_D
      csBundle(1).srcType(0) := SrcType.fp
      csBundle(1).srcType(1) := SrcType.imm
      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
      csBundle(1).lsrc(1) := 0.U
      csBundle(1).ldest := dest
      csBundle(1).fuType := FuType.fmisc.U
      csBundle(1).rfWen := true.B
      csBundle(1).fpWen := false.B
      csBundle(1).vecWen := false.B
      csBundle(1).fpu.isAddSub := false.B
csBundle(1).fpu.typeTagIn := FPU.D
      csBundle(1).fpu.typeTagOut := FPU.D
      csBundle(1).fpu.fromInt := false.B
      csBundle(1).fpu.wflags := false.B
      csBundle(1).fpu.fpWen := false.B
      csBundle(1).fpu.div := false.B
      csBundle(1).fpu.sqrt := false.B
      csBundle(1).fpu.fcvt := false.B
    }
    is(UopSplitType.VEC_VWW) {
      for (i <- 0 until MAX_VLMUL*2) {
        // Identical in both branches of the original when/otherwise, so assigned once.
        csBundle(i).srcType(2) := SrcType.DC
        // csBundle(i).lsrc(2) := dest + (2 * i).U
        csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
        csBundle(i).uopIdx := i.U
        when(i.U < lmul){
          // First `lmul` uops: both sources are source register i.
          csBundle(i).lsrc(0) := src2 + i.U
          csBundle(i).lsrc(1) := src2 + i.U
        } otherwise {
          // Remaining uops combine a pair of temporaries: 2*(i - lmul) and 2*(i - lmul) + 1.
          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
        }
      }
      // Final uop: folds in src1 and writes the real destination. These connections were
      // previously repeated inside the loop on every iteration; only the last one took effect
      // (last-connect semantics), so a single set after the loop yields the same hardware.
      val lastUop = csBundle(numOfUop-1.U)
      lastUop.srcType(2) := SrcType.vp
      lastUop.lsrc(0) := src1
      lastUop.lsrc(2) := dest
      lastUop.ldest := dest
    }
    is(UopSplitType.VEC_RGATHER) {
      /**
        * Emits a len x len grid of uops: row i uses `src1 + i` as first source and produces
        * `dest + i`; column j reads `src2 + j`. Within a row the partial result is chained
        * through temporary registers, starting from the old `dest + i`.
        *
        * @param len number of registers in the group (rows and columns)
        */
      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val slot = i * len + j
          // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
          // csBundle(i * len + j).srcType(1) := SrcType.vp
          // csBundle(i * len + j).srcType(2) := SrcType.vp
          csBundle(slot).lsrc(0) := src1 + i.U
          csBundle(slot).lsrc(1) := src2 + j.U
          val vd_old = if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
          csBundle(slot).lsrc(2) := vd_old
          val vd = if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
          csBundle(slot).ldest := vd
          csBundle(slot).uopIdx := slot.U
        }
      }
      switch(vlmulReg) {
        is("b001".U ){
genCsBundle_VEC_RGATHER(2)
        }
        is("b010".U ){
          genCsBundle_VEC_RGATHER(4)
        }
        is("b011".U ){
          genCsBundle_VEC_RGATHER(8)
        }
      }
    }
    is(UopSplitType.VEC_RGATHER_VX) {
      /**
        * Emits a len x len grid of uops in slots 1 .. len*len (slot 0 is the scalar move below).
        * Every uop reads the temporary register written by slot 0; row i produces `dest + i`,
        * chaining partial results through temporary registers across the columns j.
        *
        * @param len number of registers in the group (rows and columns)
        */
      def genCsBundle_RGATHER_VX(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val cell = i * len + j
          val uop = csBundle(cell + 1)
          val vd_old = if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
          val vd = if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
          uop.srcType(0) := SrcType.vp
          // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
          // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := src2 + j.U
          uop.lsrc(2) := vd_old
          uop.ldest := vd
          uop.uopIdx := cell.U
        }
      }
      // i to vector move
      val mvUop = csBundle(0)
      mvUop.srcType(0) := SrcType.reg
      mvUop.srcType(1) := SrcType.imm
      mvUop.lsrc(1) := 0.U
      mvUop.ldest := VECTOR_TMP_REG_LMUL.U
      mvUop.fuType := FuType.i2v.U
      mvUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
      mvUop.vecWen := true.B
      switch(vlmulReg) {
        is("b000".U ){
          genCsBundle_RGATHER_VX(1)
        }
        is("b001".U ){
          genCsBundle_RGATHER_VX(2)
        }
        is("b010".U ){
          genCsBundle_RGATHER_VX(4)
        }
        is("b011".U ){
          genCsBundle_RGATHER_VX(8)
        }
      }
    }
    is(UopSplitType.VEC_RGATHEREI16) {
      /**
        * Variant selected when vsewReg is all zeros: every (i, j) cell expands to two uops that
        * read consecutive index registers `src1 + 2i` and `src1 + 2i + 1`. The first uop of
        * a cell always writes a temporary; the second consumes it and writes either the next
        * temporary or, in the last column, `dest + i`.
        *
        * @param len number of registers in the group (rows and columns)
        */
      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val first = (i * len + j) * 2
          val second = first + 1
          // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
          // csBundle(i * len + j).srcType(1) := SrcType.vp
          // csBundle(i * len + j).srcType(2) := SrcType.vp
          val vd_old0 = if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U
          val vd0 = (VECTOR_TMP_REG_LMUL + j * 2).U
          csBundle(first).lsrc(0) := src1 + (i * 2 + 0).U
          csBundle(first).lsrc(1) := src2 + j.U
          csBundle(first).lsrc(2) := vd_old0
          csBundle(first).ldest := vd0
          csBundle(first).uopIdx := first.U

          val vd_old1 = (VECTOR_TMP_REG_LMUL + j * 2).U
          val vd1 = if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U
          csBundle(second).lsrc(0) := src1 + (i * 2 + 1).U
          csBundle(second).lsrc(1) := src2 + j.U
          csBundle(second).lsrc(2) := vd_old1
          csBundle(second).ldest := vd1
          csBundle(second).uopIdx := second.U
        }
      }
      /**
        * One uop per (i, j) cell: row i reads `src1 + i` and produces `dest + i`, column j
        * reads `src2 + j`, with partial results chained through temporary registers.
        *
        * @param len number of registers in the group (rows and columns)
        */
      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val slot = i * len + j
          val vd_old = if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
          val vd = if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
          // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
          // csBundle(i * len + j).srcType(1) := SrcType.vp
          // csBundle(i * len + j).srcType(2) := SrcType.vp
          csBundle(slot).lsrc(0) := src1 + i.U
          csBundle(slot).lsrc(1) := src2 + j.U
          csBundle(slot).lsrc(2) := vd_old
          csBundle(slot).ldest := vd
          csBundle(slot).uopIdx := slot.U
        }
      }
      // The two-uops-per-cell variant is used when every bit of vsewReg is zero.
      val useSew8Variant = !vsewReg.orR
      switch(vlmulReg) {
        is("b000".U) {
          when(useSew8Variant) {
            genCsBundle_VEC_RGATHEREI16_SEW8(1)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(1)
          }
        }
        is("b001".U) {
          when(useSew8Variant) {
            genCsBundle_VEC_RGATHEREI16_SEW8(2)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(2)
          }
        }
        is("b010".U) {
          when(useSew8Variant) {
            genCsBundle_VEC_RGATHEREI16_SEW8(4)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(4)
          }
        }
        is("b011".U) {
          genCsBundle_VEC_RGATHEREI16(8)
        }
      }
}
    is(UopSplitType.VEC_COMPRESS) {
      /**
        * Emits the uops for a register group of `len`. Row i reads `src2 + i` and has i + 2
        * uops, except the last row, which has i + 1. In non-final rows the extra uop
        * (j == i + 1) writes the base temporary register and leaves source types 1 and 2 as
        * DontCare; in the final row uop j writes `dest + j`.
        *
        * @param len number of registers in the group
        */
      def genCsBundle_VEC_COMPRESS(len:Int): Unit ={
        for (i <- 0 until len) {
          val isLastRow = i == len - 1
          val jlen = if (isLastRow) i + 1 else i + 2
          for (j <- 0 until jlen) {
            val slot = i * (i + 3) / 2 + j
            val isExtraUop = j == i + 1
            val vd_old = if (i == j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
            val vd =
              if (isLastRow) (dest + j.U)
              else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
              else (VECTOR_TMP_REG_LMUL + j + 1).U
            val src23Type = if (isExtraUop) DontCare else SrcType.vp
            csBundle(slot).srcType(0) := SrcType.vp
            csBundle(slot).srcType(1) := src23Type
            csBundle(slot).srcType(2) := src23Type
            csBundle(slot).lsrc(0) := src1
            csBundle(slot).lsrc(1) := src2 + i.U
            csBundle(slot).lsrc(2) := vd_old
            // csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
            csBundle(slot).ldest := vd
            csBundle(slot).uopIdx := slot.U
          }
        }
      }
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_COMPRESS(2)
        }
        is("b010".U ){
          genCsBundle_VEC_COMPRESS(4)
        }
        is("b011".U ){
          genCsBundle_VEC_COMPRESS(8)
        }
      }
    }
    is(UopSplitType.VEC_MVNR) {
      // One uop per register; every operand is offset by the register index.
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i)
        uop.lsrc(0) := src1 + i.U
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := dest + i.U
        uop.ldest := dest + i.U
        uop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_US_LDST) {
      /*
      FMV.D.X: uop 0 copies the integer source into the fp temporary register
       */
      val mvUop = csBundle(0)
      mvUop.srcType(0) := SrcType.reg
      mvUop.srcType(1) := SrcType.imm
      mvUop.lsrc(1) := 0.U
      mvUop.ldest := FP_TMP_REG_MV.U
      mvUop.fuType := FuType.i2f.U
      mvUop.rfWen := false.B
      mvUop.fpWen := true.B
      mvUop.vecWen := false.B
      mvUop.fpu.isAddSub := false.B
      mvUop.fpu.typeTagIn := FPU.D
      mvUop.fpu.typeTagOut := FPU.D
      mvUop.fpu.fromInt := true.B
      mvUop.fpu.wflags := false.B
      mvUop.fpu.fpWen := true.B
      mvUop.fpu.div := false.B
      mvUop.fpu.sqrt := false.B
      mvUop.fpu.fcvt := false.B
      //LMUL: one memory uop per destination register, each reading the fp temporary
      for (i <- 0 until MAX_VLMUL) {
        val memUop = csBundle(i + 1)
        memUop.srcType(0) := SrcType.fp
        memUop.lsrc(0) := FP_TMP_REG_MV.U
        memUop.lsrc(2) := dest + i.U // old vd
        memUop.ldest := dest + i.U
        memUop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_S_LDST) {
      /*
      FMV.D.X: uops 0 and 1 are both integer-to-fp moves
       */
      // Settings shared by the two moves.
      for (mvUop <- Seq(csBundle(0), csBundle(1))) {
        mvUop.srcType(0) := SrcType.reg
        mvUop.srcType(1) := SrcType.imm
        mvUop.lsrc(1) := 0.U
        mvUop.fuType := FuType.i2f.U
        mvUop.rfWen := false.B
        mvUop.fpWen := true.B
        mvUop.vecWen := false.B
        mvUop.fpu.isAddSub := false.B
        mvUop.fpu.typeTagIn := FPU.D
        mvUop.fpu.typeTagOut := FPU.D
        mvUop.fpu.fromInt := true.B
        mvUop.fpu.wflags := false.B
        mvUop.fpu.fpWen := true.B
        mvUop.fpu.div := false.B
        mvUop.fpu.sqrt := false.B
        mvUop.fpu.fcvt := false.B
      }
      // Move 0 writes the fp temporary; move 1 takes the instruction's lsrc(1) as its integer
      // source and writes the base temporary register number.
      csBundle(0).ldest := FP_TMP_REG_MV.U
      csBundle(1).lsrc(0) := decodedInstsSimple.lsrc(1)
      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U

      //LMUL: one memory uop per destination register, reading both moved values
      for (i <- 0 until MAX_VLMUL) {
        val memUop = csBundle(i + 2)
        memUop.srcType(0) := SrcType.fp
        memUop.srcType(1) := SrcType.fp
        memUop.lsrc(0) := FP_TMP_REG_MV.U
        memUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
        memUop.lsrc(2) := dest + i.U // old vd
        memUop.ldest := dest + i.U
        memUop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_I_LDST) {
      /*
      FMV.D.X
       */
      val vlmul = vlmulReg
      val vsew = Cat(0.U(1.W), vsewReg)
      val veew = Cat(0.U(1.W), width)
      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
      // Encodings b001/b010/b011 map to 1/2/3; every other encoding maps to 0.
      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val mvUop = csBundle(0)
      mvUop.srcType(0) := SrcType.reg
      mvUop.srcType(1) := SrcType.imm
      mvUop.lsrc(1) := 0.U
      mvUop.ldest := FP_TMP_REG_MV.U
      mvUop.fuType := FuType.i2f.U
      mvUop.rfWen := false.B
      mvUop.fpWen := true.B
      mvUop.vecWen := false.B
      mvUop.fpu.isAddSub := false.B
      mvUop.fpu.typeTagIn := FPU.D
      mvUop.fpu.typeTagOut := FPU.D
      mvUop.fpu.fromInt := true.B
      mvUop.fpu.wflags := false.B
      mvUop.fpu.fpWen := true.B
      mvUop.fpu.div := false.B
      mvUop.fpu.sqrt := false.B
      mvUop.fpu.fcvt := false.B

      // Selects `base + offset` through a one-hot mux over the MAX_VLMUL candidates.
      def regAt(base: UInt, offset: UInt) =
        Mux1H(UIntToOH(offset, MAX_VLMUL), (0 until MAX_VLMUL).map(j => base + j.U))

      //LMUL
      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
        val offsetTable = indexedLSRegOffset(i)
        offsetTable.src := Cat(simple_emul, simple_lmul, nf)
        val offsetVs2 = offsetTable.outOffsetVs2
        val offsetVd = offsetTable.outOffsetVd
        val isFirstUopInVd = offsetTable.outIsFirstUopInVd
        val memUop = csBundle(i + 1)
        memUop.srcType(0) := SrcType.fp
        memUop.lsrc(0) := FP_TMP_REG_MV.U
        memUop.lsrc(1) := regAt(src2, offsetVs2)
        /**
          * For indexed instructions, the VLSU concatenates all uops that write the same logical
          * vd register and writes back only once for all of them. If every such uop used the
          * same lsrc(2)/old vd, equal to its ldest/vd, the uops would depend on each other and
          * indexed instructions with emul > lmul would deadlock.
          *
          * Assume N = emul/lmul. To break the deadlock, only the first uop reads old vd as
          * lsrc(2); the remaining N-1 uops read the temporary vector register.
          */
        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
        memUop.lsrc(2) := Mux(
          isFirstUopInVd,
          regAt(dest, offsetVd),
          VECTOR_TMP_REG_LMUL.U
        )
        memUop.ldest := regAt(dest, offsetVd)
        memUop.uopIdx := i.U
      }
    }
  }

  //uops dispatch
  // s_normal: a new instruction is presented; s_ext: leftover uops of the same instruction are
  // still being issued.
  val s_normal :: s_ext :: Nil = Enum(2)
  val state = RegInit(s_normal)
  val state_next = WireDefault(state)
  // Number of uops still to be issued while in s_ext.
  val uopRes = RegInit(0.U)

  //readyFromRename Counter
  // Index of the first rename port that is not ready; RenameWidth when all ports are ready.
  val readyCounter = PriorityMuxDefault(
    io.readyFromRename.map(!_).zip((0 until RenameWidth).map(_.U)),
    RenameWidth.U
  )

  switch(state) {
    is(s_normal) {
      // Enter s_ext when the instruction has more uops than can be accepted this cycle.
      state_next := Mux(
        io.validFromIBuf(0) && (numOfUop > readyCounter) && (readyCounter =/= 0.U),
        s_ext,
        s_normal
      )
    }
    is(s_ext) {
      // Stay in s_ext while uops are still left over after this cycle.
      state_next := Mux(io.validFromIBuf(0) && (uopRes > readyCounter), s_ext, s_normal)
    }
  }

  state := state_next

  // Uops outstanding at the start of this cycle.
  val uopRes0 = Mux(state === s_normal, numOfUop, uopRes)
  val uopResJudge = Mux(
    state === s_normal,
    io.validFromIBuf(0) && (readyCounter =/= 0.U) && (uopRes0 > readyCounter),
    io.validFromIBuf(0) && (uopRes0 > readyCounter)
  )
  uopRes := Mux(uopResJudge, uopRes0 - readyCounter, 0.U)

  for(i <- 0 until RenameWidth) {
// In s_ext, output port i continues at uop (numOfUop - uopRes + i), clamped to the last slot.
    val extIdx = i.U + numOfUop - uopRes
    decodedInsts(i) := MuxCase(csBundle(i), Seq(
      (state === s_normal) -> csBundle(i),
      (state === s_ext) -> Mux(extIdx < maxUopSize.U, csBundle(extIdx), csBundle(maxUopSize - 1))
    ).toSeq)
  }

  // validSimple(k): slot k holds a valid instruction that is not complex.
  val validSimple = Wire(Vec(DecodeWidth, Bool()))
  for (k <- 0 until DecodeWidth) {
    validSimple(k) := io.validFromIBuf(k) && !io.isComplex(k)
  }

  // notInf(k): slot k is empty or simple. Slot 0 additionally passes when it is complex and
  // io.in0pc equals the pc of the simple-decoder result.
  val notInf = Wire(Vec(DecodeWidth, Bool()))
  for (k <- 1 until DecodeWidth) {
    notInf(k) := !io.validFromIBuf(k) || validSimple(k)
  }
  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)

  // notInfVec(k): notInf holds for every slot 0..k.
  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
  for (k <- 0 until DecodeWidth) {
    notInfVec(k) := Cat(notInf.take(k + 1)).andR
  }

  // Number of uops of the slot-0 instruction issued this cycle.
  complexNum := Mux(
    io.validFromIBuf(0) && readyCounter.orR,
    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
    0.U
  )

  for ((dst, i) <- validToRename.zipWithIndex) {
    // Simple instruction that lands on port i once complexNum ports are taken by uops.
    val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
    dst := MuxCase(false.B, Seq(
      // More uops than ready ports: only the ready ports carry uops.
      (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) ->
        Mux(readyCounter > i.U, true.B, false.B),
      // All remaining uops fit: uops first, then following simple instructions.
      (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) ->
        Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i))
    ).toSeq)
  }

  for ((dst, i) <- readyToIBuf.zipWithIndex) {
    val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
    dst := MuxCase(true.B, Seq(
      // Uops left over after this cycle, or no ready port at all: accept nothing.
      (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
      (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) ->
        (if (i==0) readyToIBuf0
         else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
    ).toSeq)
  }

  // Drive the dequeue-side outputs.
  io.deq.decodedInsts := decodedInsts
  io.deq.isVset := isVsetSimple
  io.deq.complexNum := complexNum
  io.deq.validToRename := validToRename
  io.deq.readyToIBuf := readyToIBuf

}