/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Lookup table for the uop at position `uopIdx` of an indexed vector load/store.
  *
  * The table is computed at elaboration time for every value of the 7-bit `src` input and
  * handed to the QMC-minimised `decoder`, so the hardware is purely combinational.
  *
  * Input encoding (`src`):
  *   - bits 6:5  emul  (log2 exponent; the table is generated for 0..3 and uses `1 << emul`)
  *   - bits 4:3  lmul  (log2 exponent; the table is generated for 0..3 and uses `1 << lmul`)
  *   - bits 2:0  nf    (number of fields minus one; generation passes `nf + 1` as `nfields`)
  *
  * Output encoding (7-bit pattern, split onto the three output ports):
  *   - bit  6    isFirstUopInVd
  *   - bits 5:3  offsetVs2
  *   - bits 2:0  offsetVd
  *
  * NOTE(review): the consumer of these ports is outside this view; presumably the offsets
  * are added to the vs2 (index) and vd (data) register numbers of the instruction - confirm.
  *
  * @param uopIdx position of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the table entry for one (lmul, emul, nfields) combination.
    *
    * Uops are laid out field by field; each field occupies `1 << max(lmul, emul)` consecutive
    * uop slots. `uopIdx` is decomposed into the field number and the slot within that field.
    *
    * @param lmul    log2 of the data register-group size
    * @param emul    log2 of the index register-group size
    * @param nfields number of segment fields (1..8)
    * @param uopIdx  position of the uop within the instruction
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the combination is
    *         rejected by the size guard or `uopIdx` lies beyond the last uop of the instruction
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val default = (0, 0, 1)
    val regsPerField = 1 << lmul
    // The guard must compare the real register-group size with 8. Multiplying the exponent
    // itself (the previous form) let combinations such as lmul = 3, nfields = 2 through and
    // produced vd offsets up to 15, which do not fit the 3-bit offsetVd field and spilled
    // into the offsetVs2 bits of the packed output pattern.
    if (uopIdx < 0 || regsPerField * nfields > 8) {
      default
    } else {
      val uopsPerField = 1 << math.max(lmul, emul)
      val field = uopIdx / uopsPerField
      val slot = uopIdx % uopsPerField
      if (field >= nfields) {
        default
      } else if (lmul < emul) {
        // lmul < emul: the uop count depends on emul * nf; several uops share one vd register
        val uopsPerVd = 1 << (emul - lmul)
        (slot, slot / uopsPerVd + field * regsPerField, if (slot % uopsPerVd == 0) 1 else 0)
      } else {
        // lmul >= emul: the uop count depends on lmul * nf; every uop starts a new vd register
        val uopsPerVs2 = 1 << (lmul - emul)
        (slot / uopsPerVs2, slot + field * regsPerField, 1)
      }
    }
  }

  // One row per possible `src` value, in (emul, lmul, nf) order:
  // (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
    nf <- 0 until 8
  } yield {
    val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
    (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
  }

  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}

/**
  * Constants shared by the vector uop-splitting logic.
  */
trait VectorConstants {
  // Upper bound of the per-register loops in the uop expansion
  val MAX_VLMUL = 8
  // Logical register index used as the destination of the int-to-fp move uop
  val FP_TMP_REG_MV = 32
  // First logical index of the temporary vector registers
  val VECTOR_TMP_REG_LMUL = 33 // 33~47  ->  15
  // Number of indexedLSUopTable instances created by DecodeUnitComp
  val MAX_INDEXED_LS_UOPNUM = 64
}

/**
  * IO bundle of [[DecodeUnitComp]].
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Inputs grouped under `simple`; DecodeUnitComp labels these "output of DecodeUnit"
  val simple = new Bundle {
    val decodedInst = Input(new DecodedInst)
    val isComplex = Input(Bool())
    val uopInfo = Input(new UopInfo)
  }
  val vtype = Input(new VType)
  val in0pc = Input(UInt(VAddrBits.W))
  val isComplex = Input(Vec(DecodeWidth, Bool()))
  val validFromIBuf = Input(Vec(DecodeWidth, Bool()))
  val readyFromRename = Input(Vec(RenameWidth, Bool()))
  // Outputs of the unit
  val deq = new Bundle {
    val decodedInsts = Output(Vec(RenameWidth, new DecodedInst))
    val isVset = Output(Bool())
    val readyToIBuf = Output(Vec(DecodeWidth, Bool()))
    val validToRename = Output(Vec(RenameWidth, Bool()))
    val complexNum = Output(UInt(3.W))
  }
  val csrCtrl = Input(new CustomCSRCtrlIO)
}

/**
  * @author zly
  */
class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
  val io = IO(new DecodeUnitCompIO)

  val maxUopSize = MaxUopSize
  //input bits
  private val inst: XSInstBitFields = io.simple.decodedInst.instr.asTypeOf(new XSInstBitFields)

  val src1 = Cat(0.U(1.W), inst.RS1)
  val src2 = Cat(0.U(1.W), inst.RS2)
  val dest = Cat(0.U(1.W), inst.RD)

  val nf = inst.NF
  val width = inst.WIDTH(1, 0)

  //output bits
  val decodedInsts = Wire(Vec(RenameWidth, new DecodedInst))
  val validToRename = Wire(Vec(RenameWidth, Bool()))
  val readyToIBuf = Wire(Vec(DecodeWidth, Bool()))
  val complexNum = Wire(UInt(3.W))

  //output of DecodeUnit
  val decodedInstsSimple = Wire(new DecodedInst)
  val numOfUop = Wire(UInt(log2Up(maxUopSize+1).W))
  val numOfWB = Wire(UInt(log2Up(maxUopSize+1).W))
  val lmul = Wire(UInt(4.W))
  val isVsetSimple = Wire(Bool())

  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
  indexedLSRegOffset.map(_.src := 0.U)

  //pre decode
  decodedInstsSimple := io.simple.decodedInst
  lmul := io.simple.uopInfo.lmul
  isVsetSimple := io.simple.decodedInst.isVset
  val vlmulReg = io.simple.decodedInst.vpu.vlmul
  val vsewReg = io.simple.decodedInst.vpu.vsew
  when(isVsetSimple) {
    when(dest === 0.U && src1 === 0.U) {
      decodedInstsSimple.fuOpType := VSETOpType.keepVl(io.simple.decodedInst.fuOpType)
    }.elsewhen(src1 === 0.U) {
      decodedInstsSimple.fuOpType := VSETOpType.setVlmax(io.simple.decodedInst.fuOpType)
    }
    when(io.vtype.illegal){
decodedInstsSimple.flushPipe := true.B 162 } 163 } 164 //Type of uop Div 165 val typeOfSplit = decodedInstsSimple.uopSplitType 166 val src1Type = decodedInstsSimple.srcType(0) 167 val src1IsImm = src1Type === SrcType.imm 168 169 when(typeOfSplit === UopSplitType.DIR) { 170 numOfUop := Mux(dest =/= 0.U, 2.U, 171 Mux(src1 =/= 0.U, 1.U, 172 Mux(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType), 2.U, 1.U))) 173 numOfWB := Mux(dest =/= 0.U, 2.U, 174 Mux(src1 =/= 0.U, 1.U, 175 Mux(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType), 2.U, 1.U))) 176 } .otherwise { 177 numOfUop := io.simple.uopInfo.numOfUop 178 numOfWB := io.simple.uopInfo.numOfWB 179 } 180 181 //uop div up to maxUopSize 182 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 183 csBundle.map { case dst => 184 dst := decodedInstsSimple 185 dst.firstUop := false.B 186 dst.lastUop := false.B 187 } 188 189 csBundle(0).numUops := numOfUop 190 csBundle(0).numWB := numOfWB 191 csBundle(0).firstUop := true.B 192 csBundle(numOfUop - 1.U).lastUop := true.B 193 194 switch(typeOfSplit) { 195 is(UopSplitType.DIR) { 196 when(isVsetSimple) { 197 when(dest =/= 0.U) { 198 csBundle(0).fuType := FuType.vsetiwi.U 199 csBundle(0).fuOpType := VSETOpType.switchDest(decodedInstsSimple.fuOpType) 200 csBundle(0).flushPipe := false.B 201 csBundle(0).rfWen := true.B 202 csBundle(0).vecWen := false.B 203 csBundle(1).ldest := VCONFIG_IDX.U 204 csBundle(1).rfWen := false.B 205 csBundle(1).vecWen := true.B 206 }.elsewhen(src1 =/= 0.U) { 207 csBundle(0).ldest := VCONFIG_IDX.U 208 }.elsewhen(VSETOpType.isVsetvli(decodedInstsSimple.fuOpType)) { 209 csBundle(0).fuType := FuType.vsetfwf.U 210 csBundle(0).srcType(0) := SrcType.vp 211 csBundle(0).lsrc(0) := VCONFIG_IDX.U 212 }.elsewhen(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType)) { 213 csBundle(0).srcType(0) := SrcType.reg 214 csBundle(0).srcType(1) := SrcType.imm 215 csBundle(0).lsrc(1) := 0.U 216 csBundle(0).ldest := FP_TMP_REG_MV.U 217 csBundle(0).fuType := FuType.i2f.U 218 
csBundle(0).rfWen := false.B 219 csBundle(0).fpWen := true.B 220 csBundle(0).vecWen := false.B 221 csBundle(0).fpu.isAddSub := false.B 222 csBundle(0).fpu.typeTagIn := FPU.D 223 csBundle(0).fpu.typeTagOut := FPU.D 224 csBundle(0).fpu.fromInt := true.B 225 csBundle(0).fpu.wflags := false.B 226 csBundle(0).fpu.fpWen := true.B 227 csBundle(0).fpu.div := false.B 228 csBundle(0).fpu.sqrt := false.B 229 csBundle(0).fpu.fcvt := false.B 230 csBundle(0).flushPipe := false.B 231 csBundle(1).fuType := FuType.vsetfwf.U 232 csBundle(1).srcType(0) := SrcType.vp 233 csBundle(1).lsrc(0) := VCONFIG_IDX.U 234 csBundle(1).srcType(1) := SrcType.fp 235 csBundle(1).lsrc(1) := FP_TMP_REG_MV.U 236 csBundle(1).ldest := VCONFIG_IDX.U 237 } 238 } 239 } 240 is(UopSplitType.VEC_VVV) { 241 for (i <- 0 until MAX_VLMUL) { 242 csBundle(i).lsrc(0) := src1 + i.U 243 csBundle(i).lsrc(1) := src2 + i.U 244 csBundle(i).lsrc(2) := dest + i.U 245 csBundle(i).ldest := dest + i.U 246 csBundle(i).uopIdx := i.U 247 } 248 } 249 is(UopSplitType.VEC_VFV) { 250 for (i <- 0 until MAX_VLMUL) { 251 csBundle(i).lsrc(1) := src2 + i.U 252 csBundle(i).lsrc(2) := dest + i.U 253 csBundle(i).ldest := dest + i.U 254 csBundle(i).uopIdx := i.U 255 } 256 } 257 is(UopSplitType.VEC_EXT2) { 258 for (i <- 0 until MAX_VLMUL / 2) { 259 csBundle(2 * i).lsrc(1) := src2 + i.U 260 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 261 csBundle(2 * i).ldest := dest + (2 * i).U 262 csBundle(2 * i).uopIdx := (2 * i).U 263 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 264 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 265 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 266 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 267 } 268 } 269 is(UopSplitType.VEC_EXT4) { 270 for (i <- 0 until MAX_VLMUL / 4) { 271 csBundle(4 * i).lsrc(1) := src2 + i.U 272 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 273 csBundle(4 * i).ldest := dest + (4 * i).U 274 csBundle(4 * i).uopIdx := (4 * i).U 275 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 276 csBundle(4 * i + 
1).lsrc(2) := dest + (4 * i + 1).U 277 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 278 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 279 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 280 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 281 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 282 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 283 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 284 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 285 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 286 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 287 } 288 } 289 is(UopSplitType.VEC_EXT8) { 290 for (i <- 0 until MAX_VLMUL) { 291 csBundle(i).lsrc(1) := src2 292 csBundle(i).lsrc(2) := dest + i.U 293 csBundle(i).ldest := dest + i.U 294 csBundle(i).uopIdx := i.U 295 } 296 } 297 is(UopSplitType.VEC_0XV) { 298 /* 299 FMV.D.X 300 */ 301 csBundle(0).srcType(0) := SrcType.reg 302 csBundle(0).srcType(1) := SrcType.imm 303 csBundle(0).lsrc(1) := 0.U 304 csBundle(0).ldest := FP_TMP_REG_MV.U 305 csBundle(0).fuType := FuType.i2f.U 306 csBundle(0).rfWen := false.B 307 csBundle(0).fpWen := true.B 308 csBundle(0).vecWen := false.B 309 csBundle(0).fpu.isAddSub := false.B 310 csBundle(0).fpu.typeTagIn := FPU.D 311 csBundle(0).fpu.typeTagOut := FPU.D 312 csBundle(0).fpu.fromInt := true.B 313 csBundle(0).fpu.wflags := false.B 314 csBundle(0).fpu.fpWen := true.B 315 csBundle(0).fpu.div := false.B 316 csBundle(0).fpu.sqrt := false.B 317 csBundle(0).fpu.fcvt := false.B 318 /* 319 vfmv.s.f 320 */ 321 csBundle(1).srcType(0) := SrcType.fp 322 csBundle(1).srcType(1) := SrcType.vp 323 csBundle(1).srcType(2) := SrcType.vp 324 csBundle(1).lsrc(0) := FP_TMP_REG_MV.U 325 csBundle(1).lsrc(1) := 0.U 326 csBundle(1).lsrc(2) := dest 327 csBundle(1).ldest := dest 328 csBundle(1).fuType := FuType.vppu.U 329 csBundle(1).fuOpType := VpermType.dummy 330 csBundle(1).rfWen := false.B 331 csBundle(1).fpWen := false.B 332 csBundle(1).vecWen := true.B 333 } 334 is(UopSplitType.VEC_VXV) { 335 /* 336 i to vector move 337 
*/ 338 csBundle(0).srcType(0) := SrcType.reg 339 csBundle(0).srcType(1) := SrcType.imm 340 csBundle(0).lsrc(1) := 0.U 341 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 342 csBundle(0).fuType := FuType.i2v.U 343 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg) 344 csBundle(0).vecWen := true.B 345 /* 346 LMUL 347 */ 348 for (i <- 0 until MAX_VLMUL) { 349 csBundle(i + 1).srcType(0) := SrcType.vp 350 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 351 csBundle(i + 1).lsrc(1) := src2 + i.U 352 csBundle(i + 1).lsrc(2) := dest + i.U 353 csBundle(i + 1).ldest := dest + i.U 354 csBundle(i + 1).uopIdx := i.U 355 } 356 } 357 is(UopSplitType.VEC_VVW) { 358 for (i <- 0 until MAX_VLMUL / 2) { 359 csBundle(2 * i).lsrc(0) := src1 + i.U 360 csBundle(2 * i).lsrc(1) := src2 + i.U 361 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 362 csBundle(2 * i).ldest := dest + (2 * i).U 363 csBundle(2 * i).uopIdx := (2 * i).U 364 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 365 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 366 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 367 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 368 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 369 } 370 } 371 is(UopSplitType.VEC_VFW) { 372 for (i <- 0 until MAX_VLMUL / 2) { 373 csBundle(2 * i).lsrc(0) := src1 374 csBundle(2 * i).lsrc(1) := src2 + i.U 375 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 376 csBundle(2 * i).ldest := dest + (2 * i).U 377 csBundle(2 * i).uopIdx := (2 * i).U 378 csBundle(2 * i + 1).lsrc(0) := src1 379 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 380 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 381 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 382 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 383 } 384 } 385 is(UopSplitType.VEC_WVW) { 386 for (i <- 0 until MAX_VLMUL / 2) { 387 csBundle(2 * i).lsrc(0) := src1 + i.U 388 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 389 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 390 csBundle(2 * 
i).ldest := dest + (2 * i).U 391 csBundle(2 * i).uopIdx := (2 * i).U 392 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 393 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 394 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 395 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 396 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 397 } 398 } 399 is(UopSplitType.VEC_VXW) { 400 /* 401 i to vector move 402 */ 403 csBundle(0).srcType(0) := SrcType.reg 404 csBundle(0).srcType(1) := SrcType.imm 405 csBundle(0).lsrc(1) := 0.U 406 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 407 csBundle(0).fuType := FuType.i2v.U 408 csBundle(0).fuOpType := vsewReg 409 csBundle(0).vecWen := true.B 410 411 for (i <- 0 until MAX_VLMUL / 2) { 412 csBundle(2 * i + 1).srcType(0) := SrcType.vp 413 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 414 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 415 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 416 csBundle(2 * i + 1).ldest := dest + (2 * i).U 417 csBundle(2 * i + 1).uopIdx := (2 * i).U 418 csBundle(2 * i + 2).srcType(0) := SrcType.vp 419 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 420 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 421 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 422 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 423 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 424 } 425 } 426 is(UopSplitType.VEC_WXW) { 427 /* 428 i to vector move 429 */ 430 csBundle(0).srcType(0) := SrcType.reg 431 csBundle(0).srcType(1) := SrcType.imm 432 csBundle(0).lsrc(1) := 0.U 433 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 434 csBundle(0).fuType := FuType.i2v.U 435 csBundle(0).fuOpType := vsewReg 436 csBundle(0).vecWen := true.B 437 438 for (i <- 0 until MAX_VLMUL / 2) { 439 csBundle(2 * i + 1).srcType(0) := SrcType.vp 440 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 441 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 442 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 443 csBundle(2 * i + 1).ldest := dest + (2 * i).U 444 csBundle(2 * i + 
1).uopIdx := (2 * i).U 445 csBundle(2 * i + 2).srcType(0) := SrcType.vp 446 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 447 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 448 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 449 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 450 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 451 } 452 } 453 is(UopSplitType.VEC_WVV) { 454 for (i <- 0 until MAX_VLMUL / 2) { 455 456 csBundle(2 * i).lsrc(0) := src1 + i.U 457 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 458 csBundle(2 * i).lsrc(2) := dest + i.U 459 csBundle(2 * i).ldest := dest + i.U 460 csBundle(2 * i).uopIdx := (2 * i).U 461 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 462 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 463 csBundle(2 * i + 1).lsrc(2) := dest + i.U 464 csBundle(2 * i + 1).ldest := dest + i.U 465 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 466 } 467 } 468 is(UopSplitType.VEC_WFW) { 469 for (i <- 0 until MAX_VLMUL / 2) { 470 csBundle(2 * i).lsrc(0) := src1 471 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 472 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 473 csBundle(2 * i).ldest := dest + (2 * i).U 474 csBundle(2 * i).uopIdx := (2 * i).U 475 csBundle(2 * i + 1).lsrc(0) := src1 476 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 477 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 478 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 479 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 480 } 481 } 482 is(UopSplitType.VEC_WXV) { 483 /* 484 i to vector move 485 */ 486 csBundle(0).srcType(0) := SrcType.reg 487 csBundle(0).srcType(1) := SrcType.imm 488 csBundle(0).lsrc(1) := 0.U 489 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 490 csBundle(0).fuType := FuType.i2v.U 491 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg) 492 csBundle(0).vecWen := true.B 493 494 for (i <- 0 until MAX_VLMUL / 2) { 495 csBundle(2 * i + 1).srcType(0) := SrcType.vp 496 csBundle(2 * i + 1).lsrc(0) := 
VECTOR_TMP_REG_LMUL.U 497 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 498 csBundle(2 * i + 1).lsrc(2) := dest + i.U 499 csBundle(2 * i + 1).ldest := dest + i.U 500 csBundle(2 * i + 1).uopIdx := (2 * i).U 501 csBundle(2 * i + 2).srcType(0) := SrcType.vp 502 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 503 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 504 csBundle(2 * i + 2).lsrc(2) := dest + i.U 505 csBundle(2 * i + 2).ldest := dest + i.U 506 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 507 } 508 } 509 is(UopSplitType.VEC_VVM) { 510 csBundle(0).lsrc(2) := dest 511 csBundle(0).ldest := dest 512 csBundle(0).uopIdx := 0.U 513 for (i <- 1 until MAX_VLMUL) { 514 csBundle(i).lsrc(0) := src1 + i.U 515 csBundle(i).lsrc(1) := src2 + i.U 516 csBundle(i).lsrc(2) := dest 517 csBundle(i).ldest := dest 518 csBundle(i).uopIdx := i.U 519 } 520 } 521 is(UopSplitType.VEC_VFM) { 522 csBundle(0).lsrc(2) := dest 523 csBundle(0).ldest := dest 524 csBundle(0).uopIdx := 0.U 525 for (i <- 1 until MAX_VLMUL) { 526 csBundle(i).lsrc(0) := src1 527 csBundle(i).lsrc(1) := src2 + i.U 528 csBundle(i).lsrc(2) := dest 529 csBundle(i).ldest := dest 530 csBundle(i).uopIdx := i.U 531 } 532 csBundle(numOfUop - 1.U).ldest := dest 533 } 534 is(UopSplitType.VEC_VXM) { 535 /* 536 i to vector move 537 */ 538 csBundle(0).srcType(0) := SrcType.reg 539 csBundle(0).srcType(1) := SrcType.imm 540 csBundle(0).lsrc(1) := 0.U 541 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 542 csBundle(0).fuType := FuType.i2v.U 543 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg) 544 csBundle(0).vecWen := true.B 545 //LMUL 546 csBundle(1).srcType(0) := SrcType.vp 547 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 548 csBundle(1).lsrc(2) := dest 549 csBundle(1).ldest := dest 550 csBundle(1).uopIdx := 0.U 551 for (i <- 1 until MAX_VLMUL) { 552 csBundle(i + 1).srcType(0) := SrcType.vp 553 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 554 csBundle(i + 
1).lsrc(1) := src2 + i.U 555 csBundle(i + 1).lsrc(2) := dest 556 csBundle(i + 1).ldest := dest 557 csBundle(i + 1).uopIdx := i.U 558 } 559 csBundle(numOfUop - 1.U).ldest := dest 560 } 561 is(UopSplitType.VEC_SLIDE1UP) { 562 /* 563 i to vector move 564 */ 565 csBundle(0).srcType(0) := SrcType.reg 566 csBundle(0).srcType(1) := SrcType.imm 567 csBundle(0).lsrc(1) := 0.U 568 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 569 csBundle(0).fuType := FuType.i2v.U 570 csBundle(0).fuOpType := vsewReg 571 csBundle(0).vecWen := true.B 572 //LMUL 573 csBundle(1).srcType(0) := SrcType.vp 574 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 575 csBundle(1).lsrc(2) := dest 576 csBundle(1).ldest := dest 577 csBundle(1).uopIdx := 0.U 578 for (i <- 1 until MAX_VLMUL) { 579 csBundle(i + 1).srcType(0) := SrcType.vp 580 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 581 csBundle(i + 1).lsrc(1) := src2 + i.U 582 csBundle(i + 1).lsrc(2) := dest + i.U 583 csBundle(i + 1).ldest := dest + i.U 584 csBundle(i + 1).uopIdx := i.U 585 } 586 } 587 is(UopSplitType.VEC_FSLIDE1UP) { 588 //LMUL 589 csBundle(0).srcType(0) := SrcType.fp 590 csBundle(0).lsrc(0) := src1 591 csBundle(0).lsrc(1) := src2 592 csBundle(0).lsrc(2) := dest 593 csBundle(0).ldest := dest 594 csBundle(0).uopIdx := 0.U 595 for (i <- 1 until MAX_VLMUL) { 596 csBundle(i).srcType(0) := SrcType.vp 597 csBundle(i).lsrc(0) := src2 + (i - 1).U 598 csBundle(i).lsrc(1) := src2 + i.U 599 csBundle(i).lsrc(2) := dest + i.U 600 csBundle(i).ldest := dest + i.U 601 csBundle(i).uopIdx := i.U 602 } 603 } 604 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 605 /* 606 i to vector move 607 */ 608 csBundle(0).srcType(0) := SrcType.reg 609 csBundle(0).srcType(1) := SrcType.imm 610 csBundle(0).lsrc(1) := 0.U 611 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 612 csBundle(0).fuType := FuType.i2v.U 613 csBundle(0).fuOpType := vsewReg 614 csBundle(0).vecWen := true.B 615 //LMUL 616 for (i <- 0 until MAX_VLMUL) { 617 csBundle(2 * i + 1).srcType(0) := SrcType.vp 618 
csBundle(2 * i + 1).srcType(1) := SrcType.vp 619 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 620 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 621 csBundle(2 * i + 1).lsrc(2) := dest + i.U 622 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 623 csBundle(2 * i + 1).uopIdx := (2 * i).U 624 if (2 * i + 2 < MAX_VLMUL * 2) { 625 csBundle(2 * i + 2).srcType(0) := SrcType.vp 626 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 627 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 628 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 629 csBundle(2 * i + 2).ldest := dest + i.U 630 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 631 } 632 } 633 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 634 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 635 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 636 } 637 is(UopSplitType.VEC_FSLIDE1DOWN) { 638 //LMUL 639 for (i <- 0 until MAX_VLMUL) { 640 csBundle(2 * i).srcType(0) := SrcType.vp 641 csBundle(2 * i).srcType(1) := SrcType.vp 642 csBundle(2 * i).lsrc(0) := src2 + (i + 1).U 643 csBundle(2 * i).lsrc(1) := src2 + i.U 644 csBundle(2 * i).lsrc(2) := dest + i.U 645 csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U 646 csBundle(2 * i).uopIdx := (2 * i).U 647 csBundle(2 * i + 1).srcType(0) := SrcType.fp 648 csBundle(2 * i + 1).lsrc(0) := src1 649 csBundle(2 * i + 1).lsrc(2) := VECTOR_TMP_REG_LMUL.U 650 csBundle(2 * i + 1).ldest := dest + i.U 651 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 652 } 653 csBundle(numOfUop - 1.U).srcType(0) := SrcType.fp 654 csBundle(numOfUop - 1.U).lsrc(0) := src1 655 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 656 } 657 is(UopSplitType.VEC_VRED) { 658 when(vlmulReg === "b001".U) { 659 csBundle(0).srcType(2) := SrcType.DC 660 csBundle(0).lsrc(0) := src2 + 1.U 661 csBundle(0).lsrc(1) := src2 662 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 663 csBundle(0).uopIdx := 0.U 664 } 665 when(vlmulReg === "b010".U) { 666 csBundle(0).srcType(2) := SrcType.DC 667 
csBundle(0).lsrc(0) := src2 + 1.U 668 csBundle(0).lsrc(1) := src2 669 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 670 csBundle(0).uopIdx := 0.U 671 672 csBundle(1).srcType(2) := SrcType.DC 673 csBundle(1).lsrc(0) := src2 + 3.U 674 csBundle(1).lsrc(1) := src2 + 2.U 675 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 676 csBundle(1).uopIdx := 1.U 677 678 csBundle(2).srcType(2) := SrcType.DC 679 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 680 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 681 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 682 csBundle(2).uopIdx := 2.U 683 } 684 when(vlmulReg === "b011".U) { 685 for (i <- 0 until MAX_VLMUL) { 686 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 687 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 688 csBundle(i).lsrc(1) := src2 + (i * 2).U 689 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 690 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 691 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 692 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 693 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 694 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 695 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 696 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 697 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 698 } 699 csBundle(i).srcType(2) := SrcType.DC 700 csBundle(i).uopIdx := i.U 701 } 702 } 703 when(vlmulReg.orR) { 704 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 705 csBundle(numOfUop - 1.U).lsrc(0) := src1 706 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 707 csBundle(numOfUop - 1.U).lsrc(2) := dest 708 csBundle(numOfUop - 1.U).ldest := dest 709 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 710 } 711 } 712 is(UopSplitType.VEC_VFRED) { 713 val vlmul = vlmulReg 714 val vsew = vsewReg 715 when(vlmul === VLmul.m8){ 716 for (i <- 0 until 4) { 717 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 718 csBundle(i).lsrc(1) := src2 + (i * 2).U 719 
csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 720 csBundle(i).uopIdx := i.U 721 } 722 for (i <- 4 until 6) { 723 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 724 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 725 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 726 csBundle(i).uopIdx := i.U 727 } 728 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 729 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 730 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 731 csBundle(6).uopIdx := 6.U 732 when(vsew === VSew.e64) { 733 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 734 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 735 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 736 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 737 csBundle(7).uopIdx := 7.U 738 csBundle(8).lsrc(0) := src1 739 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 740 csBundle(8).ldest := dest 741 csBundle(8).uopIdx := 8.U 742 } 743 when(vsew === VSew.e32) { 744 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 745 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 746 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 747 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 748 csBundle(7).uopIdx := 7.U 749 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 750 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 751 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 752 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 753 csBundle(8).uopIdx := 8.U 754 csBundle(9).lsrc(0) := src1 755 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 756 csBundle(9).ldest := dest 757 csBundle(9).uopIdx := 9.U 758 } 759 when(vsew === VSew.e16) { 760 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 761 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 762 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 763 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 764 csBundle(7).uopIdx := 7.U 765 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 766 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 767 
csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 768 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 769 csBundle(8).uopIdx := 8.U 770 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 771 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 772 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 773 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 774 csBundle(9).uopIdx := 9.U 775 csBundle(10).lsrc(0) := src1 776 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 777 csBundle(10).ldest := dest 778 csBundle(10).uopIdx := 10.U 779 } 780 } 781 when(vlmul === VLmul.m4) { 782 for (i <- 0 until 2) { 783 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 784 csBundle(i).lsrc(1) := src2 + (i * 2).U 785 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 786 csBundle(i).uopIdx := i.U 787 } 788 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 789 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 790 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 791 csBundle(2).uopIdx := 2.U 792 when(vsew === VSew.e64) { 793 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 794 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 795 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 796 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 797 csBundle(3).uopIdx := 3.U 798 csBundle(4).lsrc(0) := src1 799 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 800 csBundle(4).ldest := dest 801 csBundle(4).uopIdx := 4.U 802 } 803 when(vsew === VSew.e32) { 804 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 805 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 806 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 807 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 808 csBundle(3).uopIdx := 3.U 809 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 810 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 811 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 812 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 813 csBundle(4).uopIdx := 4.U 814 csBundle(5).lsrc(0) := src1 815 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 816 csBundle(5).ldest 
:= dest 817 csBundle(5).uopIdx := 5.U 818 } 819 when(vsew === VSew.e16) { 820 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 821 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 822 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 823 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 824 csBundle(3).uopIdx := 3.U 825 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 826 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 827 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 828 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 829 csBundle(4).uopIdx := 4.U 830 csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U 831 csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 832 csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U 833 csBundle(5).vpu.fpu.isFoldTo1_8 := true.B 834 csBundle(5).uopIdx := 5.U 835 csBundle(6).lsrc(0) := src1 836 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U 837 csBundle(6).ldest := dest 838 csBundle(6).uopIdx := 6.U 839 } 840 } 841 when(vlmul === VLmul.m2) { 842 csBundle(0).lsrc(0) := src2 + 1.U 843 csBundle(0).lsrc(1) := src2 + 0.U 844 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 845 csBundle(0).uopIdx := 0.U 846 when(vsew === VSew.e64) { 847 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 848 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 849 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 850 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 851 csBundle(1).uopIdx := 1.U 852 csBundle(2).lsrc(0) := src1 853 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 854 csBundle(2).ldest := dest 855 csBundle(2).uopIdx := 2.U 856 } 857 when(vsew === VSew.e32) { 858 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 859 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 860 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 861 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 862 csBundle(1).uopIdx := 1.U 863 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 864 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 865 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 866 
csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 867 csBundle(2).uopIdx := 2.U 868 csBundle(3).lsrc(0) := src1 869 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 870 csBundle(3).ldest := dest 871 csBundle(3).uopIdx := 3.U 872 } 873 when(vsew === VSew.e16) { 874 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 875 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 876 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 877 csBundle(1).vpu.fpu.isFoldTo1_2 := true.B 878 csBundle(1).uopIdx := 1.U 879 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 880 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 881 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 882 csBundle(2).vpu.fpu.isFoldTo1_4 := true.B 883 csBundle(2).uopIdx := 2.U 884 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 885 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 886 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 887 csBundle(3).vpu.fpu.isFoldTo1_8 := true.B 888 csBundle(3).uopIdx := 3.U 889 csBundle(4).lsrc(0) := src1 890 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 891 csBundle(4).ldest := dest 892 csBundle(4).uopIdx := 4.U 893 } 894 } 895 when(vlmul === VLmul.m1) { 896 when(vsew === VSew.e64) { 897 csBundle(0).lsrc(0) := src2 898 csBundle(0).lsrc(1) := src2 899 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 900 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 901 csBundle(0).uopIdx := 0.U 902 csBundle(1).lsrc(0) := src1 903 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 904 csBundle(1).ldest := dest 905 csBundle(1).uopIdx := 1.U 906 } 907 when(vsew === VSew.e32) { 908 csBundle(0).lsrc(0) := src2 909 csBundle(0).lsrc(1) := src2 910 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 911 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 912 csBundle(0).uopIdx := 0.U 913 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 914 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 915 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 916 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 917 csBundle(1).uopIdx := 1.U 
918 csBundle(2).lsrc(0) := src1 919 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 920 csBundle(2).ldest := dest 921 csBundle(2).uopIdx := 2.U 922 } 923 when(vsew === VSew.e16) { 924 csBundle(0).lsrc(0) := src2 925 csBundle(0).lsrc(1) := src2 926 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 927 csBundle(0).vpu.fpu.isFoldTo1_2 := true.B 928 csBundle(0).uopIdx := 0.U 929 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 930 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 931 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 932 csBundle(1).vpu.fpu.isFoldTo1_4 := true.B 933 csBundle(1).uopIdx := 1.U 934 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 935 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 936 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 937 csBundle(2).vpu.fpu.isFoldTo1_8 := true.B 938 csBundle(2).uopIdx := 2.U 939 csBundle(3).lsrc(0) := src1 940 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 941 csBundle(3).ldest := dest 942 csBundle(3).uopIdx := 3.U 943 } 944 } 945 when(vlmul === VLmul.mf2) { 946 when(vsew === VSew.e32) { 947 csBundle(0).lsrc(0) := src2 948 csBundle(0).lsrc(1) := src2 949 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 950 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 951 csBundle(0).uopIdx := 0.U 952 csBundle(1).lsrc(0) := src1 953 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 954 csBundle(1).ldest := dest 955 csBundle(1).uopIdx := 1.U 956 } 957 when(vsew === VSew.e16) { 958 csBundle(0).lsrc(0) := src2 959 csBundle(0).lsrc(1) := src2 960 csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U 961 csBundle(0).vpu.fpu.isFoldTo1_4 := true.B 962 csBundle(0).uopIdx := 0.U 963 csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U 964 csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 965 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 966 csBundle(1).vpu.fpu.isFoldTo1_8 := true.B 967 csBundle(1).uopIdx := 1.U 968 csBundle(2).lsrc(0) := src1 969 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U 970 csBundle(2).ldest := dest 
// Remainder of the preceding `is` case (its header precedes this chunk): last uop of the mf2/e16 split, then the mf4 split.
          csBundle(2).uopIdx := 2.U
        }
      }
      when(vlmul === VLmul.mf4) {
        when(vsew === VSew.e16) {
          csBundle(0).lsrc(0) := src2
          csBundle(0).lsrc(1) := src2
          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := src1
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := dest
          csBundle(1).uopIdx := 1.U
        }
      }
    }

    is(UopSplitType.VEC_VFREDOSUM) {
      import yunsuan.VfaluType
      val vlmul = vlmulReg
      val vsew = vsewReg
      val isWiden = decodedInstsSimple.fuOpType === VfaluType.vfwredosum

      /**
       * Emits the uop chain for one (vlmul, vsew) configuration.
       *
       * Every `uopsPerVs2`-th uop reads a fresh vs2 register (`src2 + i / uopsPerVs2`) on lsrc(1) and
       * lsrc(2) and has its fold flag cleared; all other uops read the temporary register and have
       * the fold flag set. Uop 0 takes `src1` on lsrc(0), and the last uop reads `dest` on lsrc(2)
       * and writes `dest`.
       *
       * @param numUops    number of uops generated (indices 0 until numUops of csBundle)
       * @param uopsPerVs2 number of consecutive uops that belong to one vs2 register (2, 4 or 8)
       * @param widenAware when true, the uop right after a vs2-reading uop takes the vs2 register on
       *                   lsrc(2) if `isWiden`, and the fold flag one step coarser is used for the
       *                   widening form; when false only the single non-widening flag is driven
       */
      def genCsBundle_VEC_VFREDOSUM(numUops: Int, uopsPerVs2: Int, widenAware: Boolean): Unit = {
        for (i <- 0 until numUops) {
          val readsVs2 = i % uopsPerVs2 == 0
          val isLastUop = i == numUops - 1
          val fold = (!readsVs2).B
          // `def`, so the adder is only elaborated by the branches that actually use it
          def vs2Reg = src2 + (i / uopsPerVs2).U
          val uop = csBundle(i)

          uop.lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
          uop.lsrc(1) := (if (readsVs2) vs2Reg else VECTOR_TMP_REG_LMUL.U)
          uop.lsrc(2) := (
            if (readsVs2) vs2Reg
            else if (isLastUop) dest
            else if (widenAware && i % uopsPerVs2 == 1) Mux(isWiden, vs2Reg, VECTOR_TMP_REG_LMUL.U)
            else VECTOR_TMP_REG_LMUL.U
          )
          uop.ldest := (if (isLastUop) dest else VECTOR_TMP_REG_LMUL.U)
          // Only the flags listed per case are driven; the others keep their earlier value.
          (uopsPerVs2, widenAware) match {
            case (2, _) =>
              uop.vpu.fpu.isFoldTo1_2 := fold
            case (4, false) =>
              uop.vpu.fpu.isFoldTo1_4 := fold
            case (4, true) =>
              uop.vpu.fpu.isFoldTo1_2 := isWiden && fold
              uop.vpu.fpu.isFoldTo1_4 := !isWiden && fold
            case (8, false) =>
              uop.vpu.fpu.isFoldTo1_8 := fold
            case (8, true) =>
              uop.vpu.fpu.isFoldTo1_4 := isWiden && fold
              uop.vpu.fpu.isFoldTo1_8 := !isWiden && fold
            case _ =>
              throw new IllegalArgumentException(s"unsupported uopsPerVs2 = $uopsPerVs2")
          }
          uop.uopIdx := i.U
        }
      }

      // (vlmul, vsew, numUops, uopsPerVs2, widenAware) - the conditions are mutually exclusive
      val osumConfigs = Seq(
        (VLmul.m8,  VSew.e64, 16, 2, false),
        (VLmul.m8,  VSew.e32, 32, 4, false),
        (VLmul.m8,  VSew.e16, 64, 8, false),
        (VLmul.m4,  VSew.e64,  8, 2, false),
        (VLmul.m4,  VSew.e32, 16, 4, true),
        (VLmul.m4,  VSew.e16, 32, 8, true),
        (VLmul.m2,  VSew.e64,  4, 2, false),
        (VLmul.m2,  VSew.e32,  8, 4, true),
        (VLmul.m2,  VSew.e16, 16, 8, true),
        (VLmul.m1,  VSew.e64,  2, 2, false),
        (VLmul.m1,  VSew.e32,  4, 4, true),
        (VLmul.m1,  VSew.e16,  8, 8, true),
        (VLmul.mf2, VSew.e32,  2, 4, true),
        (VLmul.mf2, VSew.e16,  4, 8, true),
        (VLmul.mf4, VSew.e16,  2, 8, true)
      )
      osumConfigs.foreach { case (lmulSel, sewSel, numUops, uopsPerVs2, widenAware) =>
        when((vlmul === lmulSel) && (vsew === sewSel)) {
          genCsBundle_VEC_VFREDOSUM(numUops, uopsPerVs2, widenAware)
        }
      }
    }

    is(UopSplitType.VEC_SLIDEUP) {
      // i to vector move: uop 0 places the scalar/immediate operand in the temporary vector register
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
      csBundle(0).vecWen := true.B
      // LMUL: destination register i is built by a chain of i + 1 uops over src2 + 0 .. src2 + i
      for (i <- 0 until MAX_VLMUL)
        for (j <- 0 to i) {
          val old_vd = if (j == 0) {
            dest + i.U
          } else (VECTOR_TMP_REG_LMUL + j).U
          val vd = if (j == i) {
            dest + i.U
          } else (VECTOR_TMP_REG_LMUL + j + 1).U
          // slot 0 is taken by the move above, hence the + 1
          val uop = csBundle(i * (i + 1) / 2 + j + 1)
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := src2 + j.U
          uop.lsrc(2) := old_vd
          uop.ldest := vd
          uop.uopIdx := (i * (i + 1) / 2 + j).U
// Closes the inner uop loop and the is(UopSplitType.VEC_SLIDEUP) case opened on the preceding chunk line.
        }
    }

    is(UopSplitType.VEC_SLIDEDOWN) {
      // i to vector move: uop 0 places the scalar/immediate operand in the temporary vector register
      val mvUop = csBundle(0)
      mvUop.srcType(0) := SrcType.reg
      mvUop.srcType(1) := SrcType.imm
      mvUop.lsrc(1) := 0.U
      mvUop.ldest := VECTOR_TMP_REG_LMUL.U
      mvUop.fuType := FuType.i2v.U
      mvUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
      mvUop.vecWen := true.B
      // LMUL: slots are addressed backwards from numOfUop, so they depend on the run-time lmul
      for (i <- 0 until MAX_VLMUL)
        for (j <- (0 to i).reverse) {
          when(i.U < lmul) {
            val old_vd = if (j == 0) {
              dest + lmul - 1.U - i.U
            } else (VECTOR_TMP_REG_LMUL + j).U
            val vd = if (j == i) {
              dest + lmul - 1.U - i.U
            } else (VECTOR_TMP_REG_LMUL + j + 1).U
            // distance of this uop from the end of the uop list
            val fromEnd = i * (i + 1) / 2 + i - j + 1
            val uop = csBundle(numOfUop - fromEnd.U)
            uop.srcType(0) := SrcType.vp
            uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
            uop.lsrc(1) := src2 + lmul - 1.U - j.U
            uop.lsrc(2) := old_vd
            uop.ldest := vd
            uop.uopIdx := numOfUop - (fromEnd + 1).U
          }
        }
    }

    is(UopSplitType.VEC_M0X) {
      // LMUL: uop i reads the temporary written by uop i - 1 and writes temporary i
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i)
        uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
        uop.srcType(1) := SrcType.vp
        uop.rfWen := false.B
        uop.vecWen := true.B
        uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        uop.lsrc(1) := src2
        // lsrc(2) is not assigned here (the original note marks it DontCare)
        uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
        uop.uopIdx := i.U
      }
      // The uop at index lmul - 1 writes the FP temporary instead of a vector temporary.
      val lastVecUop = csBundle(lmul - 1.U)
      lastVecUop.vecWen := false.B
      lastVecUop.fpWen := true.B
      lastVecUop.ldest := FP_TMP_REG_MV.U
      // FMV_X_D: the uop at index lmul moves the FP temporary to the integer destination
      val fmvUop = csBundle(lmul)
      fmvUop.srcType(0) := SrcType.fp
      fmvUop.srcType(1) := SrcType.imm
      fmvUop.lsrc(0) := FP_TMP_REG_MV.U
      fmvUop.lsrc(1) := 0.U
      fmvUop.ldest := dest
      fmvUop.fuType := FuType.fmisc.U
      fmvUop.rfWen := true.B
      fmvUop.fpWen := false.B
      fmvUop.vecWen := false.B
      fmvUop.fpu.isAddSub := false.B
      fmvUop.fpu.typeTagIn := FPU.D
      fmvUop.fpu.typeTagOut := FPU.D
      fmvUop.fpu.fromInt := false.B
      fmvUop.fpu.wflags := false.B
      fmvUop.fpu.fpWen := false.B
      fmvUop.fpu.div := false.B
      fmvUop.fpu.sqrt := false.B
      fmvUop.fpu.fcvt := false.B
    }

    is(UopSplitType.VEC_MVV) {
      // LMUL: two uops per register; the even one writes dest + i, the odd one writes temporary i
      for (i <- 0 until MAX_VLMUL) {
        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
        val toDest = csBundle(i * 2 + 0)
        val toTmp = csBundle(i * 2 + 1)
        // fields shared by both uops of the pair
        Seq(toDest, toTmp).zipWithIndex.foreach { case (uop, k) =>
          uop.srcType(0) := srcType0
          uop.srcType(1) := SrcType.vp
          uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
          uop.lsrc(1) := src2
          uop.uopIdx := (i * 2 + k).U
        }
        toDest.lsrc(2) := dest + i.U
        toDest.ldest := dest + i.U
        // lsrc(2) of the odd uop is not assigned here (the original note marks it DontCare)
        toTmp.ldest := (VECTOR_TMP_REG_LMUL + i).U
      }
    }

    is(UopSplitType.VEC_M0X_VFIRST) {
      // LMUL: uop 0 writes the FP temporary
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := true.B
      csBundle(0).ldest := FP_TMP_REG_MV.U
      // FMV_X_D
      csBundle(1).srcType(0) := SrcType.fp
      csBundle(1).srcType(1) := SrcType.imm
      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
      csBundle(1).lsrc(1) := 0.U
// Continuation of is(UopSplitType.VEC_M0X_VFIRST): rest of the FMV_X_D uop started on the preceding chunk line.
      csBundle(1).ldest := dest
      csBundle(1).fuType := FuType.fmisc.U
      csBundle(1).rfWen := true.B
      csBundle(1).fpWen := false.B
      csBundle(1).vecWen := false.B
      csBundle(1).fpu.isAddSub := false.B
      csBundle(1).fpu.typeTagIn := FPU.D
      csBundle(1).fpu.typeTagOut := FPU.D
      csBundle(1).fpu.fromInt := false.B
      csBundle(1).fpu.wflags := false.B
      csBundle(1).fpu.fpWen := false.B
      csBundle(1).fpu.div := false.B
      csBundle(1).fpu.sqrt := false.B
      csBundle(1).fpu.fcvt := false.B
    }
    is(UopSplitType.VEC_VWW) {
      for (i <- 0 until MAX_VLMUL*2) {
        // identical in both phases, so assigned unconditionally
        csBundle(i).srcType(2) := SrcType.DC
        csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
        csBundle(i).uopIdx := i.U
        when(i.U < lmul) {
          // first lmul uops: both operands are vs2 register i
          csBundle(i).lsrc(0) := src2 + i.U
          csBundle(i).lsrc(1) := src2 + i.U
        }.otherwise {
          // remaining uops: combine a pair of adjacent temporaries (2k and 2k + 1, k = i - lmul)
          val tmpPairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
          csBundle(i).lsrc(0) := tmpPairBase + 1.U
          csBundle(i).lsrc(1) := tmpPairBase
        }
      }
      // Final uop override. These connects are loop-invariant, so they are emitted once after the
      // loop; being last, they still take priority over the per-index connects above.
      val lastUop = csBundle(numOfUop - 1.U)
      lastUop.srcType(2) := SrcType.vp
      lastUop.lsrc(0) := src1
      lastUop.lsrc(2) := dest
      lastUop.ldest := dest
    }
    is(UopSplitType.VEC_RGATHER) {
      /**
       * Emits len * len uops: destination register i is produced by a chain of `len` uops,
       * one per vs2 register j, threaded through temporaries and ending in dest + i.
       * srcType(0..2) are not overridden here.
       */
      def genCsBundle_VEC_RGATHER(len:Int): Unit = {
        for (i <- 0 until len; j <- 0 until len) {
          val uop = csBundle(i * len + j)
          val vd_old = if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
          val vd = if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
          uop.lsrc(0) := src1 + i.U
          uop.lsrc(1) := src2 + j.U
          uop.lsrc(2) := vd_old
          uop.ldest := vd
          uop.uopIdx := (i * len + j).U
        }
      }
      switch(vlmulReg) {
        is("b001".U) {
          genCsBundle_VEC_RGATHER(2)
        }
        is("b010".U) {
          genCsBundle_VEC_RGATHER(4)
        }
        is("b011".U) {
          genCsBundle_VEC_RGATHER(8)
        }
      }
    }
    is(UopSplitType.VEC_RGATHER_VX) {
      /**
       * Same chain structure as the vector-vector gather, shifted by one slot because uop 0 is
       * the scalar move below; lsrc(0) of every gather uop is the temporary written by that move.
       */
      def genCsBundle_RGATHER_VX(len:Int): Unit = {
        for (i <- 0 until len; j <- 0 until len) {
          val uop = csBundle(i * len + j + 1)
          val vd_old = if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
          val vd = if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
          uop.srcType(0) := SrcType.vp
          uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          uop.lsrc(1) := src2 + j.U
          uop.lsrc(2) := vd_old
          uop.ldest := vd
          uop.uopIdx := (i * len + j).U
        }
      }
      // i to vector move
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
      csBundle(0).fuType := FuType.i2v.U
      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
      csBundle(0).vecWen := true.B
      switch(vlmulReg) {
        is("b000".U) {
          genCsBundle_RGATHER_VX(1)
        }
        is("b001".U) {
          genCsBundle_RGATHER_VX(2)
        }
        is("b010".U) {
          genCsBundle_RGATHER_VX(4)
        }
        is("b011".U) {
          genCsBundle_RGATHER_VX(8)
        }
      }
    }
    is(UopSplitType.VEC_RGATHEREI16) {
      /**
       * Variant selected when vsewReg is all zeros: every (i, j) pair needs two uops, reading
       * index registers src1 + 2i and src1 + 2i + 1. The 2 * len uops of destination i form one
       * chain through the temporaries; `chainPos` is the position inside that chain.
       */
      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit = {
        for (i <- 0 until len; j <- 0 until len; half <- 0 until 2) {
          val chainPos = j * 2 + half
          val uop = csBundle((i * len + j) * 2 + half)
          val vd_old = if (chainPos == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + chainPos - 1).U
          val vd = if (chainPos == 2 * len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + chainPos).U
          uop.lsrc(0) := src1 + (i * 2 + half).U
          uop.lsrc(1) := src2 + j.U
          uop.lsrc(2) := vd_old
          uop.ldest := vd
          uop.uopIdx := ((i * len + j) * 2 + half).U
        }
      }
      /** Variant for the other element widths: one uop per (i, j) pair. */
      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit = {
        for (i <- 0 until len; j <- 0 until len) {
          val uop = csBundle(i * len + j)
          val vd_old = if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
          val vd = if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
          uop.lsrc(0) := src1 + i.U
          uop.lsrc(1) := src2 + j.U
          uop.lsrc(2) := vd_old
          uop.ldest := vd
          uop.uopIdx := (i * len + j).U
        }
      }
      switch(vlmulReg) {
        is("b000".U) {
          when(!vsewReg.orR) {
            genCsBundle_VEC_RGATHEREI16_SEW8(1)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(1)
          }
        }
        is("b001".U) {
          when(!vsewReg.orR) {
            genCsBundle_VEC_RGATHEREI16_SEW8(2)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(2)
          }
        }
// Continuation of switch(vlmulReg) inside is(UopSplitType.VEC_RGATHEREI16): the generators are defined on the preceding chunk lines.
        is("b010".U) {
          when(!vsewReg.orR) {
            genCsBundle_VEC_RGATHEREI16_SEW8(4)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(4)
          }
        }
        is("b011".U) {
          genCsBundle_VEC_RGATHEREI16(8)
        }
      }
    }
    is(UopSplitType.VEC_COMPRESS) {
      /**
       * Row i (one per vs2 register) contributes i + 2 uops, except the last row which
       * contributes i + 1; rows are packed back to back starting at slot i * (i + 3) / 2.
       */
      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
        for (i <- 0 until len) {
          val rowBase = i * (i + 3) / 2
          val jlen = if (i == len - 1) i + 1 else i + 2
          for (j <- 0 until jlen) {
            // the extra trailing uop of a non-final row
            val isExtra = j == i + 1
            val vd_old = if (i == j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
            val vd =
              if (i == len - 1) (dest + j.U)
              else if (isExtra) VECTOR_TMP_REG_LMUL.U
              else (VECTOR_TMP_REG_LMUL + j + 1).U
            val src23Type = if (isExtra) DontCare else SrcType.vp
            val uop = csBundle(rowBase + j)
            uop.srcType(0) := SrcType.vp
            uop.srcType(1) := src23Type
            uop.srcType(2) := src23Type
            uop.lsrc(0) := src1
            uop.lsrc(1) := src2 + i.U
            uop.lsrc(2) := vd_old
            uop.ldest := vd
            uop.uopIdx := (rowBase + j).U
          }
        }
      }
      switch(vlmulReg) {
        is("b001".U) {
          genCsBundle_VEC_COMPRESS(2)
        }
        is("b010".U) {
          genCsBundle_VEC_COMPRESS(4)
        }
        is("b011".U) {
          genCsBundle_VEC_COMPRESS(8)
        }
      }
    }
    is(UopSplitType.VEC_MVNR) {
      // one uop per register, all register numbers advancing together
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i)
        uop.lsrc(0) := src1 + i.U
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := dest + i.U
        uop.ldest := dest + i.U
        uop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_US_LDST) {
      /*
      FMV.D.X: uop 0 moves the integer operand into the FP temporary
       */
      val mvUop = csBundle(0)
      mvUop.srcType(0) := SrcType.reg
      mvUop.srcType(1) := SrcType.imm
      mvUop.lsrc(1) := 0.U
      mvUop.ldest := FP_TMP_REG_MV.U
      mvUop.fuType := FuType.i2f.U
      mvUop.rfWen := false.B
      mvUop.fpWen := true.B
      mvUop.vecWen := false.B
      mvUop.fpu.isAddSub := false.B
      mvUop.fpu.typeTagIn := FPU.D
      mvUop.fpu.typeTagOut := FPU.D
      mvUop.fpu.fromInt := true.B
      mvUop.fpu.wflags := false.B
      mvUop.fpu.fpWen := true.B
      mvUop.fpu.div := false.B
      mvUop.fpu.sqrt := false.B
      mvUop.fpu.fcvt := false.B
      //LMUL
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i + 1)
        uop.srcType(0) := SrcType.fp
        uop.lsrc(0) := FP_TMP_REG_MV.U
        uop.lsrc(2) := dest + i.U // old vd
        uop.ldest := dest + i.U
        uop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_S_LDST) {
      /*
      FMV.D.X: configure csBundle(idx) as an integer-to-FP move whose destination is
      logical register number fpDest. lsrc(0) is not touched by this helper.
       */
      def genFmvDX(idx: Int, fpDest: Int): Unit = {
        val mvUop = csBundle(idx)
        mvUop.srcType(0) := SrcType.reg
        mvUop.srcType(1) := SrcType.imm
        mvUop.lsrc(1) := 0.U
        mvUop.ldest := fpDest.U
        mvUop.fuType := FuType.i2f.U
        mvUop.rfWen := false.B
        mvUop.fpWen := true.B
        mvUop.vecWen := false.B
        mvUop.fpu.isAddSub := false.B
        mvUop.fpu.typeTagIn := FPU.D
        mvUop.fpu.typeTagOut := FPU.D
        mvUop.fpu.fromInt := true.B
        mvUop.fpu.wflags := false.B
        mvUop.fpu.fpWen := true.B
        mvUop.fpu.div := false.B
        mvUop.fpu.sqrt := false.B
        mvUop.fpu.fcvt := false.B
      }
      // uop 0: first integer operand (lsrc(0) keeps its earlier value)
      genFmvDX(0, FP_TMP_REG_MV)
      // uop 1: second integer operand, taken from the decoded lsrc(1)
      genFmvDX(1, VECTOR_TMP_REG_LMUL)
      csBundle(1).lsrc(0) := decodedInstsSimple.lsrc(1)

      //LMUL
      for (i <- 0 until MAX_VLMUL) {
        val uop = csBundle(i + 2)
        uop.srcType(0) := SrcType.fp
        uop.srcType(1) := SrcType.fp
        uop.lsrc(0) := FP_TMP_REG_MV.U
        uop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
        uop.lsrc(2) := dest + i.U // old vd
        uop.ldest := dest + i.U
        uop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_I_LDST) {
      val vlmul = vlmulReg
      val vsew = Cat(0.U(1.W), vsewReg)
      val veew = Cat(0.U(1.W), width)
      // veew - vsew + vlmul, with the subtraction written as "+ 1 + ~vsew"
      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
      // 2-bit encodings fed to the offset tables; every value other than 1, 2, 3 maps to 0
      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      /*
      FMV.D.X: uop 0 moves the integer operand into the FP temporary
       */
      csBundle(0).srcType(0) := SrcType.reg
      csBundle(0).srcType(1) := SrcType.imm
      csBundle(0).lsrc(1) := 0.U
      csBundle(0).ldest := FP_TMP_REG_MV.U
      csBundle(0).fuType := FuType.i2f.U
      csBundle(0).rfWen := false.B
      csBundle(0).fpWen := true.B
      csBundle(0).vecWen := false.B
      csBundle(0).fpu.isAddSub := false.B
      csBundle(0).fpu.typeTagIn := FPU.D
      csBundle(0).fpu.typeTagOut := FPU.D
      csBundle(0).fpu.fromInt := true.B
      csBundle(0).fpu.wflags := false.B
      csBundle(0).fpu.fpWen := true.B
      csBundle(0).fpu.div := false.B
      csBundle(0).fpu.sqrt := false.B
      csBundle(0).fpu.fcvt := false.B

      //LMUL
      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
        indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul, nf)
        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
        val offsetVd = indexedLSRegOffset(i).outOffsetVd
        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
        csBundle(i + 1).srcType(0) := SrcType.fp
// Continuation of the per-uop loop of is(UopSplitType.VEC_I_LDST); the loop header is on the preceding chunk line.
        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
        csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
        /**
          * For indexed instructions, VLSU concatenates all uops that write the same logical vd register and
          * writes back only once for all of them. Those uops would share the same lsrc(2)/old vd and the same
          * ldest/vd (equal to old vd), which creates a data dependence between the uops and therefore a
          * deadlock for indexed instructions with emul > lmul.
          *
          * Let N = emul/lmul. To break the deadlock, only the first uop reads old vd as lsrc(2); the other
          * N-1 uops read the temporary vector register instead.
          */
        val vdReg = Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
        csBundle(i + 1).lsrc(2) := Mux(isFirstUopInVd, vdReg, VECTOR_TMP_REG_LMUL.U)
        csBundle(i + 1).ldest := vdReg
        csBundle(i + 1).uopIdx := i.U
      }
    }
  }

  // uops dispatch: s_normal handles the first group of uops, s_ext the leftover ones
  val s_normal :: s_ext :: Nil = Enum(2)
  val state = RegInit(s_normal)
  val state_next = WireDefault(state)
  // uops of the current instruction that still have to be sent
  val uopRes = RegInit(0.U)

  // readyFromRename counter: the value paired with the first not-ready rename port, RenameWidth as the
  // default (presumably the number of leading ready ports - confirm against PriorityMuxDefault)
  val readyCounter = PriorityMuxDefault(
    io.readyFromRename.map(!_).zip((0 until RenameWidth).map(_.U)),
    RenameWidth.U
  )

  switch(state) {
    is(s_normal) {
      val needsExtraCycle = io.validFromIBuf(0) && (numOfUop > readyCounter) && (readyCounter =/= 0.U)
      state_next := Mux(needsExtraCycle, s_ext, s_normal)
    }
    is(s_ext) {
      val stillLeft = io.validFromIBuf(0) && (uopRes > readyCounter)
      state_next := Mux(stillLeft, s_ext, s_normal)
    }
  }

  state := state_next

  // uops outstanding in this cycle: all of them in s_normal, the stored remainder in s_ext
  val uopRes0 = Mux(state === s_normal, numOfUop, uopRes)
  val uopResJudge = Mux(state === s_normal,
    io.validFromIBuf(0) && (readyCounter =/= 0.U) && (uopRes0 > readyCounter),
    io.validFromIBuf(0) && (uopRes0 > readyCounter))
  uopRes := Mux(uopResJudge, uopRes0 - readyCounter, 0.U)

  for (i <- 0 until RenameWidth) {
    // in s_ext the window starts at the first uop not yet sent; indices past the table
    // fall back to the last entry
    val extIdx = i.U + numOfUop - uopRes
    val extUop = Mux(extIdx < maxUopSize.U, csBundle(extIdx), csBundle(maxUopSize - 1))
    decodedInsts(i) := Mux(state === s_ext, extUop, csBundle(i))
  }

  // slot holds a valid instruction that is not complex
  val validSimple = Wire(Vec(DecodeWidth, Bool()))
  for (i <- 0 until DecodeWidth) {
    validSimple(i) := io.validFromIBuf(i) && !io.isComplex(i)
  }
  val notInf = Wire(Vec(DecodeWidth, Bool()))
  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
  for (i <- 1 until DecodeWidth) {
    notInf(i) := !io.validFromIBuf(i) || validSimple(i)
  }
  // prefix AND: notInf holds for every slot up to and including i
  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
  for (i <- 0 until DecodeWidth) {
    notInfVec(i) := Cat(notInf.take(i + 1)).andR
  }

  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR,
    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
    0.U)

  validToRename.zipWithIndex.foreach { case (dst, i) =>
    val headIssuable = io.validFromIBuf(0) && readyCounter.orR
    val exceedsSlots = uopRes0 > readyCounter
    val validFix = Mux(complexNum.orR, validSimple((i + 1).U - complexNum), validSimple(i))
    val simpleFollows = validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)
    dst := MuxCase(false.B, Seq(
      // more uops than ready ports: fill exactly the ready ports
      (headIssuable && exceedsSlots) -> (readyCounter > i.U),
      // all uops fit: the first complexNum ports carry uops, the rest may carry simple instructions
      (headIssuable && !exceedsSlots) -> ((complexNum > i.U) || simpleFollows)
    ).toSeq)
  }

  readyToIBuf.zipWithIndex.foreach { case (dst, i) =>
    val exceedsSlots = uopRes0 > readyCounter
    val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
    val readyWhenFits =
      if (i == 0) readyToIBuf0
      else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B)
    dst := MuxCase(true.B, Seq(
      (io.validFromIBuf(0) && exceedsSlots || !readyCounter.orR) -> false.B,
      (io.validFromIBuf(0) && !exceedsSlots && readyCounter.orR) -> readyWhenFits
    ).toSeq)
  }

  io.deq.decodedInsts := decodedInsts
  io.deq.isVset := isVsetSimple
  io.deq.complexNum := complexNum
  io.deq.validToRename := validToRename
  io.deq.readyToIBuf := readyToIBuf

}