/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._

/**
  * Constants and bit-slicing helpers shared across the vector load/store unit (VLSU).
  */
trait VLSUConstants {
  val VLEN = 128
  // VLEN expressed in bytes
  def VLENB = VLEN/8
  // number of bits needed to index a byte offset inside one vector register
  def vOffsetBits = log2Up(VLENB)

  // four alignment classes: eew/sew = 1/2/4/8 bytes
  def alignTypes = 4
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
    * Worst case is a segment indexed instruction with eew=64, emul=8, sew=8, lmul=1 and nf=8:
    * every data register is paired with 8 index registers, and there are 8 data registers
    * (one per field), so a single instruction splits into at most 64 uops.
    */
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index uop inside an robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // selects an element within an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // selects a flow within a uop
  def fieldBits = log2Up(maxFields) + 1 // 4 bits, enough to hold 1~8

  def ewBits = 3 // width of an EEW/SEW field
  def mulBits = 3 // width of an emul/lmul field

  /**
    * Extracts the `i`-th `alignBits`-wide slice of `data`, counting from the LSB.
    *
    * @param data      value to slice; must be at least `(i + 1) * alignBits` bits wide
    * @param i         slice index (elaboration-time constant)
    * @param alignBits slice width in bits
    * @return bits `[(i + 1) * alignBits - 1 : i * alignBits]` of `data`
    */
  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    val lo = i * alignBits
    val hi = lo + alignBits - 1
    require(data.getWidth > hi)
    data(hi, lo)
  }

  // Fixed-width shorthands for getSlice: 8 / 16 / 32 / 64-bit slices.
  def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64)
}

/**
  * VLSU parameters for code that also mixes in HasXSParameter: VLEN is taken from the
  * core parameters instead of the default in VLSUConstants, and instruction-type
  * predicates plus data-merging helpers are added.
  */
trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN

  // Instruction-type predicates; they only look at the low bits of instType.
  def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U
  def isStrided(instType: UInt) = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt) = instType(0) === "b1".U
  def isNotIndexed(instType: UInt) = instType(0) === "b0".U

  /**
    * Byte-wise merge: for every bit of `mask`, take the matching byte of `newData` when the
    * bit is set and the matching byte of `oldData` otherwise.
    *
    * @param oldData data kept where the mask bit is clear
    * @param newData data taken where the mask bit is set; same width as `oldData`
    * @param mask    one bit per byte, i.e. `oldData.getWidth / 8` bits
    * @return the merged bytes, element 0 being the least significant byte
    */
  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    val mergedBytes =
      for ((useNew, byteIdx) <- mask.asBools.zipWithIndex)
        yield Mux(useNew, getByte(newData, byteIdx), getByte(oldData, byteIdx))
    VecInit(mergedBytes)
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  /**
    * Merges several incoming elements into `oldData`, each one placed at the aligned slot
    * named by the low bits of its element index.
    *
    * Every slot chooses, through ParallelPosteriorityMux, between the old slice (always
    * selectable) and slice 0 of each `newData` entry whose index targets the slot and whose
    * valid bit is high.
    * NOTE(review): presumably the last asserted select wins - confirm against
    * utility.ParallelPosteriorityMux.
    *
    * @param oldData     current register content
    * @param newData     incoming elements; the element is read from slice 0 of each entry
    * @param alignedType element size selector: b00/b01/b10/b11 = 1/2/4/8 bytes
    * @param elemIdx     element index of each `newData` entry (same length as `newData`)
    * @param valids      valid bit of each `newData` entry (same length as `newData`)
    * @return the merged data
    */
  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)

    // Builds the merged value for one element size. `idxMsb` is the top bit of the slot
    // index inside elemIdx, `pick` extracts one slot of that size from a value.
    def mergeAligned(idxMsb: Int, pick: (UInt, Int) => UInt): UInt = {
      // hitsPerSlot(slot)(src) is high when source `src` addresses slot `slot`
      val hitsPerSlot = elemIdx.map(e => UIntToOH(e(idxMsb, 0)).asBools).transpose
      val mergedSlots = hitsPerSlot.zipWithIndex.map { case (hits, slot) =>
        val selects = true.B +: hits.zip(valids).map { case (hit, valid) => hit && valid }
        val candidates = pick(oldData, slot) +: newData.map(pick(_, 0))
        ParallelPosteriorityMux(selects, candidates)
      }
      VecInit(mergedSlots).asUInt
    }

    LookupTree(alignedType, List(
      "b00".U -> mergeAligned(3, getByte(_, _)),
      "b01".U -> mergeAligned(2, getHalfWord(_, _)),
      "b10".U -> mergeAligned(1, getWord(_, _)),
      "b11".U -> mergeAligned(0, getDoubleWord(_, _))
    ))
  }

  /** Single-element form of the merge above; the one element is treated as always valid. */
  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
}

/** Base class for VLSU modules: XSModule plus VLSU parameters and circular-queue pointer helpers. */
abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper

/** Base class for VLSU bundles: XSBundle plus VLSU parameters. */
abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

/** A VLSU bundle that additionally carries a DynInst in `uop`. */
class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

// Where is VecOperand used?
/**
  * Operand bundle of a vector memory uop: mask, data, address operands and the
  * vtype-like fields declared below, in addition to the inherited `uop`.
  */
class VecOperand(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vmask = UInt(VLEN.W) // the instruction's mask, as read from the register
  val vecData = UInt(VLEN.W)
  val baseAddr = UInt(VAddrBits.W) // base address from rs1
  val stride = UInt(XLEN.W) // stride from rs2
  val index = UInt(VLEN.W) // index from vs2
  val pvd = UInt(5.W) // physical vector register destination
  val lmul = UInt(3.W)
  val sew = UInt(2.W)
  val vma = Bool()
  val vta = Bool()
  val inner_idx = UInt(3.W) // index of this uop among the (up to) 8 uops
  val vl = UInt(8.W)
  // TODO: How will OOO calculate vector register numbers?
  //  (EEW / SEW) * LMUL or (vl * EEW) / VLEN ?
  //  So OOO will always use eew ?
  // val eew = UInt(3.W)
  val total_num = UInt(3.W) // how many uops the instruction is split into
}

/**
  * Decoded fields of a vector memory instruction. `apply` drives every field from the
  * raw instruction bits and returns this bundle.
  */
class VecDecode(implicit p: Parameters) extends VLSUBundle {
  val uop_segment_num = UInt(3.W)
  val uop_type = UInt(2.W)
  val mask_en = Bool()
  val uop_unit_stride_whole_reg = Bool()
  val uop_unit_stride_mask = Bool()
  val uop_unit_stride_fof = Bool()
  val uop_eew = UInt(ewBits.W) // this is also the index width when the inst is an indexed load

  /**
    * Connects all fields from the instruction word.
    *
    * @param inst raw instruction; bits 31..12 are read
    * @return this bundle, so the call can be chained
    */
  def apply(inst: UInt) = {
    this.uop_segment_num := inst(31, 29)
    this.uop_type := inst(27, 26)
    this.mask_en := inst(25)
    // bits 24..20 select the unit-stride variant
    this.uop_unit_stride_whole_reg := (inst(24,20) === "b01000".U)
    this.uop_unit_stride_mask := (inst(24,20) === "b01011".U)
    this.uop_unit_stride_fof := (inst(24,20) === "b10000".U)
    this.uop_eew := inst(12 + ewBits - 1, 12)
    this
  }

  def isUnitStride = uop_type === "b00".U
  def isStrided = uop_type === "b10".U
  def isIndexed = uop_type(0) === "b1".U
}

/** Vector-only part of a vector load execution result. */
class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W)
  val exp = Bool()
  val is_first_ele = Bool()
  val exp_ele_index = UInt(elemIdxBits.W) // element index
  val uopQueuePtr = new VluopPtr
  val flowPtr = new VlflowPtr
}

/** MemExuOutput extended with the vector-specific fields in `vec`. */
class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
}

/** MemExuOutput extended with an element index and a store uop queue pointer. */
class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val exp_ele_index = UInt(elemIdxBits.W)
  val uopQueuePtr = new VsUopPtr
}

/** State of one vector memory uop: masks, data, address operands and decode results. */
class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask = UInt(VLENB.W) // each bit for a flow
  val byteMask = UInt(VLENB.W) // each bit for a byte
  val data = UInt(VLEN.W)
  // val fof = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = UInt(ewBits.W) // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
}

/** One memory flow (a single access) generated from a vector uop. */
class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr = UInt(VAddrBits.W)
  val mask = UInt(VLENB.W)
  val alignedType = UInt(alignTypeBits.W)
  val exp = Bool()
  val flow_idx = UInt(elemIdxBits.W)
  val is_first_ele = Bool()
}

/**
  * Maps an emul/lmul encoding to a register count: fractional multipliers and 1 give 1,
  * larger multipliers give 2/4/8.
  */
object MulNum {
  def apply(mul: UInt): UInt = { // mul means emul or lmul
    LookupTree(mul, List(
      "b101".U -> 1.U, // 1/8
      "b110".U -> 1.U, // 1/4
      "b111".U -> 1.U, // 1/2
      "b000".U -> 1.U, // 1
      "b001".U -> 2.U, // 2
      "b010".U -> 4.U, // 4
      "b011".U -> 8.U  // 8
    ))
  }
}

/**
  * Number of bytes to write per register for a given emul/lmul encoding:
  * when the multiplier is greater than or equal to 1 the entire register (16 bytes) is
  * written; otherwise only the specified number of bytes.
  */
object MulDataSize {
  def apply(mul: UInt): UInt = { // mul means emul or lmul
    LookupTree(mul, List(
      "b101".U -> 2.U,  // 1/8
      "b110".U -> 4.U,  // 1/4
      "b111".U -> 8.U,  // 1/2
      "b000".U -> 16.U, // 1
      "b001".U -> 16.U, // 2
      "b010".U -> 16.U, // 4
      "b011".U -> 16.U  // 8
    ))
  }
}

/** Maps an eew encoding to 16/8/4/2 for element sizes of 1/2/4/8 bytes. */
object OneRegNum {
  def apply(eew: UInt): UInt = { // eew is the instruction's width encoding
    LookupTree(eew, List(
      "b000".U -> 16.U, // 1
      "b101".U -> 8.U,  // 2
      "b110".U -> 4.U,  // 4
      "b111".U -> 2.U   // 8
    ))
  }
}

// index inst read data byte
/** Bytes per element for a sew encoding (b000..b011 -> 1/2/4/8). */
object SewDataSize {
  def apply(sew: UInt): UInt = {
    LookupTree(sew, List(
      "b000".U -> 1.U, // 1
      "b001".U -> 2.U, // 2
      "b010".U -> 4.U, // 4
      "b011".U -> 8.U  // 8
    ))
  }
}

// strided inst read data byte
/** Bytes per element for an eew encoding (b000/b101/b110/b111 -> 1/2/4/8). */
object EewDataSize {
  def apply(eew: UInt): UInt = {
    LookupTree(eew, List(
      "b000".U -> 1.U, // 1
      "b101".U -> 2.U, // 2
      "b110".U -> 4.U, // 4
      "b111".U -> 8.U  // 8
    ))
  }
}

/**
  * Bytes accessed per load flow, selected by instruction type: unit-stride uses the
  * emul-based size, strided/segment types use the eew size, indexed types use the sew size.
  */
object loadDataSize {
  def apply(instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    LookupTree(instType, List(
      "b000".U -> MulDataSize(emul), // unit-stride
      "b010".U -> EewDataSize(eew),  // strided
      "b001".U -> SewDataSize(sew),  // indexed-unordered
      "b011".U -> SewDataSize(sew),  // indexed-ordered
      "b100".U -> EewDataSize(eew),  // segment unit-stride
      "b110".U -> EewDataSize(eew),  // segment strided
      "b101".U -> SewDataSize(sew),  // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    ))
  }
}

// object GenVecLoadMask extends VLSUConstants {
//   def apply(alignedType: UInt, vaddr: UInt): UInt = {
//     LookupTree(alignedType, List(
//       "b00".U -> 0x1.U, // b1
//       "b01".U -> 0x3.U, // b11
//       "b10".U -> 0xf.U, // b1111
//       "b11".U -> 0xff.U // b11111111
//     )) << vaddr(vOffsetBits - 1, 0)
//   }
// }

/** Bytes accessed per store flow, selected by instruction type. */
object storeDataSize {
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    LookupTree(instType, List(
      "b000".U -> EewDataSize(eew), // unit-stride, do not use
      "b010".U -> EewDataSize(eew), // strided
      "b001".U -> SewDataSize(sew), // indexed-unordered
      "b011".U -> SewDataSize(sew), // indexed-ordered
      "b100".U -> EewDataSize(eew), // segment unit-stride
      "b110".U -> EewDataSize(eew), // segment strided
      "b101".U -> SewDataSize(sew), // segment indexed-unordered
      "b111".U -> SewDataSize(sew)  // segment indexed-ordered
    ))
  }
}

/**
  * 16-bit byte mask with the low `storeDataSize` bits set, built as
  * (one-hot of the size) - 1.
  */
object GenVecStoreMask {
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    val mask = Wire(UInt(16.W))
    mask := UIntToOH(storeDataSize(instType = instType, eew = eew, sew = sew)) - 1.U
    mask
  }
}

/**
  * EewEq8/16/32/64 are used to obtain immediate addresses for index instructions:
  * each selects the `flow_inner_idx`-th 8/16/32/64-bit element out of a 128-bit index value.
  */
object EewEq8 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt =
    LookupTree(flow_inner_idx, (0 until 16).map(i => i.U -> index(8 * i + 7, 8 * i)))
}

object EewEq16 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt =
    LookupTree(flow_inner_idx, (0 until 8).map(i => i.U -> index(16 * i + 15, 16 * i)))
}

object EewEq32 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt =
    LookupTree(flow_inner_idx, (0 until 4).map(i => i.U -> index(32 * i + 31, 32 * i)))
}

object EewEq64 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt =
    LookupTree(flow_inner_idx, (0 until 2).map(i => i.U -> index(64 * i + 63, 64 * i)))
}

/** Picks the index element for a flow, using the extractor that matches the eew encoding. */
object IndexAddr {
  def apply(index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    LookupTree(eew, List(
      "b000".U -> EewEq8 (index = index, flow_inner_idx = flow_inner_idx), // Imm is 1 Byte // TODO: index maybe cross register
      "b101".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx), // Imm is 2 Byte
      "b110".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx), // Imm is 4 Byte
      "b111".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx)  // Imm is 8 Byte
    ))
  }
}
/*
object RegFLowCnt {
  def apply (emul: UInt, lmul:UInt, eew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {

    (LookupTree(Cat(emul,lmul),List(
      "b001000".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 2,lmul = 1
      "b010000".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 4,lmul = 1
      "b011000".U -> ((uopIdx(2,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 8,lmul = 1
      "b010001".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 4,lmul = 2
      "b011001".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 8,lmul = 2
      "b011010".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx) //emul = 8,lmul = 4
    )))}
}

object AddrFLowCnt {
  def apply (emul: UInt, lmul:UInt, sew:UInt, uopIdx: UInt, flowIdx: UInt):UInt = {
    (LookupTree(Cat(lmul,emul),List(
      "b001000".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 2, emul = 1
      "b010000".U -> ((uopIdx(1,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 4, emul = 1
      "b011000".U -> ((uopIdx(2,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 8, emul = 1
      "b010001".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 4, emul = 2
      "b011001".U -> ((uopIdx(1,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 8, emul = 2
      "b011011".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx) //lmul = 8, emul = 4
    )))}
}
*/

/**
  * Flow counter keyed by the ratio emulNum / lmulNum (2, 4 or 8): the low uopIdx bits are
  * shifted past the per-register flow count (16 >> eew) and flowIdx is added.
  * NOTE(review): there is no entry for ratio 1 (it is commented out); what LookupTree
  * yields when no key matches depends on its implementation - confirm this is intended.
  */
object RegFLowCnt {
  def apply(emulNum: UInt, lmulNum: UInt, eew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    LookupTree(emulNum/lmulNum, List(
      //"d1".U -> flowIdx,
      "d2".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),
      "d4".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),
      "d8".U -> ((uopIdx(2,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx)
    ))
  }
}

/**
  * Flow counter keyed by the ratio lmulNum / emulNum (1, 2, 4 or 8); same construction as
  * RegFLowCnt but sized by sew, and ratio 1 maps straight to flowIdx.
  */
object AddrFLowCnt {
  def apply(emulNum: UInt, lmulNum: UInt, sew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    LookupTree(lmulNum/emulNum, List(
      "d1".U -> flowIdx,
      "d2".U -> ((uopIdx(0  ) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx),
      "d4".U -> ((uopIdx(1,0) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx),
      "d8".U -> ((uopIdx(2,0) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx)
    ))
  }
}


/** log2 lookup for the powers of two 1, 2, 4, 8 and 16. */
object Log2Num {
  def apply(num: UInt): UInt = {
    LookupTree(num, List(
      16.U -> 4.U,
      8.U  -> 3.U,
      4.U  -> 2.U,
      2.U  -> 1.U,
      1.U  -> 0.U
    ))
  }
}

/**
  * when emul is less than or equal to 1, the nf is equal to uop_inner_idx;
  * when emul is equal to 2, the nf is equal to uop_inner_idx(2,1), and so on
  */
object GenSegNfIdx {
  def apply(mul: UInt, uopIdx: UInt): UInt = { // mul means lmul or emul
    LookupTree(mul, List(
      "b101".U -> uopIdx,      // 1/8
      "b110".U -> uopIdx,      // 1/4
      "b111".U -> uopIdx,      // 1/2
      "b000".U -> uopIdx,      // 1
      "b001".U -> uopIdx(2,1), // 2
      "b010".U -> uopIdx(2),   // 4
      "b011".U -> 0.U          // 8
    ))
  }
}

/** Field index taken from the upper uopIdx bits, keyed by the (emul, lmul) pair. */
object GenSegNfIdxMul {
  def apply(emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    LookupTree(Cat(emul,lmul), List(
      "b001000".U -> uopIdx(5,1), //emul = 2,lmul = 1
      "b010000".U -> uopIdx(5,2), //emul = 4,lmul = 1
      "b011000".U -> uopIdx(5,3), //emul = 8,lmul = 1
      "b010001".U -> uopIdx(5,3), //emul = 4,lmul = 2
      "b011001".U -> uopIdx(5,4), //emul = 8,lmul = 2
      "b011010".U -> uopIdx(5,5)  //emul = 8,lmul = 4
    ))
  }
}

/**
  * when emul is less than or equal to 1, only one segEmulIdx, so the segEmulIdx is 0.U;
  * when emul is equal to 2, the segEmulIdx is equal to uopIdx(0), and so on
  */
object GenSegMulIdx {
  def apply(mul: UInt, uopIdx: UInt): UInt = { // mul means emul or lmul
    LookupTree(mul, List(
      "b101".U -> 0.U,         // 1/8
      "b110".U -> 0.U,         // 1/4
      "b111".U -> 0.U,         // 1/2
      "b000".U -> 0.U,         // 1
      "b001".U -> uopIdx(0),   // 2
      "b010".U -> uopIdx(1,0), // 4
      "b011".U -> uopIdx(2,0)  // 8
    ))
  }
}

//eew decode
/** Zero-extends the low two bits of eew to ewBits bits. */
object EewLog2 extends VLSUConstants {
  // def apply (eew: UInt): UInt = {
  //   (LookupTree(eew,List(
  //     "b000".U -> "b000".U , // 1
  //     "b101".U -> "b001".U , // 2
  //     "b110".U -> "b010".U , // 4
  //     "b111".U -> "b011".U   // 8
  //   )))}
  def apply(eew: UInt): UInt = ZeroExt(eew(1, 0), ewBits)
}

/**
  * unit-stride instructions don't use this method;
  * other instructions generate realFlowNum by EmulDataSize >> eew(1,0),
  * EmulDataSize means the number of bytes that need to be written to the register,
  * eew(1,0) means the number of bytes written at once
  */
object GenRealFlowNum {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val eewFlowNum = (MulDataSize(emul) >> eew(1,0)).asUInt
    val sewFlowNum = (MulDataSize(lmul) >> sew(1,0)).asUInt
    // indexed accesses follow whichever of emul/lmul is larger (signed compare)
    val indexedFlowNum = Mux(emul.asSInt > lmul.asSInt, eewFlowNum, sewFlowNum)
    LookupTree(instType, List(
      "b000".U -> eewFlowNum,     // store use, load do not use
      "b010".U -> eewFlowNum,     // strided
      "b001".U -> indexedFlowNum, // indexed-unordered
      "b011".U -> indexedFlowNum, // indexed-ordered
      "b100".U -> eewFlowNum,     // segment unit-stride
      "b110".U -> eewFlowNum,     // segment strided
      "b101".U -> indexedFlowNum, // segment indexed-unordered
      "b111".U -> indexedFlowNum  // segment indexed-ordered
    ))
  }
}

/**
  * GenRealFlowLog2 = Log2(GenRealFlowNum)
  */
object GenRealFlowLog2 extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    // multipliers >= 1 contribute 0; fractional (negative) encodings are kept as-is
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew(1, 0)
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    val indexedRealFlowLog2 = Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)
    LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2,     // unit-stride
      "b010".U -> eewRealFlowLog2,     // strided
      "b001".U -> indexedRealFlowLog2, // indexed-unordered
      "b011".U -> indexedRealFlowLog2, // indexed-ordered
      "b100".U -> eewRealFlowLog2,     // segment unit-stride
      "b110".U -> eewRealFlowLog2,     // segment strided
      "b101".U -> indexedRealFlowLog2, // segment indexed-unordered
      "b111".U -> indexedRealFlowLog2  // segment indexed-ordered
    ))
  }
}

/**
  * GenElemIdx generates an element index within an instruction, given a certain uopIdx and
  * a known flowIdx inside the uop.
  *
  * eew = 0, elemIdx = uopIdx ## flowIdx(3, 0)
  * eew = 1, elemIdx = uopIdx ## flowIdx(2, 0)
  * eew = 2, elemIdx = uopIdx ## flowIdx(1, 0)
  * eew = 3, elemIdx = uopIdx ## flowIdx(0)
  *
  * The concatenation is truncated to log2Up(maxElemNum) bits.
  */
object GenElemIdx extends VLSUConstants {
  def apply(alignedType: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    LookupTree(
      alignedType,
      (0 until alignTypes).map(i =>
        i.U -> ((uopIdx ## flowIdx(log2Up(VLENB) - i - 1, 0))(log2Up(maxElemNum) - 1, 0))
      )
    )
  }
}

/**
  * GenVLMAXLog2 computes lmul + log2(VLENB) - sew; GenVLMAX is 1 shifted left by that amount.
  * NOTE(review): this equals log2(VLMAX) = log2(LMUL * VLEN / SEW) provided lmul is the log2
  * multiplier encoding and sew is log2 of the element size in bytes - confirm with callers.
  */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}

/**
  * Element count of a unit-stride whole-register access: nfields times the number of
  * elements of width eew(1,0) that fit in one register of VLENB bytes.
  */
object GenUSWholeRegVL extends VLSUConstants {
  def apply(nfields: UInt, eew: UInt): UInt = {
    LookupTree(eew(1, 0), List(
      "b00".U -> (nfields << (log2Up(VLENB) - 0)),
      "b01".U -> (nfields << (log2Up(VLENB) - 1)),
      "b10".U -> (nfields << (log2Up(VLENB) - 2)),
      "b11".U -> (nfields << (log2Up(VLENB) - 3))
    ))
  }
}

/**
  * Effective vl of a unit-stride mask load/store: the number of bytes needed to hold `vl`
  * mask bits, i.e. ceil(vl / 8).
  *
  * A plain `vl >> 3` rounds down and loses the last, partially filled byte whenever vl is
  * not a multiple of 8, so one is added in that case. The result has the same width as `vl`.
  *
  * @param vl vector length in elements (mask bits)
  * @return ceil(vl / 8)
  */
object GenUSMaskRegVL extends VLSUConstants {
  def apply(vl: UInt): UInt = {
    val wholeBytes = (vl >> 3.U).asUInt
    // any of the low three bits set => a trailing, partially used byte exists
    val hasPartialByte = (vl & 7.U).orR
    Mux(hasPartialByte, wholeBytes + 1.U, wholeBytes)
  }
}

/** Expands a per-flow mask into a per-byte mask by repeating each bit 1/2/4/8 times. */
object GenUopByteMask {
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> flowMask,
      "b01".U -> FillInterleaved(2, flowMask),
      "b10".U -> FillInterleaved(4, flowMask),
      "b11".U -> FillInterleaved(8, flowMask)
    ))
  }
}

/**
  * Byte mask inside a register for the element at `elemIdx`: a one-hot of the low index
  * bits, with each bit repeated 1/2/4/8 times according to the element size.
  */
object GenFlowMaskInsideReg extends VLSUConstants {
  def apply(alignedType: UInt, elemIdx: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> UIntToOH(elemIdx(3, 0)),
      "b01".U -> FillInterleaved(2, UIntToOH(elemIdx(2, 0))),
      "b10".U -> FillInterleaved(4, UIntToOH(elemIdx(1, 0))),
      "b11".U -> FillInterleaved(8, UIntToOH(elemIdx(0)))
    ))
  }
}

// TODO: delete this in vs flow queue
/**
  * 7-bit element index: uopIdx shifted by log2 of the flows per uop, plus flowIdx.
  * Flows per uop are sized by emul/eew for instType(1,0) of b00 or b10, or when emul
  * exceeds lmul (signed compare); otherwise by lmul/sew.
  */
object GenEleIdx {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    val eleIdx = Wire(UInt(7.W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || emul.asSInt > lmul.asSInt) {
      eleIdx := (uopIdx << Log2Num((MulDataSize(emul) >> eew(1,0)).asUInt)).asUInt + flowIdx
    }.otherwise {
      eleIdx := (uopIdx << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx
    }
    eleIdx
  }
}