/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._

/**
 * Common used parameters or functions in vlsu
 */
trait VLSUConstants {
  val VLEN = 128            // vector register width in bits (overridden from coreParams below)
  def VLENB = VLEN/8        // vector register width in bytes
  def vOffsetBits = log2Up(VLENB) // bits-width to index offset inside a vector reg

  def alignTypes = 4        // eew/sew = 1/2/4/8 bytes, i.e. 4 possible element alignments
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
   * In the most extreme cases like a segment indexed instruction, eew=64, emul=8, sew=8, lmul=1,
   * and nf=8, each data reg is mapped with 8 index regs and there are 8 data regs in total,
   * each for a field. Therefore an instruction can be divided into 64 uops at most.
   */
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index uop inside an robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // to index which element in an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // to index which flow in a uop
  def fieldBits = log2Up(maxFields) + 1 // 4-bits to indicate 1~8

  def ewBits = 3  // bits-width of EEW/SEW
  def mulBits = 3 // bits-width of emul/lmul

  // Extract the i-th `alignBits`-wide slice of `data` (slice 0 is the least significant).
  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    require(data.getWidth >= (i+1) * alignBits)
    data((i+1) * alignBits - 1, i * alignBits)
  }

  def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64)
}

trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN

  // instType encoding used throughout this file:
  //   bit 2    = segment instruction
  //   bits 1,0 = addressing mode (00 unit-stride, 10 strided, x1 indexed)
  def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U
  def isStrided(instType: UInt) = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt) = instType(0) === "b1".U
  def isNotIndexed(instType: UInt) = instType(0) === "b0".U
  def isSegment(instType: UInt) = instType(2) === "b1".U

  // Byte-granular merge: for each set bit in `mask` take the corresponding byte of
  // `newData`, otherwise keep the byte of `oldData`. Returns a Vec of bytes.
  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
      Mux(en, getByte(newData, i), getByte(oldData, i))
    })
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  // Merge a set of valid new elements into `oldData`. Each element of `newData` is placed
  // into the slot selected by the low bits of its `elemIdx`; `alignedType` chooses the
  // element width (b00 byte, b01 half, b10 word, b11 double). For each slot, the per-element
  // one-hot selects are transposed so one mux per slot picks among all candidate elements
  // (with `oldData` as the fallback when no valid element targets the slot).
  // NOTE(review): slot-conflict priority depends on ParallelPosteriorityMux semantics
  // (defined outside this file) — presumably later entries win; confirm at call sites.
  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)
    LookupTree(alignedType, List(
      "b00".U -> VecInit(elemIdx.map(e => UIntToOH(e(3, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getByte(oldData, i) +: newData.map(getByte(_))
        )}).asUInt,
      "b01".U -> VecInit(elemIdx.map(e => UIntToOH(e(2, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getHalfWord(oldData, i) +: newData.map(getHalfWord(_))
        )}).asUInt,
      "b10".U -> VecInit(elemIdx.map(e => UIntToOH(e(1, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getWord(oldData, i) +: newData.map(getWord(_))
        )}).asUInt,
      "b11".U -> VecInit(elemIdx.map(e => UIntToOH(e(0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getDoubleWord(oldData, i) +: newData.map(getDoubleWord(_))
        )}).asUInt
    ))
  }

  // Single-element convenience overload of the merge above.
  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
}

abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper

abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

// Where is VecOperand used?
class VecOperand(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vmask = UInt(VLEN.W) // the mask of inst which is read from reg
  val vecData = UInt(VLEN.W)
  val baseAddr = UInt(VAddrBits.W) // base address from rs1
  val stride = UInt(XLEN.W) // stride from rs2
  val index = UInt(VLEN.W) // index from vs2
  val pvd = UInt(5.W) // physical vector register destination
  val lmul = UInt(3.W)
  val sew = UInt(2.W)
  val vma = Bool()
  val vta = Bool()
  val inner_idx = UInt(3.W) // the number index among 8 uop
  val vl = UInt(8.W)
  // TODO: How will OOO calculate vector register numbers?
  // (EEW / SEW) * LMUL or (vl * EEW) / VLEN ?
  // So OOO will always use eew ?
  // val eew = UInt(3.W)
  val total_num = UInt(3.W) // An inst to how many uops
}

class VecDecode(implicit p: Parameters) extends VLSUBundle {
  val uop_segment_num = UInt(3.W)
  val uop_type = UInt(2.W)
  val mask_en = Bool()
  val uop_unit_stride_whole_reg = Bool()
  val uop_unit_stride_mask = Bool()
  val uop_unit_stride_fof = Bool()
  val uop_eew = UInt(ewBits.W) // this is also the index width when the inst is an indexed load

  /** Decode the vector memory access fields straight out of the raw 32-bit instruction. */
  def apply(inst: UInt) = {
    this.uop_segment_num := inst(31, 29)
    this.uop_type := inst(27, 26)
    this.mask_en := inst(25)
    this.uop_unit_stride_whole_reg := (inst(24, 20) === "b01000".U)
    this.uop_unit_stride_mask := (inst(24, 20) === "b01011".U)
    this.uop_unit_stride_fof := (inst(24, 20) === "b10000".U)
    this.uop_eew := inst(12 + ewBits - 1, 12)
    this
  }

  def isUnitStride = uop_type === "b00".U
  def isStrided = uop_type === "b10".U
  def isIndexed = uop_type(0) === "b1".U
}

class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W)
  val exp = Bool()
  val is_first_ele = Bool()
  val elemIdx = UInt(elemIdxBits.W) // element index
  val uopQueuePtr = new VluopPtr
  val flowPtr = new VlflowPtr
}

class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
}

class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val elemIdx = UInt(elemIdxBits.W)
  val uopQueuePtr = new VsUopPtr
}

class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask = UInt(VLENB.W) // each bit for a flow
  val byteMask = UInt(VLENB.W) // each bit for a byte
  val data = UInt(VLEN.W)
  // val fof = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = UInt(ewBits.W) // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
  val vd_last_uop = Bool()
  val vd_first_uop = Bool()
}

class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr = UInt(VAddrBits.W)
  val mask = UInt(VLENB.W)
  val alignedType = UInt(alignTypeBits.W)
  val exp = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val is_first_ele = Bool()
}

/** Number of whole vector registers covered by an emul/lmul encoding (fractional muls round up to 1). */
object MulNum {
  def apply (mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul, List(
      "b101".U -> 1.U, // 1/8
      "b110".U -> 1.U, // 1/4
      "b111".U -> 1.U, // 1/2
      "b000".U -> 1.U, // 1
      "b001".U -> 2.U, // 2
      "b010".U -> 4.U, // 4
      "b011".U -> 8.U  // 8
    )))}
}

/**
 * when emul is greater than or equal to 1, this means the entire register needs to be written;
 * otherwise, only write the specified number of bytes */
object MulDataSize {
  def apply (mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul, List(
      "b101".U -> 2.U,  // 1/8
      "b110".U -> 4.U,  // 1/4
      "b111".U -> 8.U,  // 1/2
      "b000".U -> 16.U, // 1
      "b001".U -> 16.U, // 2
      "b010".U -> 16.U, // 4
      "b011".U -> 16.U  // 8
    )))}
}

/** Number of elements held by one vector register for a given eew encoding. */
object OneRegNum {
  // NOTE: the argument is eew (element width encoding), not emul/lmul —
  // the old copied comment here was wrong.
  def apply (eew: UInt): UInt = {
    (LookupTree(eew, List(
      "b000".U -> 16.U, // 1-byte elements
      "b101".U -> 8.U,  // 2-byte elements
      "b110".U -> 4.U,  // 4-byte elements
      "b111".U -> 2.U   // 8-byte elements
    )))}
}

// index inst read data byte
object SewDataSize {
  def apply (sew: UInt): UInt = {
    (LookupTree(sew, List(
      "b000".U -> 1.U, // 1
      "b001".U -> 2.U, // 2
      "b010".U -> 4.U, // 4
      "b011".U -> 8.U  // 8
    )))}
}

// strided inst read data byte
object EewDataSize {
  def apply (eew: UInt): UInt = {
    (LookupTree(eew, List(
      "b000".U -> 1.U, // 1
      "b101".U -> 2.U, // 2
      "b110".U -> 4.U, // 4
      "b111".U -> 8.U  // 8
    )))}
}

/** Bytes written back per uop for a load, selected by instType (see HasVLSUParameters encoding). */
object loadDataSize {
  def apply (instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType, List(
      "b000".U -> MulDataSize(emul), // unit-stride
      "b010".U -> EewDataSize(eew),  // strided
      "b001".U -> SewDataSize(sew),  // indexed-unordered
      "b011".U -> SewDataSize(sew),  // indexed-ordered
      "b100".U -> EewDataSize(eew),  // segment unit-stride
      "b110".U -> EewDataSize(eew),  // segment strided
      "b101".U -> SewDataSize(sew),  // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

// object GenVecLoadMask extends VLSUConstants {
//   def apply(alignedType: UInt, vaddr: UInt): UInt = {
//     LookupTree(alignedType, List(
//       "b00".U -> 0x1.U, // b1
//       "b01".U -> 0x3.U, // b11
//       "b10".U -> 0xf.U, // b1111
//       "b11".U -> 0xff.U // b11111111
//     )) << vaddr(vOffsetBits - 1, 0)
//   }
// }

/** Bytes accessed per flow for a store, selected by instType (see HasVLSUParameters encoding). */
object storeDataSize {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType, List(
      "b000".U -> EewDataSize(eew), // unit-stride, do not use
      "b010".U -> EewDataSize(eew), // strided
      "b001".U -> SewDataSize(sew), // indexed-unordered
      "b011".U -> SewDataSize(sew), // indexed-ordered
      "b100".U -> EewDataSize(eew), // segment unit-stride
      "b110".U -> EewDataSize(eew), // segment strided
      "b101".U -> SewDataSize(sew), // segment indexed-unordered
      "b111".U -> SewDataSize(sew)  // segment indexed-ordered
    )))}
}

/** Right-aligned byte mask covering storeDataSize bytes (UIntToOH(n) - 1 = n ones). */
object GenVecStoreMask {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    val mask = Wire(UInt(16.W))
    mask := UIntToOH(storeDataSize(instType = instType, eew = eew, sew = sew)) - 1.U
    mask
  }
}

/**
 * these are used to obtain immediate addresses for index instruction */
object EewEq8 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U  -> index(7, 0),
      1.U  -> index(15, 8),
      2.U  -> index(23, 16),
      3.U  -> index(31, 24),
      4.U  -> index(39, 32),
      5.U  -> index(47, 40),
      6.U  -> index(55, 48),
      7.U  -> index(63, 56),
      8.U  -> index(71, 64),
      9.U  -> index(79, 72),
      10.U -> index(87, 80),
      11.U -> index(95, 88),
      12.U -> index(103, 96),
      13.U -> index(111, 104),
      14.U -> index(119, 112),
      15.U -> index(127, 120)
    )))}
}

object EewEq16 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(15, 0),
      1.U -> index(31, 16),
      2.U -> index(47, 32),
      3.U -> index(63, 48),
      4.U -> index(79, 64),
      5.U -> index(95, 80),
      6.U -> index(111, 96),
      7.U -> index(127, 112)
    )))}
}

object EewEq32 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(31, 0),
      1.U -> index(63, 32),
      2.U -> index(95, 64),
      3.U -> index(127, 96)
    )))}
}

object EewEq64 {
  def apply (index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(63, 0),
      1.U -> index(127, 64)
    )))}
}

/** Select the flow_inner_idx-th index element out of the vs2 index register, by eew. */
object IndexAddr {
  def apply (index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    (LookupTree(eew, List(
      "b000".U -> EewEq8 (index = index, flow_inner_idx = flow_inner_idx), // Imm is 1 Byte // TODO: index maybe cross register
      "b101".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx), // Imm is 2 Byte
      "b110".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx), // Imm is 4 Byte
      "b111".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx)  // Imm is 8 Byte
    )))}
}

/*
object RegFLowCnt {
  def apply (emul: UInt, lmul:UInt, eew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {

    (LookupTree(Cat(emul,lmul),List(
      "b001000".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 2,lmul = 1
      "b010000".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 4,lmul = 1
      "b011000".U -> ((uopIdx(2,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 8,lmul = 1
      "b010001".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 4,lmul = 2
      "b011001".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 8,lmul = 2
      "b011010".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx) //emul = 8,lmul = 4
    )))}
}

object AddrFLowCnt {
  def apply (emul: UInt, lmul:UInt, sew:UInt, uopIdx: UInt, flowIdx: UInt):UInt = {
    (LookupTree(Cat(lmul,emul),List(
      "b001000".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 2, emul = 1
      "b010000".U -> ((uopIdx(1,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 4, emul = 1
      "b011000".U -> ((uopIdx(2,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 8, emul = 1
      "b010001".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 4, emul = 2
      "b011001".U -> ((uopIdx(1,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 8, emul = 2
      "b011011".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx) //lmul = 8, emul = 4
    )))}
}
*/

/** Flow count inside a register group; keyed by the (hardware) ratio emulNum/lmulNum. */
object RegFLowCnt {
  def apply (emulNum: UInt, lmulNum: UInt, eew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    (LookupTree(emulNum/lmulNum, List(
      //"d1".U -> flowIdx,
      "d2".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),
      "d4".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),
      "d8".U -> ((uopIdx(2,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx)
    )))}
}

/** Address flow count; keyed by the (hardware) ratio lmulNum/emulNum. */
object AddrFLowCnt {
  def apply (emulNum: UInt, lmulNum: UInt, sew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    (LookupTree(lmulNum/emulNum, List(
      "d1".U -> flowIdx,
      "d2".U -> ((uopIdx(0  ) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx),
      "d4".U -> ((uopIdx(1,0) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx),
      "d8".U -> ((uopIdx(2,0) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx)
    )))}
}

/** Hardware log2 for the power-of-two values 1..16 only; other inputs are not in the table. */
object Log2Num {
  def apply (num: UInt): UInt = {
    (LookupTree(num, List(
      16.U -> 4.U,
      8.U  -> 3.U,
      4.U  -> 2.U,
      2.U  -> 1.U,
      1.U  -> 0.U
    )))}
}

/**
 * when emul is less than or equal to 1, the nf is equal to uopIdx;
 * when emul is equal to 2, the nf is equal to uopIdx >> 1, and so on*/
object GenSegNfIdx {
  def apply (mul: UInt, uopIdx: UInt): UInt = { // mul means lmul or emul
    (LookupTree(mul, List(
      "b101".U -> uopIdx,          // 1/8
      "b110".U -> uopIdx,          // 1/4
      "b111".U -> uopIdx,          // 1/2
      "b000".U -> uopIdx,          // 1
      "b001".U -> (uopIdx >> 1.U), // 2
      "b010".U -> (uopIdx >> 2.U), // 4
      "b011".U -> (uopIdx >> 3.U)  // 8
    )))}
}

/** Uop index within the current field: wrap uopIdx by the larger of emul/lmul (indexed) or emul. */
object GenUopIdxInField {
  def apply (instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val isIndexed = instType(0)
    // For indexed accesses a field spans max(lmul, emul) registers; otherwise emul.
    val mulInField = Mux(
      isIndexed,
      Mux(lmul.asSInt > emul.asSInt, lmul, emul),
      emul
    )
    LookupTree(mulInField, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
}

object GenSegNfIdxMul {
  def apply (emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    (LookupTree(Cat(emul, lmul), List(
      "b001000".U -> uopIdx(5,1), //emul = 2,lmul = 1
      "b010000".U -> uopIdx(5,2), //emul = 4,lmul = 1
      "b011000".U -> uopIdx(5,3), //emul = 8,lmul = 1
      "b010001".U -> uopIdx(5,3), //emul = 4,lmul = 2
      "b011001".U -> uopIdx(5,4), //emul = 8,lmul = 2
      "b011010".U -> uopIdx(5,5)  //emul = 8,lmul = 4
    )))}
}

/**
 * when emul is less than or equal to 1, only one segEmulIdx, so the segEmulIdx is 0.U;
 * when emul is equal to 2, the segEmulIdx is equal to uopIdx(0), and so on*/
object GenSegMulIdx {
  def apply (mul: UInt, uopIdx: UInt): UInt = { //mul means emul or lmul
    (LookupTree(mul, List(
      "b101".U -> 0.U,         // 1/8
      "b110".U -> 0.U,         // 1/4
      "b111".U -> 0.U,         // 1/2
      "b000".U -> 0.U,         // 1
      "b001".U -> uopIdx(0),   // 2
      "b010".U -> uopIdx(1,0), // 4
      "b011".U -> uopIdx(2,0)  //8
    )))}
}

//eew decode
object EewLog2 extends VLSUConstants {
  // def apply (eew: UInt): UInt = {
  //   (LookupTree(eew,List(
  //     "b000".U -> "b000".U , // 1
  //     "b101".U -> "b001".U , // 2
  //     "b110".U -> "b010".U , // 4
  //     "b111".U -> "b011".U   // 8
  //   )))}
  // The two low bits of the eew encoding already equal log2(bytes); see the table above.
  def apply(eew: UInt): UInt = ZeroExt(eew(1, 0), ewBits)
}

/**
 * unit-stride instructions don't use this method;
 * other instructions generate realFlowNum by EmulDataSize >> eew(1,0),
 * EmulDataSize means the number of bytes that need to be written to the register,
 * eew(1,0) means the number of bytes written at once*/
object GenRealFlowNum {
  def apply (instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType, List(
      "b000".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // store use, load do not use
      "b010".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-ordered
      "b100".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // segment unit-stride
      "b110".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // segment strided
      "b101".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // segment indexed-unordered
      "b111".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt)  // segment indexed-ordered
    )))}
}

/**
 * GenRealFlowLog2 = Log2(GenRealFlowNum)
 */
object GenRealFlowLog2 extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    // mul >= 1 contributes log2(MulDataSize) = log2(VLENB); fractional muls reuse the raw
    // encoding as a two's-complement addend.
    // NOTE(review): for fractional muls this only matches GenRealFlowNum if the sum is
    // truncated back to 3 bits (e.g. "b101" = -3: 5 + 4 wraps to 1) — confirm the
    // consumer truncates the result width.
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew(1, 0)
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    (LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2, // unit-stride
      "b010".U -> eewRealFlowLog2, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-ordered
      "b100".U -> eewRealFlowLog2, // segment unit-stride
      "b110".U -> eewRealFlowLog2, // segment strided
      "b101".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // segment indexed-unordered
      "b111".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)  // segment indexed-ordered
    )))
  }
}

/**
 * GenElemIdx generals an element index within an instruction, given a certain uopIdx and a known flowIdx
 * inside the uop.
 *
 * eew = 0, elemIdx = uopIdx ## flowIdx(3, 0)
 * eew = 1, elemIdx = uopIdx ## flowIdx(2, 0)
 * eew = 2, elemIdx = uopIdx ## flowIdx(1, 0)
 * eew = 3, elemIdx = uopIdx ## flowIdx(0)
 */
object GenElemIdx extends VLSUConstants {
  def apply(alignedType: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    LookupTree(
      alignedType,
      (0 until alignTypes).map(i =>
        i.U -> ((uopIdx ## flowIdx(log2Up(VLENB) - i - 1, 0))(log2Up(maxElemNum) - 1, 0))
      )
    )
  }
}

/**
 * GenVLMAX calculates VLMAX, which equals MUL * ew
 */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}

/** VL of a unit-stride whole-register access: nfields registers of VLENB >> log2(eew bytes) elements. */
object GenUSWholeRegVL extends VLSUConstants {
  def apply(nfields: UInt, eew: UInt): UInt = {
    LookupTree(eew(1, 0), List(
      "b00".U -> (nfields << (log2Up(VLENB) - 0)),
      "b01".U -> (nfields << (log2Up(VLENB) - 1)),
      "b10".U -> (nfields << (log2Up(VLENB) - 2)),
      "b11".U -> (nfields << (log2Up(VLENB) - 3))
    ))
  }
}

/** emul encoding for a whole-register access: nf 1/2/4/8 (encoded 0/1/3/7) maps to mul 1/2/4/8. */
object GenUSWholeEmul extends VLSUConstants {
  def apply(nf: UInt): UInt = {
    LookupTree(nf, List(
      "b000".U -> "b000".U(mulBits.W),
      "b001".U -> "b001".U(mulBits.W),
      "b011".U -> "b010".U(mulBits.W),
      "b111".U -> "b011".U(mulBits.W)
    ))
  }
}

/** VL of a unit-stride mask access: one mask bit per element, so vl bytes = ceil-free vl >> 3. */
object GenUSMaskRegVL extends VLSUConstants {
  def apply(vl: UInt): UInt = {
    (vl >> 3.U)
  }
}

/** Expand a per-flow mask to a per-byte mask by replicating each bit over the element width. */
object GenUopByteMask {
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> flowMask,
      "b01".U -> FillInterleaved(2, flowMask),
      "b10".U -> FillInterleaved(4, flowMask),
      "b11".U -> FillInterleaved(8, flowMask)
    ))
  }
}

/** One-hot byte mask selecting the element `elemIdx` occupies inside a register. */
object GenFlowMaskInsideReg extends VLSUConstants {
  def apply(alignedType: UInt, elemIdx: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> UIntToOH(elemIdx(3, 0)),
      "b01".U -> FillInterleaved(2, UIntToOH(elemIdx(2, 0))),
      "b10".U -> FillInterleaved(4, UIntToOH(elemIdx(1, 0))),
      "b11".U -> FillInterleaved(8, UIntToOH(elemIdx(0)))
    ))
  }
}

// TODO: delete this in vs flow queue
object GenEleIdx {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    val eleIdx = Wire(UInt(7.W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || emul.asSInt > lmul.asSInt) {
      eleIdx := (uopIdx << Log2Num((MulDataSize(emul) >> eew(1,0)).asUInt)).asUInt + flowIdx
    }.otherwise {
      eleIdx := (uopIdx << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx
    }
    eleIdx
  }
}

/** Index of the destination register inside the current field for a given uopIdx. */
object GenVdIdxInField extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) {
      // Unit-stride or Strided, or indexed with lmul >= emul
      vdIdx := uopIdx
    }.otherwise {
      // Indexed with lmul <= emul: every 2^(emul-lmul) uops share one vd register.
      val multiple = emul - lmul
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}

/** Mask limiting valid elements of a field: full VLMAX mask unless a segment uses a fractional mul. */
object GenFieldMask {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val isSegment = instType(2)
    val isIndexed = instType(0)
    // Indexed accesses are governed by sew/lmul, others by eew/emul.
    val alignedType = Mux(isIndexed, sew(1, 0), eew(1, 0))
    val mul = Mux(isIndexed, lmul, emul)
    val vlmaxMask = GenVLMAX(lmul, sew) - 1.U
    val mulMask = LookupTree(alignedType, List(
      "b00".U -> "b01111".U,
      "b01".U -> "b00111".U,
      "b10".U -> "b00011".U,
      "b11".U -> "b00001".U
    ))
    Mux(
      !isSegment || mul.asSInt >= 0.S, // non-segment, or segment with mul >= 1
      vlmaxMask,
      mulMask
    )
  }
}