1/*************************************************************************************** 2 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3 * Copyright (c) 2020-2021 Peng Cheng Laboratory 4 * 5 * XiangShan is licensed under Mulan PSL v2. 6 * You can use this software according to the terms and conditions of the Mulan PSL v2. 7 * You may obtain a copy of Mulan PSL v2 at: 8 * http://license.coscl.org.cn/MulanPSL2 9 * 10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13 * 14 * See the Mulan PSL v2 for more details. 15 ***************************************************************************************/ 16 17package xiangshan.mem 18 19import org.chipsalliance.cde.config.Parameters 20import chisel3._ 21import chisel3.util._ 22import utils._ 23import utility._ 24import xiangshan._ 25import xiangshan.backend.rob.RobPtr 26import xiangshan.backend.Bundles._ 27import xiangshan.backend.fu.FuType 28 29/** 30 * Common used parameters or functions in vlsu 31 */ 32trait VLSUConstants { 33 val VLEN = 128 34 //for pack unit-stride flow 35 val AlignedNum = 4 // 1/2/4/8 36 def VLENB = VLEN/8 37 def vOffsetBits = log2Up(VLENB) // bits-width to index offset inside a vector reg 38 lazy val vlmBindexBits = 8 //will be overrided later 39 lazy val vsmBindexBits = 8 // will be overrided later 40 41 def alignTypes = 5 // eew/sew = 1/2/4/8, last indicate 128 bit element 42 def alignTypeBits = log2Up(alignTypes) 43 def maxMUL = 8 44 def maxFields = 8 45 /** 46 * In the most extreme cases like a segment indexed instruction, eew=64, emul=8, sew=8, lmul=1, 47 * and nf=8, each data reg is mapped with 8 index regs and there are 8 data regs in total, 48 * each for a field. Therefore an instruction can be divided into 64 uops at most. 49 */ 50 def maxUopNum = maxMUL * maxFields // 64 51 def maxFlowNum = 16 52 def maxElemNum = maxMUL * maxFlowNum // 128 53 // def uopIdxBits = log2Up(maxUopNum) // to index uop inside an robIdx 54 def elemIdxBits = log2Up(maxElemNum) + 1 // to index which element in an instruction 55 def flowIdxBits = log2Up(maxFlowNum) + 1 // to index which flow in a uop 56 def fieldBits = log2Up(maxFields) + 1 // 4-bits to indicate 1~8 57 58 def ewBits = 3 // bits-width of EEW/SEW 59 def mulBits = 3 // bits-width of emul/lmul 60 61 def getSlice(data: UInt, i: Int, alignBits: Int): UInt = { 62 require(data.getWidth >= (i+1) * alignBits) 63 data((i+1) * alignBits - 1, i * alignBits) 64 } 65 66 def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8) 67 def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16) 68 def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32) 69 def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64) 70 def getDoubleDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 128) 71} 72 73trait HasVLSUParameters extends HasXSParameter with VLSUConstants { 74 override val VLEN = coreParams.VLEN 75 override lazy val vlmBindexBits = log2Up(coreParams.VlMergeBufferSize) 76 override lazy val vsmBindexBits = log2Up(coreParams.VsMergeBufferSize) 77 def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U 78 def isStrided(instType: UInt) = instType(1, 0) === "b10".U 79 def isIndexed(instType: UInt) = instType(0) === "b1".U 80 def isNotIndexed(instType: UInt) = instType(0) === "b0".U 81 def isSegment(instType: UInt) = instType(2) === "b1".U 82 def is128Bit(alignedType: UInt) = alignedType(2) === "b1".U 83 84 def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = { 85 require(oldData.getWidth == newData.getWidth) 86 require(oldData.getWidth == mask.getWidth * 8) 87 VecInit(mask.asBools.zipWithIndex.map { case (en, i) => 88 Mux(en, getByte(newData, i), getByte(oldData, i)) 89 }) 90 } 91 92 // def asBytes(data: UInt) = { 93 // require(data.getWidth % 8 == 0) 94 // (0 until data.getWidth/8).map(i => getByte(data, i)) 95 // } 96 97 def mergeDataWithElemIdx( 98 oldData: UInt, 99 newData: Seq[UInt], 100 alignedType: UInt, 101 elemIdx: Seq[UInt], 102 valids: Seq[Bool] 103 ): UInt = { 104 require(newData.length == elemIdx.length) 105 require(newData.length == valids.length) 106 LookupTree(alignedType, List( 107 "b00".U -> VecInit(elemIdx.map(e => UIntToOH(e(3, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) => 108 ParallelPosteriorityMux( 109 true.B +: selVec.zip(valids).map(x => x._1 && x._2), 110 getByte(oldData, i) +: newData.map(getByte(_)) 111 )}).asUInt, 112 "b01".U -> VecInit(elemIdx.map(e => UIntToOH(e(2, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) => 113 ParallelPosteriorityMux( 114 true.B +: selVec.zip(valids).map(x => x._1 && x._2), 115 getHalfWord(oldData, i) +: newData.map(getHalfWord(_)) 116 )}).asUInt, 117 "b10".U -> VecInit(elemIdx.map(e => UIntToOH(e(1, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) => 118 ParallelPosteriorityMux( 119 true.B +: selVec.zip(valids).map(x => x._1 && x._2), 120 getWord(oldData, i) +: newData.map(getWord(_)) 121 )}).asUInt, 122 "b11".U -> VecInit(elemIdx.map(e => UIntToOH(e(0)).asBools).transpose.zipWithIndex.map { case (selVec, i) => 123 ParallelPosteriorityMux( 124 true.B +: selVec.zip(valids).map(x => x._1 && x._2), 125 getDoubleWord(oldData, i) +: newData.map(getDoubleWord(_)) 126 )}).asUInt 127 )) 128 } 129 130 def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = { 131 mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B)) 132 } 133 /** 134 * for merge 128-bits data of unit-stride 135 */ 136 object mergeDataByoffset{ 137 def apply(oldData: UInt, newData: Seq[UInt], mask: Seq[UInt], offset: Seq[UInt], valids: Seq[Bool]): UInt = { 138 require(newData.length == valids.length) 139 require(newData.length == offset.length) 140 // if (i>offset[k] && mask[k][i]==1 && valid[k]) -> newData, else -> oldData 141 val selVec = (mask zip offset).map{case (m,e) => 142 ((~UIntToMask(e, VLENB)).asBools.zip(m.asBools).map(x=> x._1 && x._2))}.transpose // vector(3,16) 143 144 VecInit(selVec.zipWithIndex.map{ case (selV, i) => // selV: vector(3,1), 0=<i<16 145 ParallelPosteriorityMux( 146 true.B +: selV.zip(valids).map(x => x._1 && x._2), 147 getByte(oldData, i) +: newData.map(getByte(_)) 148 )}).asUInt 149 } 150 } 151 def mergeDataByoffset(oldData: UInt, newData: UInt, mask: UInt, offset: UInt): UInt = { 152 mergeDataByoffset(oldData, Seq(newData), Seq(mask), Seq(offset), Seq(true.B)) 153 } 154} 155abstract class VLSUModule(implicit p: Parameters) extends XSModule 156 with HasVLSUParameters 157 with HasCircularQueuePtrHelper 158abstract class VLSUBundle(implicit p: Parameters) extends XSBundle 159 with HasVLSUParameters 160 161class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle { 162 val uop = new DynInst 163} 164 165class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle { 166 val isvec = Bool() 167 val vecdata = UInt(VLEN.W) 168 val mask = UInt(VLENB.W) 169 // val rob_idx_valid = Vec(2, Bool()) 170 // val inner_idx = Vec(2, UInt(3.W)) 171 // val rob_idx = Vec(2, new RobPtr) 172 // val offset = Vec(2, UInt(4.W)) 173 val reg_offset = UInt(vOffsetBits.W) 174 val vecActive = Bool() // 1: vector active element, 0: vector not active element 175 val is_first_ele = Bool() 176 val elemIdx = UInt(elemIdxBits.W) // element index 177 val elemIdxInsideVd = UInt(elemIdxBits.W) // element index in scope of vd 178 // val uopQueuePtr = new VluopPtr 179 // val flowPtr = new VlflowPtr 180} 181 182class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters { 183 val vec = new OnlyVecExuOutput 184 // pack 185 val isPackage = Bool() 186 val packageNum = UInt((log2Up(VLENB) + 1).W) 187 val originAlignedType = UInt(alignTypeBits.W) 188 val alignedType = UInt(alignTypeBits.W) 189 // feedback 190 val vecFeedback = Bool() 191} 192 193// class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters { 194// val elemIdx = UInt(elemIdxBits.W) 195// val uopQueuePtr = new VsUopPtr 196// val fieldIdx = UInt(fieldBits.W) 197// val segmentIdx = UInt(elemIdxBits.W) 198// val vaddr = UInt(VAddrBits.W) 199// // pack 200// val isPackage = Bool() 201// val packageNum = UInt((log2Up(VLENB) + 1).W) 202// val originAlignedType = UInt(alignTypeBits.W) 203// val alignedType = UInt(alignTypeBits.W) 204// } 205 206class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp { 207 val flowMask = UInt(VLENB.W) // each bit for a flow 208 val byteMask = UInt(VLENB.W) // each bit for a byte 209 val data = UInt(VLEN.W) 210 // val fof = Bool() // fof is only used for vector loads 211 val excp_eew_index = UInt(elemIdxBits.W) 212 // val exceptionVec = ExceptionVec() // uop has exceptionVec 213 val baseAddr = UInt(VAddrBits.W) 214 val stride = UInt(VLEN.W) 215 val flow_counter = UInt(flowIdxBits.W) 216 217 // instruction decode result 218 val flowNum = UInt(flowIdxBits.W) // # of flows in a uop 219 // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication 220 val nfields = UInt(fieldBits.W) // NFIELDS 221 val vm = Bool() // whether vector masking is enabled 222 val usWholeReg = Bool() // unit-stride, whole register load 223 val usMaskReg = Bool() // unit-stride, masked store/load 224 val eew = UInt(ewBits.W) // size of memory elements 225 val sew = UInt(ewBits.W) 226 val emul = UInt(mulBits.W) 227 val lmul = UInt(mulBits.W) 228 val vlmax = UInt(elemIdxBits.W) 229 val instType = UInt(3.W) 230 val vd_last_uop = Bool() 231 val vd_first_uop = Bool() 232} 233 234class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp { 235 val vaddr = UInt(VAddrBits.W) 236 val mask = UInt(VLENB.W) 237 val alignedType = UInt(alignTypeBits.W) 238 val vecActive = Bool() 239 val elemIdx = UInt(elemIdxBits.W) 240 val is_first_ele = Bool() 241 242 // pack 243 val isPackage = Bool() 244 val packageNum = UInt((log2Up(VLENB) + 1).W) 245 val originAlignedType = UInt(alignTypeBits.W) 246} 247 248class VecMemExuOutput(isVector: Boolean = false)(implicit p: Parameters) extends VLSUBundle{ 249 val output = new MemExuOutput(isVector) 250 val vecFeedback = Bool() 251 val mmio = Bool() 252 val usSecondInv = Bool() 253 val elemIdx = UInt(elemIdxBits.W) 254 val alignedType = UInt(alignTypeBits.W) 255} 256 257object MulNum { 258 def apply (mul: UInt): UInt = { //mul means emul or lmul 259 (LookupTree(mul,List( 260 "b101".U -> 1.U , // 1/8 261 "b110".U -> 1.U , // 1/4 262 "b111".U -> 1.U , // 1/2 263 "b000".U -> 1.U , // 1 264 "b001".U -> 2.U , // 2 265 "b010".U -> 4.U , // 4 266 "b011".U -> 8.U // 8 267 )))} 268} 269/** 270 * when emul is greater than or equal to 1, this means the entire register needs to be written; 271 * otherwise, only write the specified number of bytes */ 272object MulDataSize { 273 def apply (mul: UInt): UInt = { //mul means emul or lmul 274 (LookupTree(mul,List( 275 "b101".U -> 2.U , // 1/8 276 "b110".U -> 4.U , // 1/4 277 "b111".U -> 8.U , // 1/2 278 "b000".U -> 16.U , // 1 279 "b001".U -> 16.U , // 2 280 "b010".U -> 16.U , // 4 281 "b011".U -> 16.U // 8 282 )))} 283} 284 285object OneRegNum { 286 def apply (eew: UInt): UInt = { //mul means emul or lmul 287 (LookupTree(eew,List( 288 "b000".U -> 16.U , // 1 289 "b101".U -> 8.U , // 2 290 "b110".U -> 4.U , // 4 291 "b111".U -> 2.U // 8 292 )))} 293} 294 295//index inst read data byte 296object SewDataSize { 297 def apply (sew: UInt): UInt = { 298 (LookupTree(sew,List( 299 "b000".U -> 1.U , // 1 300 "b001".U -> 2.U , // 2 301 "b010".U -> 4.U , // 4 302 "b011".U -> 8.U // 8 303 )))} 304} 305 306// strided inst read data byte 307object EewDataSize { 308 def apply (eew: UInt): UInt = { 309 (LookupTree(eew,List( 310 "b000".U -> 1.U , // 1 311 "b101".U -> 2.U , // 2 312 "b110".U -> 4.U , // 4 313 "b111".U -> 8.U // 8 314 )))} 315} 316 317object loadDataSize { 318 def apply (instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = { 319 (LookupTree(instType,List( 320 "b000".U -> MulDataSize(emul), // unit-stride 321 "b010".U -> EewDataSize(eew) , // strided 322 "b001".U -> SewDataSize(sew) , // indexed-unordered 323 "b011".U -> SewDataSize(sew) , // indexed-ordered 324 "b100".U -> EewDataSize(eew) , // segment unit-stride 325 "b110".U -> EewDataSize(eew) , // segment strided 326 "b101".U -> SewDataSize(sew) , // segment indexed-unordered 327 "b111".U -> SewDataSize(sew) // segment indexed-ordered 328 )))} 329} 330 331object storeDataSize { 332 def apply (instType: UInt, eew: UInt, sew: UInt): UInt = { 333 (LookupTree(instType,List( 334 "b000".U -> EewDataSize(eew) , // unit-stride, do not use 335 "b010".U -> EewDataSize(eew) , // strided 336 "b001".U -> SewDataSize(sew) , // indexed-unordered 337 "b011".U -> SewDataSize(sew) , // indexed-ordered 338 "b100".U -> EewDataSize(eew) , // segment unit-stride 339 "b110".U -> EewDataSize(eew) , // segment strided 340 "b101".U -> SewDataSize(sew) , // segment indexed-unordered 341 "b111".U -> SewDataSize(sew) // segment indexed-ordered 342 )))} 343} 344 345object GenVecStoreMask { 346 def apply (instType: UInt, eew: UInt, sew: UInt): UInt = { 347 val mask = Wire(UInt(16.W)) 348 mask := UIntToOH(storeDataSize(instType = instType, eew = eew, sew = sew)) - 1.U 349 mask 350 } 351} 352 353/** 354 * these are used to obtain immediate addresses for index instruction */ 355object EewEq8 { 356 def apply(index:UInt, flow_inner_idx: UInt): UInt = { 357 (LookupTree(flow_inner_idx,List( 358 0.U -> index(7 ,0 ), 359 1.U -> index(15,8 ), 360 2.U -> index(23,16 ), 361 3.U -> index(31,24 ), 362 4.U -> index(39,32 ), 363 5.U -> index(47,40 ), 364 6.U -> index(55,48 ), 365 7.U -> index(63,56 ), 366 8.U -> index(71,64 ), 367 9.U -> index(79,72 ), 368 10.U -> index(87,80 ), 369 11.U -> index(95,88 ), 370 12.U -> index(103,96 ), 371 13.U -> index(111,104), 372 14.U -> index(119,112), 373 15.U -> index(127,120) 374 )))} 375} 376 377object EewEq16 { 378 def apply(index: UInt, flow_inner_idx: UInt): UInt = { 379 (LookupTree(flow_inner_idx, List( 380 0.U -> index(15, 0), 381 1.U -> index(31, 16), 382 2.U -> index(47, 32), 383 3.U -> index(63, 48), 384 4.U -> index(79, 64), 385 5.U -> index(95, 80), 386 6.U -> index(111, 96), 387 7.U -> index(127, 112) 388 )))} 389} 390 391object EewEq32 { 392 def apply(index: UInt, flow_inner_idx: UInt): UInt = { 393 (LookupTree(flow_inner_idx, List( 394 0.U -> index(31, 0), 395 1.U -> index(63, 32), 396 2.U -> index(95, 64), 397 3.U -> index(127, 96) 398 )))} 399} 400 401object EewEq64 { 402 def apply (index: UInt, flow_inner_idx: UInt): UInt = { 403 (LookupTree(flow_inner_idx, List( 404 0.U -> index(63, 0), 405 1.U -> index(127, 64) 406 )))} 407} 408 409object IndexAddr { 410 def apply (index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = { 411 (LookupTree(eew,List( 412 "b000".U -> EewEq8 (index = index, flow_inner_idx = flow_inner_idx ), // Imm is 1 Byte // TODO: index maybe cross register 413 "b101".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx ), // Imm is 2 Byte 414 "b110".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx ), // Imm is 4 Byte 415 "b111".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx ) // Imm is 8 Byte 416 )))} 417} 418 419object Log2Num { 420 def apply (num: UInt): UInt = { 421 (LookupTree(num,List( 422 16.U -> 4.U, 423 8.U -> 3.U, 424 4.U -> 2.U, 425 2.U -> 1.U, 426 1.U -> 0.U 427 )))} 428} 429 430object GenUopIdxInField { 431 def apply (instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = { 432 val isIndexed = instType(0) 433 val mulInField = Mux( 434 isIndexed, 435 Mux(lmul.asSInt > emul.asSInt, lmul, emul), 436 emul 437 ) 438 LookupTree(mulInField, List( 439 "b101".U -> 0.U, 440 "b110".U -> 0.U, 441 "b111".U -> 0.U, 442 "b000".U -> 0.U, 443 "b001".U -> uopIdx(0), 444 "b010".U -> uopIdx(1, 0), 445 "b011".U -> uopIdx(2, 0) 446 )) 447 } 448} 449 450//eew decode 451object EewLog2 extends VLSUConstants { 452 // def apply (eew: UInt): UInt = { 453 // (LookupTree(eew,List( 454 // "b000".U -> "b000".U , // 1 455 // "b101".U -> "b001".U , // 2 456 // "b110".U -> "b010".U , // 4 457 // "b111".U -> "b011".U // 8 458 // )))} 459 def apply(eew: UInt): UInt = ZeroExt(eew(1, 0), ewBits) 460} 461 462/** 463 * unit-stride instructions don't use this method; 464 * other instructions generate realFlowNum by EmulDataSize >> eew(1,0), 465 * EmulDataSize means the number of bytes that need to be written to the register, 466 * eew(1,0) means the number of bytes written at once*/ 467object GenRealFlowNum { 468 def apply (instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = { 469 (LookupTree(instType,List( 470 "b000".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // store use, load do not use 471 "b010".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // strided 472 "b001".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-unordered 473 "b011".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-ordered 474 "b100".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // segment unit-stride 475 "b110".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // segment strided 476 "b101".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // segment indexed-unordered 477 "b111".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt) // segment indexed-ordered 478 )))} 479} 480 481/** 482 * GenRealFlowLog2 = Log2(GenRealFlowNum) 483 */ 484object GenRealFlowLog2 extends VLSUConstants { 485 def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = { 486 val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul) 487 val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul) 488 val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew(1, 0) 489 val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0) 490 (LookupTree(instType, List( 491 "b000".U -> eewRealFlowLog2, // unit-stride 492 "b010".U -> eewRealFlowLog2, // strided 493 "b001".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-unordered 494 "b011".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-ordered 495 "b100".U -> eewRealFlowLog2, // segment unit-stride 496 "b110".U -> eewRealFlowLog2, // segment strided 497 "b101".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // segment indexed-unordered 498 "b111".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // segment indexed-ordered 499 ))) 500 } 501} 502 503/** 504 * GenElemIdx generals an element index within an instruction, given a certain uopIdx and a known flowIdx 505 * inside the uop. 506 */ 507object GenElemIdx extends VLSUConstants { 508 def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, 509 uopIdx: UInt, flowIdx: UInt): UInt = { 510 val isIndexed = instType(0).asBool 511 val eewUopFlowsLog2 = Mux(emul.asSInt > 0.S, 0.U, emul) + log2Up(VLENB).U - eew(1, 0) 512 val sewUopFlowsLog2 = Mux(lmul.asSInt > 0.S, 0.U, lmul) + log2Up(VLENB).U - sew(1, 0) 513 val uopFlowsLog2 = Mux( 514 isIndexed, 515 Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2), 516 eewUopFlowsLog2 517 ) 518 LookupTree(uopFlowsLog2, List( 519 0.U -> uopIdx, 520 1.U -> uopIdx ## flowIdx(0), 521 2.U -> uopIdx ## flowIdx(1, 0), 522 3.U -> uopIdx ## flowIdx(2, 0), 523 4.U -> uopIdx ## flowIdx(3, 0) 524 )) 525 } 526} 527 528/** 529 * GenVLMAX calculates VLMAX, which equals MUL * ew 530 */ 531object GenVLMAXLog2 extends VLSUConstants { 532 def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew 533} 534object GenVLMAX { 535 def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew) 536} 537 538object GenUSWholeRegVL extends VLSUConstants { 539 def apply(nfields: UInt, eew: UInt): UInt = { 540 LookupTree(eew(1, 0), List( 541 "b00".U -> (nfields << (log2Up(VLENB) - 0)), 542 "b01".U -> (nfields << (log2Up(VLENB) - 1)), 543 "b10".U -> (nfields << (log2Up(VLENB) - 2)), 544 "b11".U -> (nfields << (log2Up(VLENB) - 3)) 545 )) 546 } 547} 548object GenUSWholeEmul extends VLSUConstants{ 549 def apply(nf: UInt): UInt={ 550 LookupTree(nf,List( 551 "b000".U -> "b000".U(mulBits.W), 552 "b001".U -> "b001".U(mulBits.W), 553 "b011".U -> "b010".U(mulBits.W), 554 "b111".U -> "b011".U(mulBits.W) 555 )) 556 } 557} 558 559 560object GenUSMaskRegVL extends VLSUConstants { 561 def apply(vl: UInt): UInt = { 562 Mux(vl(2,0) === 0.U , (vl >> 3.U), ((vl >> 3.U) + 1.U)) 563 } 564} 565 566object GenUopByteMask { 567 def apply(flowMask: UInt, alignedType: UInt): UInt = { 568 LookupTree(alignedType, List( 569 "b000".U -> flowMask, 570 "b001".U -> FillInterleaved(2, flowMask), 571 "b010".U -> FillInterleaved(4, flowMask), 572 "b011".U -> FillInterleaved(8, flowMask), 573 "b100".U -> FillInterleaved(16, flowMask) 574 )) 575 } 576} 577 578object GenVdIdxInField extends VLSUConstants { 579 def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = { 580 val vdIdx = Wire(UInt(log2Up(maxMUL).W)) 581 when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) { 582 // Unit-stride or Strided, or indexed with lmul >= emul 583 vdIdx := uopIdx 584 }.otherwise { 585 // Indexed with lmul <= emul 586 val multiple = emul - lmul 587 val uopIdxWidth = uopIdx.getWidth 588 vdIdx := LookupTree(multiple, List( 589 0.U -> uopIdx, 590 1.U -> (uopIdx >> 1), 591 2.U -> (uopIdx >> 2), 592 3.U -> (uopIdx >> 3) 593 )) 594 } 595 vdIdx 596 } 597} 598/** 599* Use start and vl to generate flow activative mask 600* mod = true fill 0 601* mod = false fill 1 602*/ 603object GenFlowMask extends VLSUConstants { 604 def apply(elementMask: UInt, start: UInt, vl: UInt , mod: Boolean): UInt = { 605 val startMask = ~UIntToMask(start, VLEN) 606 val vlMask = UIntToMask(vl, VLEN) 607 val maskVlStart = vlMask & startMask 608 if(mod){ 609 elementMask & maskVlStart 610 } 611 else{ 612 (~elementMask).asUInt & maskVlStart 613 } 614 } 615} 616 617object CheckAligned extends VLSUConstants { 618 def apply(addr: UInt): UInt = { 619 val aligned_16 = (addr(0) === 0.U) // 16-bit 620 val aligned_32 = (addr(1,0) === 0.U) // 32-bit 621 val aligned_64 = (addr(2,0) === 0.U) // 64-bit 622 val aligned_128 = (addr(3,0) === 0.U) // 128-bit 623 Cat(true.B, aligned_16, aligned_32, aligned_64, aligned_128) 624 } 625} 626 627/** 628 search if mask have continue 'len' bit '1' 629 mask: source mask 630 len: search length 631*/ 632object GenPackMask{ 633 def leadX(mask: Seq[Bool], len: Int): Bool = { 634 if(len == 1){ 635 mask.head 636 } 637 else{ 638 leadX(mask.drop(1),len-1) & mask.head 639 } 640 } 641 def leadOneVec(shiftMask: Seq[Bool]): UInt = { 642 // max is 64-bit, so the max num of flow to pack is 8 643 644 val lead1 = leadX(shiftMask, 1) // continue 1 bit 645 val lead2 = leadX(shiftMask, 2) // continue 2 bit 646 val lead4 = leadX(shiftMask, 4) // continue 4 bit 647 val lead8 = leadX(shiftMask, 8) // continue 8 bit 648 val lead16 = leadX(shiftMask, 16) // continue 16 bit 649 Cat(lead1, lead2, lead4, lead8, lead16) 650 } 651 652 def apply(shiftMask: UInt) = { 653 // pack mask 654 val packMask = leadOneVec(shiftMask.asBools) 655 packMask 656 } 657} 658/** 659PackEnable = (LeadXVec >> eew) & alignedVec, where the 0th bit represents the ability to merge into a 64 bit flow, the second bit represents the ability to merge into a 32 bit flow, and so on. 660 661example: 662 addr = 0x0, activeMask = b00011100101111, flowIdx = 0, eew = 0(8-bit) 663 664 step 0 : addrAlignedVec = (1, 1, 1, 1) elemIdxAligned = (1, 1, 1, 1) 665 step 1 : activePackVec = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0) 666 step 2 : activePackEnable = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0) 667 668 we can package 4 8-bit activative flows into a 32-bit flow. 669*/ 670object GenPackVec extends VLSUConstants{ 671 def apply(addr: UInt, shiftMask: UInt, eew: UInt, elemIdx: UInt): UInt = { 672 val addrAlignedVec = CheckAligned(addr) 673 val elemIdxAligned = CheckAligned(elemIdx) 674 val packMask = GenPackMask(shiftMask) 675 // generate packVec 676 val packVec = addrAlignedVec & elemIdxAligned & (packMask.asUInt >> eew) 677 678 packVec 679 } 680} 681 682object GenPackAlignedType extends VLSUConstants{ 683 def apply(packVec: UInt): UInt = { 684 val packAlignedType = PriorityMux(Seq( 685 packVec(0) -> "b100".U, 686 packVec(1) -> "b011".U, 687 packVec(2) -> "b010".U, 688 packVec(3) -> "b001".U, 689 packVec(4) -> "b000".U 690 )) 691 packAlignedType 692 } 693} 694 695object GenPackNum extends VLSUConstants{ 696 def apply(alignedType: UInt, packAlignedType: UInt): UInt = { 697 (1.U << (packAlignedType - alignedType)).asUInt 698 } 699} 700 701object genVWmask128 { 702 def apply(addr: UInt, sizeEncode: UInt): UInt = { 703 (LookupTree(sizeEncode, List( 704 "b000".U -> 0x1.U, //0001 << addr(2:0) 705 "b001".U -> 0x3.U, //0011 706 "b010".U -> 0xf.U, //1111 707 "b011".U -> 0xff.U, //11111111 708 "b100".U -> 0xffff.U //1111111111111111 709 )) << addr(3, 0)).asUInt 710 } 711} 712/* 713* only use in max length is 128 714*/ 715object genVWdata { 716 def apply(data: UInt, sizeEncode: UInt): UInt = { 717 LookupTree(sizeEncode, List( 718 "b000".U -> Fill(16, data(7, 0)), 719 "b001".U -> Fill(8, data(15, 0)), 720 "b010".U -> Fill(4, data(31, 0)), 721 "b011".U -> Fill(2, data(63,0)), 722 "b100".U -> data(127,0) 723 )) 724 } 725} 726 727object genUSSplitAddr{ 728 def apply(addr: UInt, index: UInt): UInt = { 729 val tmpAddr = Cat(addr(38, 4), 0.U(4.W)) 730 val nextCacheline = tmpAddr + 16.U 731 LookupTree(index, List( 732 0.U -> tmpAddr, 733 1.U -> nextCacheline 734 )) 735 } 736} 737 738object genUSSplitMask{ 739 def apply(mask: UInt, index: UInt, addrOffset: UInt): UInt = { 740 val tmpMask = Cat(0.U(16.W),mask) << addrOffset // 32-bits 741 LookupTree(index, List( 742 0.U -> tmpMask(15, 0), 743 1.U -> tmpMask(31, 16), 744 )) 745 } 746} 747 748object genUSSplitData{ 749 def apply(data: UInt, index: UInt, addrOffset: UInt): UInt = { 750 val tmpData = WireInit(0.U(256.W)) 751 val lookupTable = (0 until 16).map{case i => 752 if(i == 0){ 753 i.U -> Cat(0.U(128.W), data) 754 }else{ 755 i.U -> Cat(0.U(((16-i)*8).W), data, 0.U((i*8).W)) 756 } 757 } 758 tmpData := LookupTree(addrOffset, lookupTable).asUInt 759 760 LookupTree(index, List( 761 0.U -> tmpData(127, 0), 762 1.U -> tmpData(255, 128) 763 )) 764 } 765} 766/** 767 * generate offset in Vd of flows, only used in Unit-Stride 768 * */ 769object genVdOffset{ 770 def apply(offset: UInt, index: UInt): UInt = { 771 LookupTree(index, List( 772 0.U -> 0.U, 773 1.U -> ((~offset).asUInt + 1.U) 774 )) 775 } 776} 777 778object GenVSData extends VLSUConstants { 779 def apply(data: UInt, elemIdx: UInt, alignedType: UInt): UInt = { 780 LookupTree(alignedType, List( 781 "b000".U -> ZeroExt(LookupTree(elemIdx(3, 0), List.tabulate(VLEN/8)(i => i.U -> getByte(data, i))), VLEN), 782 "b001".U -> ZeroExt(LookupTree(elemIdx(2, 0), List.tabulate(VLEN/16)(i => i.U -> getHalfWord(data, i))), VLEN), 783 "b010".U -> ZeroExt(LookupTree(elemIdx(1, 0), List.tabulate(VLEN/32)(i => i.U -> getWord(data, i))), VLEN), 784 "b011".U -> ZeroExt(LookupTree(elemIdx(0), List.tabulate(VLEN/64)(i => i.U -> getDoubleWord(data, i))), VLEN), 785 "b100".U -> data // if have wider element, it will broken 786 )) 787 } 788}