/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._

/**
 * Commonly used parameters and functions in the VLSU
 */
trait VLSUConstants {
  val VLEN = 128
  // for packing unit-stride flows
  val AlignedNum = 4 // 1/2/4/8
  def VLENB = VLEN/8
  def vOffsetBits = log2Up(VLENB) // bit width of a byte offset inside a vector reg

  def alignTypes = 5 // eew/sew = 1/2/4/8 bytes; the last indicates a 128-bit element
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
   * In the most extreme case, e.g. a segment indexed instruction with eew=64, emul=8,
   * sew=8, lmul=1 and nf=8, each data reg is mapped to 8 index regs and there are
   * 8 data regs in total, one for each field. Therefore an instruction can be divided
   * into 64 uops at most.
   */
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index a uop inside a robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // to index an element inside an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // to index a flow inside a uop
  def fieldBits = log2Up(maxFields) + 1 // 4 bits to encode 1~8

  def ewBits = 3 // bit width of EEW/SEW
  def mulBits = 3 // bit width of emul/lmul

  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    require(data.getWidth >= (i+1) * alignBits)
    data((i+1) * alignBits - 1, i * alignBits)
  }

  def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64)
  def getDoubleDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 128)
}
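
// Illustrative use of the slicing helpers above (index values are hypothetical):
//   getByte(data, 3) == data(31, 24) -- the 4th byte
//   getWord(data, 1) == data(63, 32) -- the 2nd 32-bit word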

trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN
  def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U
  def isStrided(instType: UInt) = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt) = instType(0) === "b1".U
  def isNotIndexed(instType: UInt) = instType(0) === "b0".U
  def isSegment(instType: UInt) = instType(2) === "b1".U
  def is128Bit(alignedType: UInt) = alignedType(2) === "b1".U

  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
      Mux(en, getByte(newData, i), getByte(oldData, i))
    })
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)
    LookupTree(alignedType, List(
      "b00".U -> VecInit(elemIdx.map(e => UIntToOH(e(3, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getByte(oldData, i) +: newData.map(getByte(_))
        )}).asUInt,
      "b01".U -> VecInit(elemIdx.map(e => UIntToOH(e(2, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getHalfWord(oldData, i) +: newData.map(getHalfWord(_))
        )}).asUInt,
      "b10".U -> VecInit(elemIdx.map(e => UIntToOH(e(1, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getWord(oldData, i) +: newData.map(getWord(_))
        )}).asUInt,
      "b11".U -> VecInit(elemIdx.map(e => UIntToOH(e(0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getDoubleWord(oldData, i) +: newData.map(getDoubleWord(_))
        )}).asUInt
    ))
  }

  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
}
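
// Illustrative mergeDataWithElemIdx behaviour (values are hypothetical): with
// alignedType = "b10" (32-bit elements) and a single valid lane whose
// elemIdx = 2, the word newData(31, 0) replaces oldData(95, 64); every other
// word falls back to oldData via the prepended true.B default.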
abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper
abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W)
  val vecActive = Bool() // 1: active vector element, 0: inactive vector element
  val is_first_ele = Bool()
  val elemIdx = UInt(elemIdxBits.W) // element index
  val elemIdxInsideVd = UInt(elemIdxBits.W) // element index within the scope of vd
  // val uopQueuePtr = new VluopPtr
  // val flowPtr = new VlflowPtr
}

class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
  // pack
  val isPackage = Bool()
  val packageNum = UInt((log2Up(VLENB) + 1).W)
  val originAlignedType = UInt(alignTypeBits.W)
  val alignedType = UInt(alignTypeBits.W)
}

// class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
//   val elemIdx = UInt(elemIdxBits.W)
//   val uopQueuePtr = new VsUopPtr
//   val fieldIdx = UInt(fieldBits.W)
//   val segmentIdx = UInt(elemIdxBits.W)
//   val vaddr = UInt(VAddrBits.W)
//   // pack
//   val isPackage = Bool()
//   val packageNum = UInt((log2Up(VLENB) + 1).W)
//   val originAlignedType = UInt(alignTypeBits.W)
//   val alignedType = UInt(alignTypeBits.W)
// }

class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask = UInt(VLENB.W) // one bit per flow
  val byteMask = UInt(VLENB.W) // one bit per byte
  val data = UInt(VLEN.W)
  // val fof = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = UInt(ewBits.W) // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
  val vd_last_uop = Bool()
  val vd_first_uop = Bool()
}

class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr = UInt(VAddrBits.W)
  val mask = UInt(VLENB.W)
  val alignedType = UInt(alignTypeBits.W)
  val vecActive = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val is_first_ele = Bool()

  // pack
  val isPackage = Bool()
  val packageNum = UInt((log2Up(VLENB) + 1).W)
  val originAlignedType = UInt(alignTypeBits.W)
}
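
// Note on encoding (derived from the lookup tables below): emul/lmul are a
// 3-bit two's-complement log2 of the register-group multiplier:
//   b101 -> 1/8, b110 -> 1/4, b111 -> 1/2, b000 -> 1, b001 -> 2, b010 -> 4, b011 -> 8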

object MulNum {
  def apply(mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul, List(
      "b101".U -> 1.U, // 1/8
      "b110".U -> 1.U, // 1/4
      "b111".U -> 1.U, // 1/2
      "b000".U -> 1.U, // 1
      "b001".U -> 2.U, // 2
      "b010".U -> 4.U, // 4
      "b011".U -> 8.U  // 8
    )))}
}
/**
 * When emul is greater than or equal to 1, the entire register needs to be written;
 * otherwise, only the specified number of bytes is written.
 */
object MulDataSize {
  def apply(mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul, List(
      "b101".U -> 2.U,  // 1/8
      "b110".U -> 4.U,  // 1/4
      "b111".U -> 8.U,  // 1/2
      "b000".U -> 16.U, // 1
      "b001".U -> 16.U, // 2
      "b010".U -> 16.U, // 4
      "b011".U -> 16.U  // 8
    )))}
}

object OneRegNum {
  def apply(eew: UInt): UInt = { // number of elements in one register, given eew
    (LookupTree(eew, List(
      "b000".U -> 16.U, // 1-byte elements
      "b101".U -> 8.U,  // 2-byte elements
      "b110".U -> 4.U,  // 4-byte elements
      "b111".U -> 2.U   // 8-byte elements
    )))}
}

// data bytes read per element by indexed instructions
object SewDataSize {
  def apply(sew: UInt): UInt = {
    (LookupTree(sew, List(
      "b000".U -> 1.U, // 1
      "b001".U -> 2.U, // 2
      "b010".U -> 4.U, // 4
      "b011".U -> 8.U  // 8
    )))}
}

// data bytes read per element by strided instructions
object EewDataSize {
  def apply(eew: UInt): UInt = {
    (LookupTree(eew, List(
      "b000".U -> 1.U, // 1
      "b101".U -> 2.U, // 2
      "b110".U -> 4.U, // 4
      "b111".U -> 8.U  // 8
    )))}
}

object loadDataSize {
  def apply(instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType, List(
      "b000".U -> MulDataSize(emul), // unit-stride
      "b010".U -> EewDataSize(eew),  // strided
      "b001".U -> SewDataSize(sew),  // indexed-unordered
      "b011".U -> SewDataSize(sew),  // indexed-ordered
      "b100".U -> EewDataSize(eew),  // segment unit-stride
      "b110".U -> EewDataSize(eew),  // segment strided
      "b101".U -> SewDataSize(sew),  // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

object storeDataSize {
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType, List(
      "b000".U -> EewDataSize(eew), // unit-stride, do not use
      "b010".U -> EewDataSize(eew), // strided
      "b001".U -> SewDataSize(sew), // indexed-unordered
      "b011".U -> SewDataSize(sew), // indexed-ordered
      "b100".U -> EewDataSize(eew), // segment unit-stride
      "b110".U -> EewDataSize(eew), // segment strided
      "b101".U -> SewDataSize(sew), // segment indexed-unordered
      "b111".U -> SewDataSize(sew)  // segment indexed-ordered
    )))}
}

object GenVecStoreMask {
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    val mask = Wire(UInt(16.W))
    mask := UIntToOH(storeDataSize(instType = instType, eew = eew, sew = sew)) - 1.U
    mask
  }
}
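
// Illustrative GenVecStoreMask result (values are hypothetical): a strided
// store (instType = b010) with eew = b110 (4-byte elements) has
// storeDataSize = 4, so mask = UIntToOH(4.U) - 1.U = 0x000f, i.e. the low 4 bytes.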

/**
 * These are used to extract the index offsets of indexed instructions.
 */
object EewEq8 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U  -> index(7, 0),
      1.U  -> index(15, 8),
      2.U  -> index(23, 16),
      3.U  -> index(31, 24),
      4.U  -> index(39, 32),
      5.U  -> index(47, 40),
      6.U  -> index(55, 48),
      7.U  -> index(63, 56),
      8.U  -> index(71, 64),
      9.U  -> index(79, 72),
      10.U -> index(87, 80),
      11.U -> index(95, 88),
      12.U -> index(103, 96),
      13.U -> index(111, 104),
      14.U -> index(119, 112),
      15.U -> index(127, 120)
    )))}
}

object EewEq16 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(15, 0),
      1.U -> index(31, 16),
      2.U -> index(47, 32),
      3.U -> index(63, 48),
      4.U -> index(79, 64),
      5.U -> index(95, 80),
      6.U -> index(111, 96),
      7.U -> index(127, 112)
    )))}
}

object EewEq32 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(31, 0),
      1.U -> index(63, 32),
      2.U -> index(95, 64),
      3.U -> index(127, 96)
    )))}
}

object EewEq64 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(63, 0),
      1.U -> index(127, 64)
    )))}
}

object IndexAddr {
  def apply(index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    (LookupTree(eew, List(
      "b000".U -> EewEq8(index = index, flow_inner_idx = flow_inner_idx),  // the offset is 1 byte // TODO: the index may cross registers
      "b101".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx), // the offset is 2 bytes
      "b110".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx), // the offset is 4 bytes
      "b111".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx)  // the offset is 8 bytes
    )))}
}

object Log2Num {
  def apply(num: UInt): UInt = {
    (LookupTree(num, List(
      16.U -> 4.U,
      8.U  -> 3.U,
      4.U  -> 2.U,
      2.U  -> 1.U,
      1.U  -> 0.U
    )))}
}

object GenUopIdxInField {
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val isIndexed = instType(0)
    val mulInField = Mux(
      isIndexed,
      Mux(lmul.asSInt > emul.asSInt, lmul, emul),
      emul
    )
    LookupTree(mulInField, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
}
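
// Illustrative GenUopIdxInField result (values are hypothetical): an indexed
// instruction with lmul = b001 (2) and emul = b000 (1) takes
// mulInField = max(lmul, emul) = b001, so the uop index inside a field is uopIdx(0).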

// eew decode
object EewLog2 extends VLSUConstants {
  // def apply(eew: UInt): UInt = {
  //   (LookupTree(eew, List(
  //     "b000".U -> "b000".U, // 1
  //     "b101".U -> "b001".U, // 2
  //     "b110".U -> "b010".U, // 4
  //     "b111".U -> "b011".U  // 8
  //   )))}
  def apply(eew: UInt): UInt = ZeroExt(eew(1, 0), ewBits)
}

/**
 * Unit-stride instructions do not use this method. Other instructions generate
 * realFlowNum as EmulDataSize >> eew(1, 0), where EmulDataSize is the number of
 * bytes that need to be written to the register and eew(1, 0) is the log2 of
 * the number of bytes accessed at once.
 */
object GenRealFlowNum {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType, List(
      "b000".U -> (MulDataSize(emul) >> eew(1, 0)).asUInt, // unit-stride: used by stores, not by loads
      "b010".U -> (MulDataSize(emul) >> eew(1, 0)).asUInt, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1, 0)).asUInt, (MulDataSize(lmul) >> sew(1, 0)).asUInt), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1, 0)).asUInt, (MulDataSize(lmul) >> sew(1, 0)).asUInt), // indexed-ordered
      "b100".U -> (MulDataSize(emul) >> eew(1, 0)).asUInt, // segment unit-stride
      "b110".U -> (MulDataSize(emul) >> eew(1, 0)).asUInt, // segment strided
      "b101".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1, 0)).asUInt, (MulDataSize(lmul) >> sew(1, 0)).asUInt), // segment indexed-unordered
      "b111".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1, 0)).asUInt, (MulDataSize(lmul) >> sew(1, 0)).asUInt)  // segment indexed-ordered
    )))}
}

/**
 * GenRealFlowLog2 = Log2(GenRealFlowNum)
 */
object GenRealFlowLog2 extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew(1, 0)
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    (LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2, // unit-stride
      "b010".U -> eewRealFlowLog2, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-ordered
      "b100".U -> eewRealFlowLog2, // segment unit-stride
      "b110".U -> eewRealFlowLog2, // segment strided
      "b101".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // segment indexed-unordered
      "b111".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)  // segment indexed-ordered
    )))
  }
}

/**
 * GenElemIdx generates an element index within an instruction, given a certain
 * uopIdx and a known flowIdx inside the uop.
 */
object GenElemIdx extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt,
            uopIdx: UInt, flowIdx: UInt): UInt = {
    val isIndexed = instType(0).asBool
    val eewUopFlowsLog2 = Mux(emul.asSInt > 0.S, 0.U, emul) + log2Up(VLENB).U - eew(1, 0)
    val sewUopFlowsLog2 = Mux(lmul.asSInt > 0.S, 0.U, lmul) + log2Up(VLENB).U - sew(1, 0)
    val uopFlowsLog2 = Mux(
      isIndexed,
      Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2),
      eewUopFlowsLog2
    )
    LookupTree(uopFlowsLog2, List(
      0.U -> uopIdx,
      1.U -> uopIdx ## flowIdx(0),
      2.U -> uopIdx ## flowIdx(1, 0),
      3.U -> uopIdx ## flowIdx(2, 0),
      4.U -> uopIdx ## flowIdx(3, 0)
    ))
  }
}

/**
 * GenVLMAX calculates VLMAX, which equals LMUL * VLEN / SEW
 */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}
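
// Illustrative GenVLMAX result (values are hypothetical; VLEN = 128, so
// log2Up(VLENB) = 4): lmul = b000 (LMUL = 1) and sew = b011 (64-bit elements)
// give GenVLMAXLog2 = 0 + 4 - 3 = 1, i.e. VLMAX = 2 elements per register group.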

object GenUSWholeRegVL extends VLSUConstants {
  def apply(nfields: UInt, eew: UInt): UInt = {
    LookupTree(eew(1, 0), List(
      "b00".U -> (nfields << (log2Up(VLENB) - 0)),
      "b01".U -> (nfields << (log2Up(VLENB) - 1)),
      "b10".U -> (nfields << (log2Up(VLENB) - 2)),
      "b11".U -> (nfields << (log2Up(VLENB) - 3))
    ))
  }
}
object GenUSWholeEmul extends VLSUConstants {
  def apply(nf: UInt): UInt = {
    LookupTree(nf, List(
      "b000".U -> "b000".U(mulBits.W),
      "b001".U -> "b001".U(mulBits.W),
      "b011".U -> "b010".U(mulBits.W),
      "b111".U -> "b011".U(mulBits.W)
    ))
  }
}

object GenUSMaskRegVL extends VLSUConstants {
  def apply(vl: UInt): UInt = {
    Mux(vl(2, 0) === 0.U, (vl >> 3.U), ((vl >> 3.U) + 1.U))
  }
}

object GenUopByteMask {
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b000".U -> flowMask,
      "b001".U -> FillInterleaved(2, flowMask),
      "b010".U -> FillInterleaved(4, flowMask),
      "b011".U -> FillInterleaved(8, flowMask),
      "b100".U -> FillInterleaved(16, flowMask)
    ))
  }
}

object GenVdIdxInField extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    when (instType(1, 0) === "b00".U || instType(1, 0) === "b10".U || lmul.asSInt > emul.asSInt) {
      // unit-stride or strided, or indexed with lmul > emul
      vdIdx := uopIdx
    }.otherwise {
      // indexed with lmul <= emul
      val multiple = emul - lmul
      val uopIdxWidth = uopIdx.getWidth
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}
/**
 * Use start and vl to generate a flow-active mask.
 * mod = true: select elements that are active in elementMask
 * mod = false: select elements that are inactive in elementMask
 */
object GenFlowMask extends VLSUConstants {
  def apply(elementMask: UInt, start: UInt, vl: UInt, mod: Boolean): UInt = {
    val startMask = ~UIntToMask(start, VLEN)
    val vlMask = UIntToMask(vl, VLEN)
    val maskVlStart = vlMask & startMask
    if (mod) {
      elementMask & maskVlStart
    } else {
      (~elementMask).asUInt & maskVlStart
    }
  }
}

object CheckAligned extends VLSUConstants {
  def apply(addr: UInt): UInt = {
    val aligned_16 = (addr(0) === 0.U)     // 16-bit
    val aligned_32 = (addr(1, 0) === 0.U)  // 32-bit
    val aligned_64 = (addr(2, 0) === 0.U)  // 64-bit
    val aligned_128 = (addr(3, 0) === 0.U) // 128-bit
    Cat(true.B, aligned_16, aligned_32, aligned_64, aligned_128)
  }
}

/**
 * Search whether the mask has 'len' consecutive '1' bits starting from the head.
 *   mask: source mask
 *   len: search length
 */
object GenPackMask {
  def leadX(mask: Seq[Bool], len: Int): Bool = {
    if (len == 1) {
      mask.head
    } else {
      leadX(mask.drop(1), len - 1) & mask.head
    }
  }
  def leadOneVec(shiftMask: Seq[Bool]): UInt = {
    // the widest flow is 128-bit, so at most 16 flows can be packed
    val lead1 = leadX(shiftMask, 1)   // 1 consecutive bit
    val lead2 = leadX(shiftMask, 2)   // 2 consecutive bits
    val lead4 = leadX(shiftMask, 4)   // 4 consecutive bits
    val lead8 = leadX(shiftMask, 8)   // 8 consecutive bits
    val lead16 = leadX(shiftMask, 16) // 16 consecutive bits
    Cat(lead1, lead2, lead4, lead8, lead16)
  }

  def apply(shiftMask: UInt) = {
    // pack mask
    val packMask = leadOneVec(shiftMask.asBools)
    packMask
  }
}
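
// Illustrative GenPackMask result (value is hypothetical): for
// shiftMask = b...0000_1111 (four consecutive '1's from bit 0),
// lead1 = lead2 = lead4 = 1 and lead8 = lead16 = 0, so packMask = b11100.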
/**
 * PackEnable = (LeadXVec >> eew) & alignedVec, where the 0th bit represents the
 * ability to merge into a 128-bit flow, the 1st bit the ability to merge into a
 * 64-bit flow, and so on.
 *
 * example:
 *   addr = 0x0, activeMask = b00011100101111, flowIdx = 0, eew = 0 (8-bit)
 *
 *   step 0: addrAlignedVec = (1, 1, 1, 1), elemIdxAligned = (1, 1, 1, 1)
 *   step 1: activePackVec = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0)
 *   step 2: activePackEnable = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0)
 *
 *   we can pack four 8-bit active flows into one 32-bit flow.
 */
object GenPackVec extends VLSUConstants {
  def apply(addr: UInt, shiftMask: UInt, eew: UInt, elemIdx: UInt): UInt = {
    val addrAlignedVec = CheckAligned(addr)
    val elemIdxAligned = CheckAligned(elemIdx)
    val packMask = GenPackMask(shiftMask)
    // generate packVec
    val packVec = addrAlignedVec & elemIdxAligned & (packMask.asUInt >> eew)
    packVec
  }
}

object GenPackAlignedType extends VLSUConstants {
  def apply(packVec: UInt): UInt = {
    val packAlignedType = PriorityMux(Seq(
      packVec(0) -> "b100".U,
      packVec(1) -> "b011".U,
      packVec(2) -> "b010".U,
      packVec(3) -> "b001".U,
      packVec(4) -> "b000".U
    ))
    packAlignedType
  }
}

object GenPackNum extends VLSUConstants {
  def apply(alignedType: UInt, packAlignedType: UInt): UInt = {
    (1.U << (packAlignedType - alignedType)).asUInt
  }
}

object genVWmask128 {
  def apply(addr: UInt, sizeEncode: UInt): UInt = {
    (LookupTree(sizeEncode, List(
      "b000".U -> 0x1.U,    // 0001 << addr(3:0)
      "b001".U -> 0x3.U,    // 0011
      "b010".U -> 0xf.U,    // 1111
      "b011".U -> 0xff.U,   // 11111111
      "b100".U -> 0xffff.U  // 1111111111111111
    )) << addr(3, 0)).asUInt
  }
}
/*
 * only for use when the max access width is 128 bits
 */
object genVWdata {
  def apply(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(sizeEncode, List(
      "b000".U -> Fill(16, data(7, 0)),
      "b001".U -> Fill(8, data(15, 0)),
      "b010".U -> Fill(4, data(31, 0)),
      "b011".U -> Fill(2, data(63, 0)),
      "b100".U -> data(127, 0)
    ))
  }
}

object genUSSplitAddr {
  def apply(addr: UInt, index: UInt): UInt = {
    val tmpAddr = Cat(addr(38, 4), 0.U(4.W))
    val nextCacheline = tmpAddr + 16.U
    LookupTree(index, List(
      0.U -> tmpAddr,
      1.U -> nextCacheline
    ))
  }
}

object genUSSplitMask {
  def apply(mask: UInt, index: UInt, addrOffset: UInt): UInt = {
    val tmpMask = Cat(0.U(16.W), mask) << addrOffset // 32 bits
    LookupTree(index, List(
      0.U -> tmpMask(15, 0),
      1.U -> tmpMask(31, 16)
    ))
  }
}

object genUSSplitData {
  def apply(data: UInt, index: UInt, addrOffset: UInt): UInt = {
    val tmpData = WireInit(0.U(256.W))
    val lookupTable = (0 until 16).map { case i =>
      if (i == 0) {
        i.U -> Cat(0.U(128.W), data)
      } else {
        i.U -> Cat(0.U(((16 - i) * 8).W), data, 0.U((i * 8).W))
      }
    }
    tmpData := LookupTree(addrOffset, lookupTable).asUInt

    LookupTree(index, List(
      0.U -> tmpData(127, 0),
      1.U -> tmpData(255, 128)
    ))
  }
}

object genVdOffset {
  def apply(offset: UInt, index: UInt): UInt = {
    LookupTree(index, List(
      0.U -> offset,
      1.U -> ((~offset).asUInt + 1.U)
    ))
  }
}

/**
 * for merging the 128-bit data of a unit-stride access
 */
// object mergeDataByoffset{
//   def apply(oldData: Seq[UInt], newData: UInt, mask: Seq[Bools], offset: Seq[Uint], valid: Seq[Bool]): UInt = {

//   }
// }
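
// Illustrative genUSSplitAddr result (address value is hypothetical): for
// addr = 0x1008, tmpAddr = 0x1000 and nextCacheline = 0x1010; index selects
// which half of the cacheline-split unit-stride access is addressed.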

object GenVSData extends VLSUConstants {
  def apply(data: UInt, elemIdx: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b000".U -> ZeroExt(LookupTree(elemIdx(3, 0), List.tabulate(VLEN/8)(i => i.U -> getByte(data, i))), VLEN),
      "b001".U -> ZeroExt(LookupTree(elemIdx(2, 0), List.tabulate(VLEN/16)(i => i.U -> getHalfWord(data, i))), VLEN),
      "b010".U -> ZeroExt(LookupTree(elemIdx(1, 0), List.tabulate(VLEN/32)(i => i.U -> getWord(data, i))), VLEN),
      "b011".U -> ZeroExt(LookupTree(elemIdx(0), List.tabulate(VLEN/64)(i => i.U -> getDoubleWord(data, i))), VLEN),
      "b100".U -> data // 128-bit element: pass the data through unchanged
    ))
  }
}
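
// Illustrative GenVSData result (values are hypothetical): with
// alignedType = b001 (16-bit elements) and elemIdx = 3, the half-word
// data(63, 48) is selected and zero-extended to VLEN bits.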