/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._

/**
 * Commonly used parameters and functions in the VLSU
 */
trait VLSUConstants {
  val VLEN = 128
  // for packing unit-stride flows
  val AlignedNum = 4 // 1/2/4/8
  def VLENB = VLEN/8
  def vOffsetBits = log2Up(VLENB) // bit-width to index an offset inside a vector reg

  def alignTypes = 4 // eew/sew = 1/2/4/8
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
   * In the most extreme case, e.g. a segment indexed instruction with eew=64, emul=8,
   * sew=8, lmul=1 and nf=8, each data reg is mapped to 8 index regs, and there are
   * 8 data regs in total, one per field. Therefore an instruction can be divided
   * into at most 64 uops.
   */
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index a uop inside a robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // to index an element within an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // to index a flow within a uop
  def fieldBits = log2Up(maxFields) + 1 // 4 bits to indicate 1~8

  def ewBits = 3 // bit-width of EEW/SEW
  def mulBits = 3 // bit-width of emul/lmul

  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    require(data.getWidth >= (i+1) * alignBits)
    data((i+1) * alignBits - 1, i * alignBits)
  }

  def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64)
}

trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN
  def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U
  def isStrided(instType: UInt) = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt) = instType(0) === "b1".U
  def isNotIndexed(instType: UInt) = instType(0) === "b0".U
  def isSegment(instType: UInt) = instType(2) === "b1".U

  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
      Mux(en, getByte(newData, i), getByte(oldData, i))
    })
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }
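
  // Illustrative example (not from the original source): for 32-bit data,
  //   mergeDataWithMask(oldData = "hAABBCCDD".U, newData = "h11223344".U, mask = "b0101".U)
  // takes a byte from newData wherever the corresponding mask bit is 1 and
  // keeps the old byte otherwise, yielding the byte vector
  // (0x44, 0xCC, 0x22, 0xAA) from LSB to MSB, i.e. "hAA22CC44" as a UInt.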

  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)
    LookupTree(alignedType, List(
      "b00".U -> VecInit(elemIdx.map(e => UIntToOH(e(3, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getByte(oldData, i) +: newData.map(getByte(_))
        )}).asUInt,
      "b01".U -> VecInit(elemIdx.map(e => UIntToOH(e(2, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getHalfWord(oldData, i) +: newData.map(getHalfWord(_))
        )}).asUInt,
      "b10".U -> VecInit(elemIdx.map(e => UIntToOH(e(1, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getWord(oldData, i) +: newData.map(getWord(_))
        )}).asUInt,
      "b11".U -> VecInit(elemIdx.map(e => UIntToOH(e(0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getDoubleWord(oldData, i) +: newData.map(getDoubleWord(_))
        )}).asUInt
    ))
  }

  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
}
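
// Illustrative example (behavior inferred from the code above, not from the
// original source): with alignedType = "b10".U (32-bit elements),
//   mergeDataWithElemIdx(oldData, newData = d, alignedType = "b10".U, elemIdx = 2.U)
// replaces word 2 of oldData with d(31, 0) and leaves words 0, 1 and 3
// untouched. When several valid elements target the same lane, the
// ParallelPosteriorityMux lets the later entry in newData take effect.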

abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper
abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W)
  val vecActive = Bool() // 1: active vector element, 0: inactive vector element
  val is_first_ele = Bool()
  val elemIdx = UInt(elemIdxBits.W) // element index
  val elemIdxInsideVd = UInt(elemIdxBits.W) // element index within the scope of vd
  val uopQueuePtr = new VluopPtr
  val flowPtr = new VlflowPtr
}

class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
  // pack
  val isPackage = Bool()
  val packageNum = UInt(log2Up(VLENB).W)
  val originAlignedType = UInt(alignTypeBits.W)
  val alignedType = UInt(alignTypeBits.W)
}

class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val elemIdx = UInt(elemIdxBits.W)
  val uopQueuePtr = new VsUopPtr
  val fieldIdx = UInt(fieldBits.W)
  val segmentIdx = UInt(elemIdxBits.W)
  val vaddr = UInt(VAddrBits.W)
  // pack
  val isPackage = Bool()
  val packageNum = UInt(log2Up(VLENB).W)
  val originAlignedType = UInt(alignTypeBits.W)
  val alignedType = UInt(alignTypeBits.W)
}

class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask = UInt(VLENB.W) // each bit for a flow
  val byteMask = UInt(VLENB.W) // each bit for a byte
  val data = UInt(VLEN.W)
  // val fof = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = UInt(ewBits.W) // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
  val vd_last_uop = Bool()
  val vd_first_uop = Bool()
}

class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr = UInt(VAddrBits.W)
  val mask = UInt(VLENB.W)
  val alignedType = UInt(alignTypeBits.W)
  val vecActive = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val is_first_ele = Bool()

  // pack
  val isPackage = Bool()
  val packageNum = UInt(log2Up(VLENB).W)
  val originAlignedType = UInt(alignTypeBits.W)
}

object MulNum {
  def apply (mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 1.U , // 1/8
      "b110".U -> 1.U , // 1/4
      "b111".U -> 1.U , // 1/2
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}
/**
 * When emul is greater than or equal to 1, the entire register needs to be written;
 * otherwise, only the specified number of bytes is written.
 */
object MulDataSize {
  def apply (mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 2.U  , // 1/8
      "b110".U -> 4.U  , // 1/4
      "b111".U -> 8.U  , // 1/2
      "b000".U -> 16.U , // 1
      "b001".U -> 16.U , // 2
      "b010".U -> 16.U , // 4
      "b011".U -> 16.U   // 8
    )))}
}

object OneRegNum {
  def apply (eew: UInt): UInt = { // number of elements in one register
    (LookupTree(eew,List(
      "b000".U -> 16.U , // 1 byte per element
      "b101".U -> 8.U  , // 2 bytes per element
      "b110".U -> 4.U  , // 4 bytes per element
      "b111".U -> 2.U    // 8 bytes per element
    )))}
}

// data bytes read per flow by an indexed inst
object SewDataSize {
  def apply (sew: UInt): UInt = {
    (LookupTree(sew,List(
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}

// data bytes read per flow by a strided inst
object EewDataSize {
  def apply (eew: UInt): UInt = {
    (LookupTree(eew,List(
      "b000".U -> 1.U , // 1
      "b101".U -> 2.U , // 2
      "b110".U -> 4.U , // 4
      "b111".U -> 8.U   // 8
    )))}
}
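
// Illustrative example (not from the original source): a unit-stride uop with
// emul = 4 ("b010") writes MulDataSize = 16 bytes, i.e. a whole register; a
// strided flow with eew = 32-bit ("b110") moves EewDataSize = 4 bytes; an
// indexed flow with sew = 16-bit ("b001") moves SewDataSize = 2 bytes.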

object loadDataSize {
  def apply (instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> MulDataSize(emul), // unit-stride
      "b010".U -> EewDataSize(eew) , // strided
      "b001".U -> SewDataSize(sew) , // indexed-unordered
      "b011".U -> SewDataSize(sew) , // indexed-ordered
      "b100".U -> EewDataSize(eew) , // segment unit-stride
      "b110".U -> EewDataSize(eew) , // segment strided
      "b101".U -> SewDataSize(sew) , // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

object storeDataSize {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> EewDataSize(eew) , // unit-stride, do not use
      "b010".U -> EewDataSize(eew) , // strided
      "b001".U -> SewDataSize(sew) , // indexed-unordered
      "b011".U -> SewDataSize(sew) , // indexed-ordered
      "b100".U -> EewDataSize(eew) , // segment unit-stride
      "b110".U -> EewDataSize(eew) , // segment strided
      "b101".U -> SewDataSize(sew) , // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

object GenVecStoreMask {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    val mask = Wire(UInt(16.W))
    mask := UIntToOH(storeDataSize(instType = instType, eew = eew, sew = sew)) - 1.U
    mask
  }
}

/**
 * These are used to extract the index offsets that form the addresses of
 * indexed instructions.
 */
object EewEq8 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx,List(
      0.U  -> index(7  , 0  ),
      1.U  -> index(15 , 8  ),
      2.U  -> index(23 , 16 ),
      3.U  -> index(31 , 24 ),
      4.U  -> index(39 , 32 ),
      5.U  -> index(47 , 40 ),
      6.U  -> index(55 , 48 ),
      7.U  -> index(63 , 56 ),
      8.U  -> index(71 , 64 ),
      9.U  -> index(79 , 72 ),
      10.U -> index(87 , 80 ),
      11.U -> index(95 , 88 ),
      12.U -> index(103, 96 ),
      13.U -> index(111, 104),
      14.U -> index(119, 112),
      15.U -> index(127, 120)
    )))}
}

object EewEq16 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(15, 0),
      1.U -> index(31, 16),
      2.U -> index(47, 32),
      3.U -> index(63, 48),
      4.U -> index(79, 64),
      5.U -> index(95, 80),
      6.U -> index(111, 96),
      7.U -> index(127, 112)
    )))}
}

object EewEq32 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(31, 0),
      1.U -> index(63, 32),
      2.U -> index(95, 64),
      3.U -> index(127, 96)
    )))}
}

object EewEq64 {
  def apply (index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(63, 0),
      1.U -> index(127, 64)
    )))}
}

object IndexAddr {
  def apply (index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    (LookupTree(eew,List(
      "b000".U -> EewEq8 (index = index, flow_inner_idx = flow_inner_idx), // index is 1 byte // TODO: index may cross registers
      "b101".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx), // index is 2 bytes
      "b110".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx), // index is 4 bytes
      "b111".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx)  // index is 8 bytes
    )))}
}

object Log2Num {
  def apply (num: UInt): UInt = {
    (LookupTree(num,List(
      16.U -> 4.U,
      8.U  -> 3.U,
      4.U  -> 2.U,
      2.U  -> 1.U,
      1.U  -> 0.U
    )))}
}

object GenUopIdxInField {
  def apply (instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val isIndexed = instType(0)
    val mulInField = Mux(
      isIndexed,
      Mux(lmul.asSInt > emul.asSInt, lmul, emul),
      emul
    )
    LookupTree(mulInField, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
}
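
// Illustrative example (not from the original source): for an indexed
// instruction (instType(0) = 1) with lmul = 2 ("b001") and emul = 1 ("b000"),
// the larger multiplier lmul decides the field layout, so a field spans two
// uops and GenUopIdxInField returns uopIdx(0), the uop's position inside its
// field.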

// eew decode
object EewLog2 extends VLSUConstants {
  // def apply (eew: UInt): UInt = {
  //   (LookupTree(eew,List(
  //     "b000".U -> "b000".U , // 1
  //     "b101".U -> "b001".U , // 2
  //     "b110".U -> "b010".U , // 4
  //     "b111".U -> "b011".U   // 8
  //   )))}
  def apply(eew: UInt): UInt = ZeroExt(eew(1, 0), ewBits)
}

/**
 * Unit-stride instructions do not use this method. Other instructions generate
 * realFlowNum as EmulDataSize >> eew(1, 0): EmulDataSize is the number of bytes
 * that need to be written to the register, and eew(1, 0) encodes the number of
 * bytes written at once.
 */
object GenRealFlowNum {
  def apply (instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // unit-stride: store use, load do not use
      "b010".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-ordered
      "b100".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // segment unit-stride
      "b110".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // segment strided
      "b101".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // segment indexed-unordered
      "b111".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt)  // segment indexed-ordered
    )))}
}

/**
 * GenRealFlowLog2 = Log2(GenRealFlowNum)
 */
object GenRealFlowLog2 extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew(1, 0)
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    (LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2, // unit-stride
      "b010".U -> eewRealFlowLog2, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-ordered
      "b100".U -> eewRealFlowLog2, // segment unit-stride
      "b110".U -> eewRealFlowLog2, // segment strided
      "b101".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // segment indexed-unordered
      "b111".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)  // segment indexed-ordered
    )))
  }
}
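
// Illustrative example (not from the original source, assuming VLEN = 128):
// for an indexed-unordered load ("b001") with emul = 2 ("b001"), lmul = 1
// ("b000"), eew = 8-bit and sew = 16-bit, emul > lmul, so
// GenRealFlowNum = MulDataSize(emul) >> eew(1, 0) = 16 >> 0 = 16 flows per uop,
// and GenRealFlowLog2 returns 0 + log2Up(VLENB) - 0 = 4.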

/**
 * GenElemIdx generates the element index within an instruction, given a certain
 * uopIdx and a known flowIdx inside the uop.
 */
object GenElemIdx extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt,
    uopIdx: UInt, flowIdx: UInt): UInt = {
    val isIndexed = instType(0).asBool
    val eewUopFlowsLog2 = Mux(emul.asSInt > 0.S, 0.U, emul) + log2Up(VLENB).U - eew(1, 0)
    val sewUopFlowsLog2 = Mux(lmul.asSInt > 0.S, 0.U, lmul) + log2Up(VLENB).U - sew(1, 0)
    val uopFlowsLog2 = Mux(
      isIndexed,
      Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2),
      eewUopFlowsLog2
    )
    LookupTree(uopFlowsLog2, List(
      0.U -> uopIdx,
      1.U -> uopIdx ## flowIdx(0),
      2.U -> uopIdx ## flowIdx(1, 0),
      3.U -> uopIdx ## flowIdx(2, 0),
      4.U -> uopIdx ## flowIdx(3, 0)
    ))
  }
}

/**
 * GenVLMAX calculates VLMAX, which equals LMUL * VLEN / SEW
 */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}

object GenUSWholeRegVL extends VLSUConstants {
  def apply(nfields: UInt, eew: UInt): UInt = {
    LookupTree(eew(1, 0), List(
      "b00".U -> (nfields << (log2Up(VLENB) - 0)),
      "b01".U -> (nfields << (log2Up(VLENB) - 1)),
      "b10".U -> (nfields << (log2Up(VLENB) - 2)),
      "b11".U -> (nfields << (log2Up(VLENB) - 3))
    ))
  }
}
object GenUSWholeEmul extends VLSUConstants {
  def apply(nf: UInt): UInt = {
    LookupTree(nf, List(
      "b000".U -> "b000".U(mulBits.W),
      "b001".U -> "b001".U(mulBits.W),
      "b011".U -> "b010".U(mulBits.W),
      "b111".U -> "b011".U(mulBits.W)
    ))
  }
}

object GenUSMaskRegVL extends VLSUConstants {
  def apply(vl: UInt): UInt = {
    Mux(vl(2,0) === 0.U, (vl >> 3.U), ((vl >> 3.U) + 1.U))
  }
}

object GenUopByteMask {
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> flowMask,
      "b01".U -> FillInterleaved(2, flowMask),
      "b10".U -> FillInterleaved(4, flowMask),
      "b11".U -> FillInterleaved(8, flowMask)
    ))
  }
}

object GenVdIdxInField extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) {
      // unit-stride or strided, or indexed with lmul > emul
      vdIdx := uopIdx
    }.otherwise {
      // indexed with lmul <= emul
      val multiple = emul - lmul
      val uopIdxWidth = uopIdx.getWidth
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}
/**
 * Use start and vl to generate a flow-active mask within [start, vl).
 * mod = true: fill 0 (select elementMask bits)
 * mod = false: fill 1 (select ~elementMask bits)
 */
object GenFlowMask extends VLSUConstants {
  def apply(elementMask: UInt, start: UInt, vl: UInt, mod: Boolean): UInt = {
    val startMask = ~UIntToMask(start, VLEN)
    val vlMask = UIntToMask(vl, VLEN)
    val maskVlStart = vlMask & startMask
    if (mod) {
      elementMask & maskVlStart
    } else {
      (~elementMask).asUInt & maskVlStart
    }
  }
}

object CheckAligned extends VLSUConstants {
  def apply(addr: UInt): UInt = {
    val aligned_16 = (addr(0) === 0.U) // 16-bit aligned
    val aligned_32 = (addr(1,0) === 0.U) // 32-bit aligned
    val aligned_64 = (addr(2,0) === 0.U) // 64-bit aligned
    Cat(true.B, aligned_16, aligned_32, aligned_64)
  }
}
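
// Illustrative example (not from the original source): CheckAligned(4.U)
// returns "b1110".U: bit 3 (8-bit) is always set, addr(0) = 0 and
// addr(1, 0) = 0 give 16-bit and 32-bit alignment, while addr(2, 0) =/= 0
// leaves bit 0 (64-bit alignment) clear.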

/**
 * Search whether the mask has 'len' consecutive '1' bits starting from the head.
 * mask: source mask
 * len: search length
 */
object GenPackMask {
  def leadX(mask: Seq[Bool], len: Int): Bool = {
    if (len == 1) {
      mask.head
    } else {
      leadX(mask.drop(1), len - 1) & mask.head
    }
  }
  def leadOneVec(shiftMask: Seq[Bool]): UInt = {
    // the widest flow is 64-bit, so at most 8 flows can be packed

    val lead1 = leadX(shiftMask, 1) // 1 consecutive '1' bit
    val lead2 = leadX(shiftMask, 2) // 2 consecutive '1' bits
    val lead4 = leadX(shiftMask, 4) // 4 consecutive '1' bits
    val lead8 = leadX(shiftMask, 8) // 8 consecutive '1' bits
    Cat(lead1, lead2, lead4, lead8)
  }

  def apply(shiftMask: UInt) = {
    // pack mask
    val packMask = leadOneVec(shiftMask.asBools)
    packMask
  }
}
/**
 * PackEnable = (LeadXVec >> eew) & alignedVec, where bit 0 represents the ability
 * to merge into a 64-bit flow, bit 1 a 32-bit flow, and so on.
 *
 * example:
 *   addr = 0x0, activeMask = b00011100101111, flowIdx = 0, eew = 0 (8-bit)
 *
 *   step 0: addrAlignedVec = (1, 1, 1, 1), elemIdxAligned = (1, 1, 1, 1)
 *   step 1: activePackVec = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0)
 *   step 2: activePackEnable = (1, 1, 1, 0), inactivePackEnable = (0, 0, 0, 0)
 *
 *   so four 8-bit active flows can be packed into one 32-bit flow.
 */
object GenPackVec extends VLSUConstants {
  def apply(addr: UInt, shiftMask: UInt, eew: UInt, elemIdx: UInt): UInt = {
    val addrAlignedVec = CheckAligned(addr)
    val elemIdxAligned = CheckAligned(elemIdx)
    val packMask = GenPackMask(shiftMask)
    // generate packVec
    val packVec = addrAlignedVec & elemIdxAligned & (packMask.asUInt >> eew)

    packVec
  }
}

object GenPackAlignedType extends VLSUConstants {
  def apply(packVec: UInt): UInt = {
    val packAlignedType = PriorityMux(Seq(
      packVec(0) -> "b11".U,
      packVec(1) -> "b10".U,
      packVec(2) -> "b01".U,
      packVec(3) -> "b00".U,
    ))
    packAlignedType
  }
}

object GenPackNum extends VLSUConstants {
  def apply(alignedType: UInt, packAlignedType: UInt): UInt = {
    (1.U << (packAlignedType - alignedType)).asUInt
  }
}
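
// Illustrative example (not from the original source): with eight consecutive
// '1' mask bits, GenPackMask returns "b1111".U (runs of 1, 2, 4 and 8 all
// present). For addr = 0x4 and elemIdx = 4 (4-aligned but not 8-aligned),
// CheckAligned yields "b1110".U for both, so with eew = 0 (8-bit flows)
// GenPackVec = b1110 & b1110 & b1111 = "b1110".U. GenPackAlignedType then
// selects "b10".U (32-bit), and GenPackNum("b00".U, "b10".U) = 1 << 2 = 4:
// four 8-bit flows are packed into one 32-bit flow.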