/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.backend.fu.FuType

/**
 * Commonly used parameters and functions in the VLSU
 */
trait VLSUConstants {
  val VLEN = 128
  // for packing unit-stride flows
  val AlignedNum = 4 // 1/2/4/8
  def VLENB = VLEN/8
  def vOffsetBits = log2Up(VLENB) // bit width needed to index an offset inside a vector reg
  lazy val vlmBindexBits = 8 // will be overridden later
  lazy val vsmBindexBits = 8 // will be overridden later

  def alignTypes = 5 // eew/sew = 1/2/4/8 bytes; the last type indicates a 128-bit element
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
   * In the most extreme case, e.g. a segment indexed instruction with eew=64, emul=8,
   * sew=8, lmul=1 and nf=8, each data reg is mapped to 8 index regs and there are
   * 8 data regs in total, one per field. Therefore an instruction can be split into
   * at most 64 uops.
   */
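  // Illustrative worked example (not part of the design): for the segment indexed
  // case above, each of the nf = 8 fields needs emul = 8 index-register uops, so
  // maxUopNum = maxMUL * maxFields = 8 * 8 = 64.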
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index a uop inside a robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // to index an element within an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // to index a flow within a uop
  def fieldBits = log2Up(maxFields) + 1 // 4 bits to encode 1~8

  def ewBits = 3 // bit width of EEW/SEW
  def mulBits = 3 // bit width of emul/lmul

  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    require(data.getWidth >= (i+1) * alignBits)
    data((i+1) * alignBits - 1, i * alignBits)
  }
  def getNoAlignedSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    data(i * 8 + alignBits - 1, i * 8)
  }

  def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64)
  def getDoubleDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 128)
}

trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN
  override lazy val vlmBindexBits = log2Up(coreParams.VlMergeBufferSize)
  override lazy val vsmBindexBits = log2Up(coreParams.VsMergeBufferSize)
  lazy val maxMemByteNum = 16 // maximum bytes for a single memory access
  /**
   * Get the low bits of an address for alignment checking.
   * @param addr Address to be checked
   * @param width Width for checking alignment
   */
  def getCheckAddrLowBits(addr: UInt, width: Int): UInt = addr(log2Up(width) - 1, 0)
  def getOverflowBit(in: UInt, width: Int): UInt = in(log2Up(width))
  def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U
  def isStrided(instType: UInt) = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt) = instType(0) === "b1".U
  def isNotIndexed(instType: UInt) = instType(0) === "b0".U
  def isSegment(instType: UInt) = instType(2) === "b1".U
  def is128Bit(alignedType: UInt) = alignedType(2) === "b1".U

  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
      Mux(en, getByte(newData, i), getByte(oldData, i))
    })
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }
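  // Worked example for mergeDataWithMask (illustrative): with a 64-bit datapath,
  //   oldData = 0x1111_1111_1111_1111, newData = 0x2222_2222_2222_2222, mask = 8'b0000_1111
  // yields bytes {0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11} (byte 0 first),
  // i.e. only the four masked-in low bytes are taken from newData.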
  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)
    LookupTree(alignedType, List(
      "b00".U -> VecInit(elemIdx.map(e => UIntToOH(e(3, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getByte(oldData, i) +: newData.map(getByte(_))
        )}).asUInt,
      "b01".U -> VecInit(elemIdx.map(e => UIntToOH(e(2, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getHalfWord(oldData, i) +: newData.map(getHalfWord(_))
        )}).asUInt,
      "b10".U -> VecInit(elemIdx.map(e => UIntToOH(e(1, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getWord(oldData, i) +: newData.map(getWord(_))
        )}).asUInt,
      "b11".U -> VecInit(elemIdx.map(e => UIntToOH(e(0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getDoubleWord(oldData, i) +: newData.map(getDoubleWord(_))
        )}).asUInt
    ))
  }

  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
  /**
   * for merging 128-bit unit-stride data
   */
  object mergeDataByByte {
    def apply(oldData: UInt, newData: UInt, mask: UInt): UInt = {
      val selVec = Seq(mask).map(_.asBools).transpose
      VecInit(selVec.zipWithIndex.map{ case (selV, i) =>
        ParallelPosteriorityMux(
          true.B +: selV.map(x => x),
          getByte(oldData, i) +: Seq(getByte(newData, i))
        )}).asUInt
    }
  }

  /**
   * for merging unit-stride 128-bit data into 256 bits.
   * If there are 3 ports:
   *   port0 is a 6-to-1 multiplexer -> (128'b0, data), (data, 128'b0), (data, port2data), (port2data, data), (data, port3data) or (port3data, data)
   *   port1 is a 4-to-1 multiplexer -> (128'b0, data), (data, 128'b0), (data, port3data) or (port3data, data)
   *   port3 is a 2-to-1 multiplexer -> (128'b0, data) or (data, 128'b0)
   */
  object mergeDataByIndex {
    def apply(data: Seq[UInt], mask: Seq[UInt], index: UInt, valids: Seq[Bool]): (UInt, UInt) = {
      require(data.length == valids.length)
      require(data.length == mask.length)
      val muxLength = data.length
      val selDataMatrix = Wire(Vec(muxLength, Vec(2, UInt((VLEN * 2).W)))) // 3 * 2 * 256
      val selMaskMatrix = Wire(Vec(muxLength, Vec(2, UInt((VLENB * 2).W)))) // 3 * 2 * 16
      dontTouch(selDataMatrix)
      dontTouch(selMaskMatrix)
      for (i <- 0 until muxLength) {
        if (i == 0) {
          selDataMatrix(i)(0) := Cat(0.U(VLEN.W), data(i))
          selDataMatrix(i)(1) := Cat(data(i), 0.U(VLEN.W))
          selMaskMatrix(i)(0) := Cat(0.U(VLENB.W), mask(i))
          selMaskMatrix(i)(1) := Cat(mask(i), 0.U(VLENB.W))
        }
        else {
          selDataMatrix(i)(0) := Cat(data(i), data(0))
          selDataMatrix(i)(1) := Cat(data(0), data(i))
          selMaskMatrix(i)(0) := Cat(mask(i), mask(0))
          selMaskMatrix(i)(1) := Cat(mask(0), mask(i))
        }
      }
      val selIdxVec = (0 until muxLength).map(_.U)
      val selIdx = PriorityMux(valids.reverse, selIdxVec.reverse)

      val selData = Mux(index === 0.U,
        selDataMatrix(selIdx)(0),
        selDataMatrix(selIdx)(1))
      val selMask = Mux(index === 0.U,
        selMaskMatrix(selIdx)(0),
        selMaskMatrix(selIdx)(1))
      (selData, selMask)
    }
  }
  def mergeDataByIndex(data: UInt, mask: UInt, index: UInt): (UInt, UInt) = {
    mergeDataByIndex(Seq(data), Seq(mask), index, Seq(true.B))
  }
}
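// Worked example for mergeDataByIndex (illustrative): with the single-port overload
// (one valid entry) and index = 0.U, the result is (Cat(0.U(128.W), data),
// Cat(0.U(16.W), mask)), i.e. the 128-bit flow occupies the low half of the 256-bit
// destination; index = 1.U places it in the high half instead.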
abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper
abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W)
  val vecActive = Bool() // 1: active vector element, 0: inactive vector element
  val is_first_ele = Bool()
  val elemIdx = UInt(elemIdxBits.W) // element index
  val elemIdxInsideVd = UInt(elemIdxBits.W) // element index within the scope of vd
  // val uopQueuePtr = new VluopPtr
  // val flowPtr = new VlflowPtr
}

class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
  val alignedType = UInt(alignTypeBits.W)
  // feedback
  val vecFeedback = Bool()
}

// class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
//   val elemIdx = UInt(elemIdxBits.W)
//   val uopQueuePtr = new VsUopPtr
//   val fieldIdx = UInt(fieldBits.W)
//   val segmentIdx = UInt(elemIdxBits.W)
//   val vaddr = UInt(VAddrBits.W)
//   // pack
//   val isPackage = Bool()
//   val packageNum = UInt((log2Up(VLENB) + 1).W)
//   val originAlignedType = UInt(alignTypeBits.W)
//   val alignedType = UInt(alignTypeBits.W)
// }

class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask = UInt(VLENB.W) // each bit for a flow
  val byteMask = UInt(VLENB.W) // each bit for a byte
  val data = UInt(VLEN.W)
  // val fof = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole-register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = UInt(ewBits.W) // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
  val vd_last_uop = Bool()
  val vd_first_uop = Bool()
}

class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr = UInt(VAddrBits.W)
  val mask = UInt(VLENB.W)
  val alignedType = UInt(alignTypeBits.W)
  val vecActive = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val is_first_ele = Bool()

  // pack
  val isPackage = Bool()
  val packageNum = UInt((log2Up(VLENB) + 1).W)
  val originAlignedType = UInt(alignTypeBits.W)
}

class VecMemExuOutput(isVector: Boolean = false)(implicit p: Parameters) extends VLSUBundle {
  val output = new MemExuOutput(isVector)
  val vecFeedback = Bool()
  val mmio = Bool()
  val usSecondInv = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val alignedType = UInt(alignTypeBits.W)
  val mbIndex = UInt(vsmBindexBits.W)
  val mask = UInt(VLENB.W)
  val vaddr = UInt(VAddrBits.W)
}
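// The helpers below decode the standard RVV 3-bit emul/lmul encoding:
// b101 = 1/8, b110 = 1/4, b111 = 1/2, b000 = 1, b001 = 2, b010 = 4, b011 = 8.
// Worked example (illustrative): MulNum("b010".U) = 4.U (4 registers in the group),
// while MulDataSize("b110".U) = 4.U (an EMUL of 1/4 touches only 4 of the 16 bytes).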
object MulNum {
  def apply (mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 1.U , // 1/8
      "b110".U -> 1.U , // 1/4
      "b111".U -> 1.U , // 1/2
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}
/**
 * When emul is greater than or equal to 1, the entire register needs to be written;
 * otherwise, only the specified number of bytes is written.
 */
object MulDataSize {
  def apply (mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 2.U  , // 1/8
      "b110".U -> 4.U  , // 1/4
      "b111".U -> 8.U  , // 1/2
      "b000".U -> 16.U , // 1
      "b001".U -> 16.U , // 2
      "b010".U -> 16.U , // 4
      "b011".U -> 16.U   // 8
    )))}
}

object OneRegNum {
  def apply (eew: UInt): UInt = { // number of elements in one register
    (LookupTree(eew,List(
      "b000".U -> 16.U , // 1
      "b101".U -> 8.U  , // 2
      "b110".U -> 4.U  , // 4
      "b111".U -> 2.U    // 8
    )))}
}

// bytes of data read per element by an indexed instruction
object SewDataSize {
  def apply (sew: UInt): UInt = {
    (LookupTree(sew,List(
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}

// bytes of data read per element by a strided instruction
object EewDataSize {
  def apply (eew: UInt): UInt = {
    (LookupTree(eew,List(
      "b000".U -> 1.U , // 1
      "b101".U -> 2.U , // 2
      "b110".U -> 4.U , // 4
      "b111".U -> 8.U   // 8
    )))}
}

object loadDataSize {
  def apply (instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> MulDataSize(emul), // unit-stride
      "b010".U -> EewDataSize(eew) , // strided
      "b001".U -> SewDataSize(sew) , // indexed-unordered
      "b011".U -> SewDataSize(sew) , // indexed-ordered
      "b100".U -> EewDataSize(eew) , // segment unit-stride
      "b110".U -> EewDataSize(eew) , // segment strided
      "b101".U -> SewDataSize(sew) , // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

object storeDataSize {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> EewDataSize(eew) , // unit-stride, do not use
      "b010".U -> EewDataSize(eew) , // strided
      "b001".U -> SewDataSize(sew) , // indexed-unordered
      "b011".U -> SewDataSize(sew) , // indexed-ordered
      "b100".U -> EewDataSize(eew) , // segment unit-stride
      "b110".U -> EewDataSize(eew) , // segment strided
      "b101".U -> SewDataSize(sew) , // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

object GenVecStoreMask {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    val mask = Wire(UInt(16.W))
    mask := UIntToOH(storeDataSize(instType = instType, eew = eew, sew = sew)) - 1.U
    mask
  }
}
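// Worked example for GenVecStoreMask (illustrative): for a strided store with
// eew = "b110".U (4 bytes), storeDataSize = 4, so the mask is
// UIntToOH(4) - 1 = 16'b0000_0000_0000_1111: exactly one element's bytes are enabled.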
/**
 * These are used to extract the address offsets of indexed instructions
 * from the index register.
 */
object EewEq8 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx,List(
      0.U  -> index(7  ,0  ),
      1.U  -> index(15 ,8  ),
      2.U  -> index(23 ,16 ),
      3.U  -> index(31 ,24 ),
      4.U  -> index(39 ,32 ),
      5.U  -> index(47 ,40 ),
      6.U  -> index(55 ,48 ),
      7.U  -> index(63 ,56 ),
      8.U  -> index(71 ,64 ),
      9.U  -> index(79 ,72 ),
      10.U -> index(87 ,80 ),
      11.U -> index(95 ,88 ),
      12.U -> index(103,96 ),
      13.U -> index(111,104),
      14.U -> index(119,112),
      15.U -> index(127,120)
    )))}
}

object EewEq16 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(15, 0),
      1.U -> index(31, 16),
      2.U -> index(47, 32),
      3.U -> index(63, 48),
      4.U -> index(79, 64),
      5.U -> index(95, 80),
      6.U -> index(111, 96),
      7.U -> index(127, 112)
    )))}
}

object EewEq32 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(31, 0),
      1.U -> index(63, 32),
      2.U -> index(95, 64),
      3.U -> index(127, 96)
    )))}
}

object EewEq64 {
  def apply (index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(63, 0),
      1.U -> index(127, 64)
    )))}
}

object IndexAddr {
  def apply (index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    (LookupTree(eew,List(
      "b000".U -> EewEq8 (index = index, flow_inner_idx = flow_inner_idx), // offset is 1 Byte // TODO: index may cross registers
      "b101".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx), // offset is 2 Bytes
      "b110".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx), // offset is 4 Bytes
      "b111".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx)  // offset is 8 Bytes
    )))}
}

object Log2Num {
  def apply (num: UInt): UInt = {
    (LookupTree(num,List(
      16.U -> 4.U,
      8.U  -> 3.U,
      4.U  -> 2.U,
      2.U  -> 1.U,
      1.U  -> 0.U
    )))}
}

object GenUopIdxInField {
  /**
   * Used for normal vector instructions.
   */
  def apply (instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val isIndexed = instType(0)
    val mulInField = Mux(
      isIndexed,
      Mux(lmul.asSInt > emul.asSInt, lmul, emul),
      emul
    )
    LookupTree(mulInField, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
  /**
   * Only used for segment instructions.
   */
  def apply (select: UInt, uopIdx: UInt): UInt = {
    LookupTree(select, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
}

// eew decode
object EewLog2 extends VLSUConstants {
  // def apply (eew: UInt): UInt = {
  //   (LookupTree(eew,List(
  //     "b000".U -> "b000".U , // 1
  //     "b101".U -> "b001".U , // 2
  //     "b110".U -> "b010".U , // 4
  //     "b111".U -> "b011".U   // 8
  //   )))}
  def apply(eew: UInt): UInt = ZeroExt(eew(1, 0), ewBits)
}
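// Worked example for GenUopIdxInField (illustrative): for a non-indexed instruction
// with emul = "b010".U (EMUL = 4), the field-local uop index is uopIdx(1, 0), so
// uopIdx = 6 maps to uop 2 within its field.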
object GenRealFlowNum {
  /**
   * Unit-stride instructions do not use this method; other instructions generate
   * realFlowNum as MulDataSize >> eew(1,0), where MulDataSize is the number of bytes
   * that need to be written to the register and eew(1,0) is the log2 of the number
   * of bytes written at once.
   *
   * @param instType As the name implies.
   * @param emul As the name implies.
   * @param lmul As the name implies.
   * @param eew As the name implies.
   * @param sew As the name implies.
   * @param isSegment Only modules related to segment need to set this to true.
   * @return FlowNum of the instruction.
   */
  def apply (instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, isSegment: Boolean = false): UInt = {
    require(instType.getWidth == 3, "The instType width must be 3, (isSegment, mop)")
    // The new SegmentUnit handles segment instructions now, but the previous
    // implementation is retained for the time being as a fallback.
    val segmentIndexFlowNum = if (isSegment) (MulDataSize(lmul) >> sew(1,0)).asUInt
      else Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt)
    (LookupTree(instType,List(
      "b000".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // unit-stride (store use; load does not use)
      "b010".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-ordered
      "b100".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // segment unit-stride
      "b110".U -> (MulDataSize(emul) >> eew(1,0)).asUInt, // segment strided
      "b101".U -> segmentIndexFlowNum, // segment indexed-unordered
      "b111".U -> segmentIndexFlowNum  // segment indexed-ordered
    )))}
}

object GenRealFlowLog2 extends VLSUConstants {
  /**
   * GenRealFlowLog2 = Log2(GenRealFlowNum)
   *
   * @param instType As the name implies.
   * @param emul As the name implies.
   * @param lmul As the name implies.
   * @param eew As the name implies.
   * @param sew As the name implies.
   * @param isSegment Only modules related to segment need to set this to true.
   * @return FlowNumLog2 of the instruction.
   */
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, isSegment: Boolean = false): UInt = {
    require(instType.getWidth == 3, "The instType width must be 3, (isSegment, mop)")
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew(1, 0)
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    // The new SegmentUnit handles segment instructions now, but the previous
    // implementation is retained for the time being as a fallback.
    val segmentIndexFlowLog2 = if (isSegment) sewRealFlowLog2 else Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)
    (LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2, // unit-stride
      "b010".U -> eewRealFlowLog2, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-ordered
      "b100".U -> eewRealFlowLog2, // segment unit-stride
      "b110".U -> eewRealFlowLog2, // segment strided
      "b101".U -> segmentIndexFlowLog2, // segment indexed-unordered
      "b111".U -> segmentIndexFlowLog2  // segment indexed-ordered
    )))
  }
}
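// Worked example for GenRealFlowNum (illustrative, VLEN = 128): a strided load with
// emul = "b001".U (EMUL = 2) and eew = "b110".U (32-bit) gives MulDataSize(emul) = 16
// bytes and eew(1,0) = 2, so realFlowNum = 16 >> 2 = 4 flows per uop.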
/**
 * GenElemIdx generates an element index within an instruction,
 * given a certain uopIdx and a known flowIdx inside the uop.
 */
object GenElemIdx extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt,
    uopIdx: UInt, flowIdx: UInt): UInt = {
    val isIndexed = instType(0).asBool
    val eewUopFlowsLog2 = Mux(emul.asSInt > 0.S, 0.U, emul) + log2Up(VLENB).U - eew(1, 0)
    val sewUopFlowsLog2 = Mux(lmul.asSInt > 0.S, 0.U, lmul) + log2Up(VLENB).U - sew(1, 0)
    val uopFlowsLog2 = Mux(
      isIndexed,
      Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2),
      eewUopFlowsLog2
    )
    LookupTree(uopFlowsLog2, List(
      0.U -> uopIdx,
      1.U -> uopIdx ## flowIdx(0),
      2.U -> uopIdx ## flowIdx(1, 0),
      3.U -> uopIdx ## flowIdx(2, 0),
      4.U -> uopIdx ## flowIdx(3, 0)
    ))
  }
}

/**
 * GenVLMAX calculates VLMAX, which equals LMUL * VLEN / SEW.
 */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}
/**
 * Generate a mask based on vlmax.
 * Example: vlmax = b100 -> mask = b011
 */
object GenVlMaxMask {
  def apply(vlmax: UInt, length: Int): UInt = (vlmax - 1.U)(length-1, 0)
}

object GenUSWholeRegVL extends VLSUConstants {
  def apply(nfields: UInt, eew: UInt): UInt = {
    LookupTree(eew(1, 0), List(
      "b00".U -> (nfields << (log2Up(VLENB) - 0)),
      "b01".U -> (nfields << (log2Up(VLENB) - 1)),
      "b10".U -> (nfields << (log2Up(VLENB) - 2)),
      "b11".U -> (nfields << (log2Up(VLENB) - 3))
    ))
  }
}
object GenUSWholeEmul extends VLSUConstants {
  def apply(nf: UInt): UInt = {
    LookupTree(nf,List(
      "b000".U -> "b000".U(mulBits.W),
      "b001".U -> "b001".U(mulBits.W),
      "b011".U -> "b010".U(mulBits.W),
      "b111".U -> "b011".U(mulBits.W)
    ))
  }
}

object GenUSMaskRegVL extends VLSUConstants {
  def apply(vl: UInt): UInt = {
    Mux(vl(2,0) === 0.U, (vl >> 3.U), ((vl >> 3.U) + 1.U))
  }
}

object GenUopByteMask {
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b000".U -> flowMask,
      "b001".U -> FillInterleaved(2, flowMask),
      "b010".U -> FillInterleaved(4, flowMask),
      "b011".U -> FillInterleaved(8, flowMask),
      "b100".U -> FillInterleaved(16, flowMask)
    ))
  }
}

object GenVdIdxInField extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) {
      // unit-stride or strided, or indexed with lmul >= emul
      vdIdx := uopIdx
    }.otherwise {
      // indexed with lmul <= emul
      val multiple = emul - lmul
      val uopIdxWidth = uopIdx.getWidth
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}
/**
 * Use start and vl to generate the flow active mask.
 * mod = true: select bits set in elementMask (inactive elements filled with 0);
 * mod = false: select bits clear in elementMask (inactive elements filled with 1);
 * both are bounded to the range [start, vl).
 */
object GenFlowMask extends VLSUConstants {
  def apply(elementMask: UInt, start: UInt, vl: UInt, mod: Boolean): UInt = {
    val startMask = ~UIntToMask(start, VLEN)
    val vlMask = UIntToMask(vl, VLEN)
    val maskVlStart = vlMask & startMask
    if (mod) {
      elementMask & maskVlStart
    }
    else {
      (~elementMask).asUInt & maskVlStart
    }
  }
}
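// Worked example for GenFlowMask (illustrative): elementMask = b1111_0110, start = 1,
// vl = 6 gives maskVlStart = b0011_1110; with mod = true the active mask is
// b0011_0110, i.e. only elements 1, 2, 4 and 5 remain enabled.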
object CheckAligned extends VLSUConstants {
  def apply(addr: UInt): UInt = {
    val aligned_16  = (addr(0) === 0.U)   // 16-bit
    val aligned_32  = (addr(1,0) === 0.U) // 32-bit
    val aligned_64  = (addr(2,0) === 0.U) // 64-bit
    val aligned_128 = (addr(3,0) === 0.U) // 128-bit
    Cat(true.B, aligned_16, aligned_32, aligned_64, aligned_128)
  }
}

/**
 * Search whether the mask has 'len' contiguous '1' bits starting from the head.
 * mask: source mask
 * len: search length
 */
object GenPackMask {
  def leadX(mask: Seq[Bool], len: Int): Bool = {
    if (len == 1) {
      mask.head
    }
    else {
      leadX(mask.drop(1), len-1) & mask.head
    }
  }
  def leadOneVec(shiftMask: Seq[Bool]): UInt = {
    // the max flow width is 128 bits, so at most 16 8-bit flows can be packed
    val lead1  = leadX(shiftMask, 1)  // 1 contiguous bit
    val lead2  = leadX(shiftMask, 2)  // 2 contiguous bits
    val lead4  = leadX(shiftMask, 4)  // 4 contiguous bits
    val lead8  = leadX(shiftMask, 8)  // 8 contiguous bits
    val lead16 = leadX(shiftMask, 16) // 16 contiguous bits
    Cat(lead1, lead2, lead4, lead8, lead16)
  }

  def apply(shiftMask: UInt) = {
    // pack mask
    val packMask = leadOneVec(shiftMask.asBools)
    packMask
  }
}
/**
 * PackEnable = (LeadXVec >> eew) & alignedVec, where bit 0 represents the ability
 * to merge into a 128-bit flow, bit 1 a 64-bit flow, and so on.
 *
 * Example:
 *   addr = 0x0, activeMask = b00011100101111, flowIdx = 0, eew = 0 (8-bit)
 *
 *   step 0: addrAlignedVec   = (1, 1, 1, 1)  elemIdxAligned  = (1, 1, 1, 1)
 *   step 1: activePackVec    = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0)
 *   step 2: activePackEnable = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0)
 *
 * We can pack 4 active 8-bit flows into one 32-bit flow.
 */
object GenPackVec extends VLSUConstants {
  def apply(addr: UInt, shiftMask: UInt, eew: UInt, elemIdx: UInt): UInt = {
    val addrAlignedVec = CheckAligned(addr)
    val elemIdxAligned = CheckAligned(elemIdx)
    val packMask = GenPackMask(shiftMask)
    // generate packVec
    val packVec = addrAlignedVec & elemIdxAligned & (packMask.asUInt >> eew)

    packVec
  }
}

object GenPackAlignedType extends VLSUConstants {
  def apply(packVec: UInt): UInt = {
    val packAlignedType = PriorityMux(Seq(
      packVec(0) -> "b100".U,
      packVec(1) -> "b011".U,
      packVec(2) -> "b010".U,
      packVec(3) -> "b001".U,
      packVec(4) -> "b000".U
    ))
    packAlignedType
  }
}

object GenPackNum extends VLSUConstants {
  def apply(alignedType: UInt, packAlignedType: UInt): UInt = {
    (1.U << (packAlignedType - alignedType)).asUInt
  }
}

object genVWmask128 {
  def apply(addr: UInt, sizeEncode: UInt): UInt = {
    (LookupTree(sizeEncode, List(
      "b000".U -> 0x1.U,    // 0001             << addr(3:0)
      "b001".U -> 0x3.U,    // 0011
      "b010".U -> 0xf.U,    // 1111
      "b011".U -> 0xff.U,   // 11111111
      "b100".U -> 0xffff.U  // 1111111111111111
    )) << addr(3, 0)).asUInt
  }
}
/*
 * only used when the maximum access width is 128 bits
 */
object genVWdata {
  def apply(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(sizeEncode, List(
      "b000".U -> Fill(16, data(7, 0)),
      "b001".U -> Fill(8, data(15, 0)),
      "b010".U -> Fill(4, data(31, 0)),
      "b011".U -> Fill(2, data(63, 0)),
      "b100".U -> data(127, 0)
    ))
  }
}
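// Worked example for genVWmask128 (illustrative): sizeEncode = "b010".U with
// addr(3,0) = 2 yields 0xf << 2 = 16'b0000_0000_0011_1100, enabling the four bytes
// starting at byte offset 2.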
object genUSSplitAddr {
  def apply(addr: UInt, index: UInt): UInt = {
    val tmpAddr = Cat(addr(38, 4), 0.U(4.W))
    val nextCacheline = tmpAddr + 16.U
    LookupTree(index, List(
      0.U -> tmpAddr,
      1.U -> nextCacheline
    ))
  }
}

object genUSSplitMask {
  def apply(mask: UInt, index: UInt, addrOffset: UInt): UInt = {
    val tmpMask = Cat(0.U(16.W), mask) << addrOffset // 32 bits
    LookupTree(index, List(
      0.U -> tmpMask(15, 0),
      1.U -> tmpMask(31, 16)
    ))
  }
}

object genUSSplitData {
  def apply(data: UInt, index: UInt, addrOffset: UInt): UInt = {
    val tmpData = WireInit(0.U(256.W))
    val lookupTable = (0 until 16).map{ case i =>
      if (i == 0) {
        i.U -> Cat(0.U(128.W), data)
      } else {
        i.U -> Cat(0.U(((16-i)*8).W), data, 0.U((i*8).W))
      }
    }
    tmpData := LookupTree(addrOffset, lookupTable).asUInt

    LookupTree(index, List(
      0.U -> tmpData(127, 0),
      1.U -> tmpData(255, 128)
    ))
  }
}

object genVSData extends VLSUConstants {
  def apply(data: UInt, elemIdx: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b000".U -> ZeroExt(LookupTree(elemIdx(3, 0), List.tabulate(VLEN/8)(i => i.U -> getByte(data, i))), VLEN),
      "b001".U -> ZeroExt(LookupTree(elemIdx(2, 0), List.tabulate(VLEN/16)(i => i.U -> getHalfWord(data, i))), VLEN),
      "b010".U -> ZeroExt(LookupTree(elemIdx(1, 0), List.tabulate(VLEN/32)(i => i.U -> getWord(data, i))), VLEN),
      "b011".U -> ZeroExt(LookupTree(elemIdx(0), List.tabulate(VLEN/64)(i => i.U -> getDoubleWord(data, i))), VLEN),
      "b100".U -> data // if there were a wider element, this would break
    ))
  }
}

// TODO: more elegant
object genVStride extends VLSUConstants {
  def apply(uopIdx: UInt, stride: UInt): UInt = {
    LookupTree(uopIdx, List(
      0.U -> 0.U,
      1.U -> stride,
      2.U -> (stride << 1),
      3.U -> ((stride << 1).asUInt + stride),
      4.U -> (stride << 2),
      5.U -> ((stride << 2).asUInt + stride),
      6.U -> ((stride << 2).asUInt + (stride << 1)),
      7.U -> ((stride << 2).asUInt + (stride << 1) + stride)
    ))
  }
}
/**
 * Generate uopOffset; not used for segment instructions.
 */
object genVUopOffset extends VLSUConstants {
  def apply(instType: UInt, isfof: Bool, uopidx: UInt, nf: UInt, eew: UInt, stride: UInt, alignedType: UInt): UInt = {
    val uopInsidefield = (uopidx >> nf).asUInt // when nf == 0, this is uopidx

    val fofVUopOffset = (LookupTree(instType,List(
      "b000".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew) ), // unit-stride fof
      "b100".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew) )  // segment unit-stride fof
    ))).asUInt

    val otherVUopOffset = (LookupTree(instType,List(
      "b000".U -> ( uopInsidefield << alignedType )                                 , // unit-stride
      "b010".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew) ) , // strided
      "b001".U -> ( 0.U )                                                           , // indexed-unordered
      "b011".U -> ( 0.U )                                                           , // indexed-ordered
      "b100".U -> ( uopInsidefield << alignedType )                                 , // segment unit-stride
      "b110".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew) ) , // segment strided
      "b101".U -> ( 0.U )                                                           , // segment indexed-unordered
      "b111".U -> ( 0.U )                                                             // segment indexed-ordered
    ))).asUInt

    Mux(isfof, fofVUopOffset, otherVUopOffset)
  }
}
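// Worked example for genVStride (illustrative): uopIdx = 5 selects
// (stride << 2) + stride = 5 * stride, i.e. the accumulated stride offset of the
// fifth uop computed from shifts and adds instead of a multiplier.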
object genVFirstUnmask extends VLSUConstants {
  /**
   * Find the index of the lowest set (unmasked) bit.
   * Example:
   *   mask = 16'b1111_1111_1110_0000
   *   return 5
   * @param mask 16 bits of mask.
   * @return index of the lowest set bit.
   */
  def apply(mask: UInt): UInt = {
    require(mask.getWidth == 16, "The mask width must be 16")
    val select = (0 until 16).zip(mask.asBools).map{ case (i, v) =>
      (v, i.U)
    }
    PriorityMuxDefault(select, 0.U)
  }

  def apply(mask: UInt, regOffset: UInt): UInt = {
    require(mask.getWidth == 16, "The mask width must be 16")
    val realMask = (mask >> regOffset).asUInt
    val select = (0 until 16).zip(realMask.asBools).map{ case (i, v) =>
      (v, i.U)
    }
    PriorityMuxDefault(select, 0.U)
  }
}
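// Worked example for genVFirstUnmask (illustrative): mask = 16'b1111_1111_1110_0000
// has its lowest '1' at bit 5, so apply(mask) = 5.U; with regOffset = 8.U the mask
// is first shifted right by 8, so apply(mask, 8.U) = 0.U (bit 8 is set).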