/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.FuType
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.datapath.NewPipelineConnect
import freechips.rocketchip.diplomacy.BufferParams

/**
 * Payload of one merge-buffer entry (the element type of `entries` in [[BaseVMergeBuffer]]).
 *
 * `flowNum` is loaded at enqueue and decremented on every pipeline writeback; the entry is
 * complete once it reaches zero (see [[allReady]]). The fields after the `for exception`
 * marker are only updated when a pipeline writeback records an exception for this entry.
 */
class MBufferBundle(implicit p: Parameters) extends VLSUBundle {
  val data              = UInt(VLEN.W)
  val mask              = UInt(VLENB.W)
  val flowNum           = UInt(flowIdxBits.W)
  val exceptionVec      = ExceptionVec()
  val uop               = new DynInst
  // val vdOffset       = UInt(vOffsetBits.W)
  val sourceType        = VSFQFeedbackType()
  val flushState        = Bool()
  val vdIdx             = UInt(3.W)
  val elemIdx           = UInt(elemIdxBits.W) // element index
  // for exception
  val vstart            = UInt(elemIdxBits.W)
  val vl                = UInt(elemIdxBits.W)
  val vaNeedExt         = Bool()
  val vaddr             = UInt(XLEN.W)
  val gpaddr            = UInt(GPAddrBits.W)
  val isForVSnonLeafPTE = Bool()
  val fof               = Bool()
  val vlmax             = UInt(elemIdxBits.W)

  /** True when no flow is outstanding for this entry (`flowNum === 0`). */
  def allReady(): Bool = (flowNum === 0.U)
}

/**
 * Common logic of the vector load / vector store merge buffers.
 *
 * Requests from `io.fromSplit` allocate an entry through `freeList`; pipeline writebacks on
 * `io.fromPipeline` decrement the entry's `flowNum` and may record an exception; entries
 * whose `flowNum` reached zero are dequeued to `io.uopWriteback`, `io.toLsq` and
 * `io.feedback`.
 *
 * Concrete subclasses provide `freeList` and `uopSize`.
 *
 * @param isVStore selects the store flavour: `VstuCfg` is used for exception filtering and
 *                 pipeline writeback bookkeeping is not delayed by one cycle.
 */
abstract class BaseVMergeBuffer(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VMergeBufferIO(isVStore))

  // freelist: store valid entries index.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList: FreeList
  val uopSize: Int
  val enqWidth  = io.fromSplit.length
  val deqWidth  = io.uopWriteback.length
  val pipeWidth = io.fromPipeline.length
  lazy val fuCfg = if (isVStore) VstuCfg else VlduCfg

  /**
   * Initialises a freshly allocated entry from an enqueue request.
   *
   * `elemIdx` starts as all ones and `vstart` as zero; the exception vector starts cleared.
   * `vaNeedExt`, `gpaddr` and `isForVSnonLeafPTE` are not written here; they are only
   * written when a pipeline writeback records an exception.
   */
  def EnqConnect(source: MergeBufferReq, sink: MBufferBundle) = {
    sink.data         := source.data
    sink.mask         := source.mask
    sink.flowNum      := source.flowNum
    sink.exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(ExceptionVec()), fuCfg)
    sink.uop          := source.uop
    sink.sourceType   := 0.U.asTypeOf(VSFQFeedbackType())
    sink.flushState   := false.B
    sink.vdIdx        := source.vdIdx
    sink.elemIdx      := Fill(elemIdxBits, 1.U)
    sink.fof          := source.fof
    sink.vlmax        := source.vlmax
    sink.vl           := source.uop.vpu.vl
    sink.vaddr        := source.vaddr
    sink.vstart       := 0.U
  }

  /** Builds the uop writeback payload from a finished entry. Fields not listed stay zero. */
  def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink = WireInit(0.U.asTypeOf(new MemExuOutput(isVector = true)))
    sink.data             := source.data
    sink.mask.get         := source.mask
    sink.uop              := source.uop
    sink.uop.exceptionVec := ExceptionNO.selectByFu(source.exceptionVec, fuCfg)
    sink.uop.vpu.vmask    := source.mask
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := source.vdIdx // Mgu needs to use this.
    sink.vdIdx.get        := source.vdIdx
    sink.uop.vpu.vstart   := source.vstart
    sink.uop.vpu.vl       := source.vl
    sink
  }

  /**
   * Builds the LSQ feedback for a finished entry: COMMIT when the entry has no exception
   * (after filtering by `fuCfg`), FLUSH when it has one, LAST always set.
   */
  def ToLsqConnect(source: MBufferBundle): FeedbackToLsqIO = {
    val sink   = WireInit(0.U.asTypeOf(new FeedbackToLsqIO))
    val hasExp = ExceptionNO.selectByFu(source.exceptionVec, fuCfg).asUInt.orR
    sink.robidx                       := source.uop.robIdx
    sink.uopidx                       := source.uop.uopIdx
    sink.feedback(VecFeedbacks.COMMIT) := !hasExp
    sink.feedback(VecFeedbacks.FLUSH)  := hasExp
    sink.feedback(VecFeedbacks.LAST)   := true.B
    sink.vstart                       := source.vstart // TODO: if lsq need vl for fof?
    sink.vaddr                        := source.vaddr
    sink.vaNeedExt                    := source.vaNeedExt
    sink.gpaddr                       := source.gpaddr
    sink.isForVSnonLeafPTE            := source.isForVSnonLeafPTE
    sink.vl                           := source.vl
    sink.exceptionVec                 := ExceptionNO.selectByFu(source.exceptionVec, fuCfg)
    sink
  }


  val entries      = Reg(Vec(uopSize, new MBufferBundle))
  val needCancel   = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val allocated    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val freeMaskVec  = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val uopFinish    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val needRSReplay = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  // enq, from splitPipeline
  // val allowEnqueue =
  val cancelEnq   = io.fromSplit.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val canEnqueue  = io.fromSplit.map(_.req.valid)
  val needEnqueue = (0 until enqWidth).map { i =>
    canEnqueue(i) && !cancelEnq(i)
  }

  val freeCount = uopSize.U - freeList.io.validCount

  for ((enq, i) <- io.fromSplit.zipWithIndex) {
    freeList.io.doAllocate(i) := false.B

    freeList.io.allocateReq(i) := true.B

    // Port i takes the slot after the ones claimed by lower-numbered enqueuing ports.
    val offset   = PopCount(needEnqueue.take(i))
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.req.ready := freeCount >= (i + 1).U // for better timing

    when(needEnqueue(i) && enq.req.ready) {
      freeList.io.doAllocate(i) := true.B
      // enqueue
      allocated(enqIndex)    := true.B
      uopFinish(enqIndex)    := false.B
      needRSReplay(enqIndex) := false.B

      EnqConnect(enq.req.bits, entries(enqIndex)) // initial entry
    }

    enq.resp.bits.mBIndex := enqIndex
    enq.resp.bits.fail    := false.B
    enq.resp.valid        := freeCount >= (i + 1).U // for better timing
  }

  // redirect: release every allocated entry whose uop is flushed
  for (i <- 0 until uopSize) {
    needCancel(i) := entries(i).uop.robIdx.needFlush(io.redirect) && allocated(i)
    when(needCancel(i)) {
      allocated(i)    := false.B
      freeMaskVec(i)  := true.B
      uopFinish(i)    := false.B
      needRSReplay(i) := false.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  // pipelineWriteback
  // handle the situation where multiple ports are going to write the same uop queue entry
  //   mergePortMatrix(i)(j): port j is merged into port i (j == i, or a valid later port
  //                          targeting the same entry)
  //   mergedByPrevPortVec(i): some valid earlier port targets the same entry as port i
  val mergePortMatrix     = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
  val mergedByPrevPortVec = Wire(Vec(pipeWidth, Bool()))
  for (i <- 0 until pipeWidth; j <- 0 until pipeWidth) {
    mergePortMatrix(i)(j) := (j == i).B ||
      ((j > i).B &&
        (io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex) &&
        io.fromPipeline(j).valid)
  }
  for (i <- 0 until pipeWidth) {
    mergedByPrevPortVec(i) := (i != 0).B && Cat((0 until i).map(j =>
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid)).orR
  }
  dontTouch(mergePortMatrix)
  dontTouch(mergedByPrevPortVec)

  /**
   * Reduces the candidates to the valid one with the smallest `sel` (tree reduction).
   *
   * For exception handling: when several ports write back to the same entry, the oldest
   * element has to be chosen.
   *
   * @param valid per-candidate valid flags
   * @param bits  per-candidate payloads
   * @param sel   per-candidate ordering keys; smaller means older
   * @return single-element sequences holding the winner's valid, payload and key
   *         (inputs of length 0 or 1 are returned unchanged)
   */
  def selectOldest[T <: VecPipelineFeedbackIO](valid: Seq[Bool], bits: Seq[T], sel: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
    require(valid.length == bits.length)
    require(valid.length == sel.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits, sel)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits  := bits(i)
      }
      // Candidate 0 wins when both are valid and it has the smaller key, or when it is the
      // only valid one; candidate 1 wins otherwise.
      val pickFirst = Mux(valid(0) && valid(1), sel(0) < sel(1), valid(0) && !valid(1))
      val oldest    = Mux(pickFirst, res(0), res(1))
      // The winner's key must be propagated: the next level of the reduction tree compares
      // it again. Returning a constant here would make `sel(0) < sel(1)` at that level
      // always false, so the right-hand subtree would always win.
      val oldestSel = Mux(pickFirst, sel(0), sel(1))
      (Seq(oldest.valid), Seq(oldest.bits), Seq(oldestSel))
    } else {
      val left  = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2), sel.take(sel.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)), sel.takeRight(sel.length - (sel.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
    }
  }

  val pipeValid        = io.fromPipeline.map(_.valid)
  val pipeBits         = io.fromPipeline.map(_.bits)
  val wbElemIdx        = pipeBits.map(_.elemIdx)
  val wbMbIndex        = pipeBits.map(_.mBIndex)
  val wbElemIdxInField = wbElemIdx.zip(wbMbIndex).map(x => x._1 & (entries(x._2).vlmax - 1.U))

  val portHasExcp = pipeBits.zip(mergePortMatrix).map { case (port, v) =>
    val pipeHasExcep = ExceptionNO.selectByFu(port.exceptionVec, fuCfg).asUInt.orR
    (0 until pipeWidth).map { j =>
      // this port have exception or merged port have exception
      v(j) && ((pipeHasExcep && io.fromPipeline(j).bits.mask.orR) || TriggerAction.isDmode(port.trigger))
    }.reduce(_ || _)
  }

  // Record exception information of the oldest merged port into the target entry.
  for ((pipewb, i) <- io.fromPipeline.zipWithIndex) {
    val entry             = entries(wbMbIndex(i))
    val entryVeew         = entry.uop.vpu.veew
    val entryIsUS         = LSUOpType.isAllUS(entry.uop.fuOpType)
    val entryHasException = ExceptionNO.selectByFu(entry.exceptionVec, fuCfg).asUInt.orR
    val entryExcp         = entryHasException && entry.mask.orR
    val entryElemIdx      = entry.elemIdx

    val sel             = selectOldest(mergePortMatrix(i), pipeBits, wbElemIdxInField)
    val selPort         = sel._2
    val selElemInfield  = selPort(0).elemIdx & (entries(wbMbIndex(i)).vlmax - 1.U)
    val selExceptionVec = selPort(0).exceptionVec
    val selVaddr        = selPort(0).vaddr
    val selElemIdx      = selPort(0).elemIdx

    // Only the first unaligned uop of unit-stride needs to be offset.
    // When unaligned, the lowest bit of mask is 0.
    //  example: 16'b1111_1111_1111_0000
    val firstUnmask = genVFirstUnmask(selPort(0).mask).asUInt
    val vaddrOffset = Mux(entryIsUS, firstUnmask, 0.U)
    val vaddr       = selVaddr + vaddrOffset
    val vstart      = Mux(entryIsUS, (selPort(0).vecVaddrOffset >> entryVeew).asUInt, selElemInfield)

    // select oldest port to raise exception:
    // overwrite an already recorded exception only if the recorded element is not older.
    val overwriteRecorded = (entryElemIdx >= selElemIdx) && entryExcp && portHasExcp(i)
    val recordFirst       = !entryExcp && portHasExcp(i)
    when((overwriteRecorded || recordFirst) && pipewb.valid && !mergedByPrevPortVec(i)) {
      entry.uop.trigger := selPort(0).trigger
      entry.elemIdx     := selElemIdx
      when(!entry.fof || vstart === 0.U) {
        // For fof loads, if element 0 raises an exception, vl is not modified, and the trap is taken.
        entry.vstart            := vstart
        entry.exceptionVec      := ExceptionNO.selectByFu(selExceptionVec, fuCfg)
        entry.vaddr             := vaddr
        entry.vaNeedExt         := selPort(0).vaNeedExt
        entry.gpaddr            := selPort(0).gpaddr
        entry.isForVSnonLeafPTE := selPort(0).isForVSnonLeafPTE
      }.otherwise {
        // fof with a faulting element other than element 0: shrink vl instead of trapping.
        entry.vl := Mux(entries(wbMbIndex(i)).vl > vstart, vstart, entries(wbMbIndex(i)).vl)
      }
    }
  }

  // for pipeline writeback
  for ((pipewb, i) <- io.fromPipeline.zipWithIndex) {
    val wbIndex        = pipewb.bits.mBIndex
    val flowNumOffset  = Mux(pipewb.bits.usSecondInv,
                             2.U,
                             PopCount(mergePortMatrix(i)))
    val sourceTypeNext = entries(wbIndex).sourceType | pipewb.bits.sourceType

    // if is VLoad, need latch 1 cycle to merge data. only flowNum and wbIndex need to latch
    val latchWbValid    = if (isVStore) pipewb.valid else RegNext(pipewb.valid)
    val latchWbIndex    = if (isVStore) wbIndex else RegEnable(wbIndex, pipewb.valid)
    val latchFlowNum    = if (isVStore) flowNumOffset else RegEnable(flowNumOffset, pipewb.valid)
    val latchMergeByPre = if (isVStore) mergedByPrevPortVec(i) else RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    when(latchWbValid && !latchMergeByPre) {
      entries(latchWbIndex).flowNum := entries(latchWbIndex).flowNum - latchFlowNum
    }

    when(pipewb.valid) {
      entries(wbIndex).sourceType := sourceTypeNext
      entries(wbIndex).flushState := pipewb.bits.flushState
    }
    when(pipewb.valid && !pipewb.bits.hit) {
      needRSReplay(wbIndex) := true.B
    }
    pipewb.ready := true.B
    XSError((entries(latchWbIndex).flowNum - latchFlowNum > entries(latchWbIndex).flowNum) && latchWbValid && !latchMergeByPre, "FlowWriteback overflow!!\n")
    XSError(!allocated(latchWbIndex) && latchWbValid, "Writeback error flow!!\n")
  }
  // for inorder mem access
  io.toSplit := DontCare

  // uopwriteback(deq)
  for (i <- 0 until uopSize) {
    when(allocated(i) && entries(i).allReady()) {
      uopFinish(i) := true.B
    }
  }
  val selPolicy = SelectOne("circ", uopFinish, deqWidth) // select one entry to deq
  private val pipelineOut  = Wire(Vec(deqWidth, DecoupledIO(new MemExuOutput(isVector = true))))
  private val writeBackOut = Wire(Vec(deqWidth, DecoupledIO(new MemExuOutput(isVector = true))))
  private val writeBackOutExceptionVec = writeBackOut.map(_.bits.uop.exceptionVec)
  for (((port, lsqport), i) <- (pipelineOut zip io.toLsq).zipWithIndex) {
    val canGo    = port.ready
    val (selValid, selOHVec) = selPolicy.getNthOH(i + 1)
    val entryIdx = OHToUInt(selOHVec)
    val selEntry = entries(entryIdx)
    val selAllocated = allocated(entryIdx)
    val selFire  = selValid && canGo
    when(selFire) {
      freeMaskVec(entryIdx)  := selAllocated
      allocated(entryIdx)    := false.B
      uopFinish(entryIdx)    := false.B
      needRSReplay(entryIdx) := false.B
    }
    // writeback connect
    port.valid := selFire && selAllocated && !needRSReplay(entryIdx) && !selEntry.uop.robIdx.needFlush(io.redirect)
    port.bits  := DeqConnect(selEntry)
    // to lsq
    lsqport.bits  := ToLsqConnect(selEntry) // when uopwriteback, free MBuffer entry, write to lsq
    lsqport.valid := selFire && selAllocated && !needRSReplay(entryIdx)
    // to RS
    val feedbackOut   = WireInit(0.U.asTypeOf(io.feedback(i).bits)).suggestName(s"feedbackOut_${i}")
    val feedbackValid = selFire && selAllocated
    feedbackOut.hit              := !needRSReplay(entryIdx)
    feedbackOut.robIdx           := selEntry.uop.robIdx
    feedbackOut.sourceType       := selEntry.sourceType
    feedbackOut.flushState       := selEntry.flushState
    feedbackOut.dataInvalidSqIdx := DontCare
    feedbackOut.sqIdx            := selEntry.uop.sqIdx
    feedbackOut.lqIdx            := selEntry.uop.lqIdx

    io.feedback(i).valid := RegNext(feedbackValid)
    io.feedback(i).bits  := RegEnable(feedbackOut, feedbackValid)

    NewPipelineConnect(
      port, writeBackOut(i), writeBackOut(i).fire,
      Mux(port.fire,
        selEntry.uop.robIdx.needFlush(io.redirect),
        writeBackOut(i).bits.uop.robIdx.needFlush(io.redirect)),
      Option(s"VMergebufferPipelineConnect${i}")
    )
    io.uopWriteback(i) <> writeBackOut(i)
    io.uopWriteback(i).bits.uop.exceptionVec := ExceptionNO.selectByFu(writeBackOutExceptionVec(i), fuCfg)
  }

  QueuePerf(uopSize, freeList.io.validCount, freeList.io.validCount === 0.U)
}

/**
 * Vector load merge buffer: adds the data-merging path on top of [[BaseVMergeBuffer]].
 *
 * Data merging is split over two cycles: the data of all ports merged in the current cycle
 * is combined first and registered, then written into the entry in the following cycle.
 */
class VLMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore = false) {
  override lazy val uopSize = VlMergeBufferSize
  println(s"VLMergeBuffer Size: ${VlMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecLoadPipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VLoad MergeBuffer freelist"
  ))

  // merge data
  val flowWbElemIdx     = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val flowWbElemIdxInVd = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val pipewbValidReg    = Wire(Vec(pipeWidth, Bool()))
  val wbIndexReg        = Wire(Vec(pipeWidth, UInt(vlmBindexBits.W)))
  val mergeDataReg      = Wire(Vec(pipeWidth, UInt(VLEN.W)))

  for ((pipewb, i) <- io.fromPipeline.zipWithIndex) {
    /** step0 **/
    val wbIndex         = pipewb.bits.mBIndex
    val alignedType     = pipewb.bits.alignedType
    val elemIdxInsideVd = pipewb.bits.elemIdxInsideVd
    flowWbElemIdx(i)     := pipewb.bits.elemIdx
    flowWbElemIdxInVd(i) := elemIdxInsideVd.get

    // Forward data merged in the previous cycle that has not reached `entries` yet.
    // Lower-numbered ports take priority; generated for every pipeline port rather than a
    // fixed number of ports.
    val forwardSources = (0 until pipeWidth).map { j =>
      (pipewbValidReg(j) && (wbIndexReg(j) === wbIndex)) -> mergeDataReg(j)
    }
    val oldData = PriorityMux(
      forwardSources :+ (true.B -> entries(wbIndex).data) // default use entries_data
    )
    val mergedData = mergeDataWithElemIdx(
      oldData = oldData,
      newData = io.fromPipeline.map(_.bits.vecdata.get),
      alignedType = alignedType(1, 0),
      elemIdx = flowWbElemIdxInVd,
      valids = mergePortMatrix(i)
    )
    /* this only for unit-stride load data merge
     * cycle0: broaden 128-bits to 256-bits (max 6 to 1)
     * cycle1: select 128-bits data from 256-bits (16 to 1)
     */
    val (brodenMergeData, brodenMergeMask) = mergeDataByIndex(
      data = io.fromPipeline.map(_.bits.vecdata.get).drop(i),
      mask = io.fromPipeline.map(_.bits.mask).drop(i),
      index = io.fromPipeline(i).bits.elemIdxInsideVd.get,
      valids = mergePortMatrix(i).drop(i)
    )
    /** step1 **/
    pipewbValidReg(i) := RegNext(pipewb.valid)
    wbIndexReg(i)     := RegEnable(wbIndex, pipewb.valid)
    mergeDataReg(i)   := RegEnable(mergedData, pipewb.valid) // for not Unit-stride
    val brodenMergeDataReg  = RegEnable(brodenMergeData, pipewb.valid) // only for Unit-stride
    val brodenMergeMaskReg  = RegEnable(brodenMergeMask, pipewb.valid)
    val mergedByPrevPortReg = RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    val regOffsetReg        = RegEnable(pipewb.bits.reg_offset.get, pipewb.valid) // only for Unit-stride
    val isusMerge           = RegEnable(alignedType(2), pipewb.valid)

    val usSelData   = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map { b => getNoAlignedSlice(brodenMergeDataReg, b, 128) })
    val usSelMask   = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map { b => brodenMergeMaskReg(16 + b - 1, b) })
    val usMergeData = mergeDataByByte(entries(wbIndexReg(i)).data, usSelData, usSelMask)
    when(pipewbValidReg(i) && !mergedByPrevPortReg) {
      entries(wbIndexReg(i)).data := Mux(isusMerge, usMergeData, mergeDataReg(i)) // if aligned(2) == 1, is Unit-Stride inst
    }
  }
}

/**
 * Vector store merge buffer. No data is merged; the writeback payload leaves the data,
 * mask and vd index fields as `DontCare`.
 */
class VSMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore = true) {
  override lazy val uopSize = VsMergeBufferSize
  println(s"VSMergeBuffer Size: ${VsMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecStorePipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VStore MergeBuffer freelist"
  ))

  /** Store flavour of the writeback payload: only `uop`, its exception vector and `vstart` carry information. */
  override def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink = Wire(new MemExuOutput(isVector = true))
    sink.data             := DontCare
    sink.mask.get         := DontCare
    sink.uop              := source.uop
    sink.uop.exceptionVec := source.exceptionVec
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := DontCare
    sink.vdIdx.get        := DontCare
    sink.uop.vpu.vstart   := source.vstart
    sink
  }
}