/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.FuType
import freechips.rocketchip.diplomacy.BufferParams

/**
  * State kept in one merge-buffer entry while the flows of a vector memory uop are still
  * being written back by the pipeline.
  *
  * The declaration order of the fields is part of the bundle layout; do not reorder them.
  */
class MBufferBundle(implicit p: Parameters) extends VLSUBundle{
  val data             = UInt(VLEN.W)
  val mask             = UInt(VLENB.W)
  // Number of flows still outstanding; decremented on pipeline writeback, entry is done at 0.
  val flowNum          = UInt(flowIdxBits.W)
  val exceptionVec     = ExceptionVec()
  val uop              = new DynInst
  // val vdOffset         = UInt(vOffsetBits.W)
  val sourceType       = VSFQFeedbackType()
  val flushState       = Bool()
  val vdIdx            = UInt(3.W)
  // for exception
  val vstart           = UInt(elemIdxBits.W)
  val vl               = UInt(elemIdxBits.W)
  val vaddr            = UInt(VAddrBits.W)
  val fof              = Bool()
  val vlmax            = UInt(elemIdxBits.W)

  /** True once every flow of this entry has been written back (`flowNum` reached zero). */
  def allReady(): Bool = (flowNum === 0.U)
}

/**
  * Logic shared by the vector load and vector store merge buffers.
  *
  * Entries are allocated from `io.fromSplit`, updated by the writebacks arriving on
  * `io.fromPipeline`, and released through `io.uopWriteback` / `io.toLsq` / `io.feedback`
  * once all of their flows have returned. Entries whose uop is hit by `io.redirect` are
  * released immediately.
  *
  * NOTE: the order of the `when` blocks below matters (Chisel last-connect semantics):
  * enqueue, then redirect, then dequeue all drive `allocated` / `uopFinish` / `needRSReplay`.
  *
  * @param isVStore true for the store flavour; the load flavour delays the flow-count update
  *                 by one cycle so that it lines up with the registered data merge.
  */
abstract class BaseVMergeBuffer(isVStore: Boolean=false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VMergeBufferIO(isVStore))

  /**
    * Initialise a freshly allocated entry from an enqueue request.
    * `vaddr` is intentionally left untouched here; it is only written when an exception is recorded.
    */
  def EnqConnect(source: MergeBufferReq, sink: MBufferBundle) = {
    sink.data         := source.data
    sink.mask         := source.mask
    sink.flowNum      := source.flowNum
    sink.exceptionVec := 0.U.asTypeOf(ExceptionVec())
    sink.uop          := source.uop
    sink.sourceType   := 0.U.asTypeOf(VSFQFeedbackType())
    sink.flushState   := false.B
    sink.vdIdx        := source.vdIdx
    sink.fof          := source.fof
    sink.vlmax        := source.vlmax
    sink.vl           := source.uop.vpu.vl
    sink.vstart       := 0.U
  }

  /** Build the uop writeback payload from a finished entry. */
  def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink               = WireInit(0.U.asTypeOf(new MemExuOutput(isVector = true)))
    sink.data             := source.data
    sink.mask.get         := source.mask
    sink.uop              := source.uop
    sink.uop.exceptionVec := source.exceptionVec
    sink.uop.vpu.vmask    := source.mask
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := source.vdIdx // Mgu needs to use this.
    sink.vdIdx.get        := source.vdIdx
    sink.uop.vpu.vstart   := source.vstart
    sink.uop.vpu.vl       := source.vl
    sink
  }

  /**
    * Build the feedback sent to the LSQ for a finished entry:
    * COMMIT when no exception was recorded, FLUSH otherwise, LAST always set.
    */
  def ToLsqConnect(source: MBufferBundle): FeedbackToLsqIO = {
    val sink                                 = WireInit(0.U.asTypeOf(new FeedbackToLsqIO))
    val hasExp                               = source.exceptionVec.asUInt.orR
    sink.robidx                             := source.uop.robIdx
    sink.uopidx                             := source.uop.uopIdx
    sink.feedback(VecFeedbacks.COMMIT)      := !hasExp
    sink.feedback(VecFeedbacks.FLUSH)       := hasExp
    sink.feedback(VecFeedbacks.LAST)        := true.B
    sink.vstart                             := source.vstart // TODO: if lsq need vl for fof?
    sink.vaddr                              := source.vaddr
    sink.vl                                 := source.vl
    sink.exceptionVec                       := source.exceptionVec
    sink
  }

  // freelist: store valid entries index.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList: FreeList
  val uopSize: Int
  val enqWidth  = io.fromSplit.length
  val deqWidth  = io.uopWriteback.length
  val pipeWidth = io.fromPipeline.length

  val entries      = Reg(Vec(uopSize, new MBufferBundle))
  val needCancel   = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val allocated    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val freeMaskVec  = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val uopFinish    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val needRSReplay = RegInit(VecInit(Seq.fill(uopSize)(false.B)))

  // enq, from splitPipeline
  // val allowEnqueue =
  val cancelEnq    = io.fromSplit.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val canEnqueue   = io.fromSplit.map(_.req.valid)
  // A request is enqueued only if it is valid and not being flushed in the same cycle.
  val needEnqueue  = (0 until enqWidth).map{i =>
    canEnqueue(i) && !cancelEnq(i)
  }

  for ((enq, i) <- io.fromSplit.zipWithIndex){
    freeList.io.doAllocate(i) := false.B

    freeList.io.allocateReq(i) := true.B

    // Port i takes the slot after those consumed by lower-numbered enqueuing ports.
    val offset    = PopCount(needEnqueue.take(i))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex  = freeList.io.allocateSlot(offset)
    enq.req.ready := canAccept

    when(needEnqueue(i) && enq.req.ready){
      freeList.io.doAllocate(i) := true.B
      // enqueue
      allocated(enqIndex)       := true.B
      uopFinish(enqIndex)       := false.B
      needRSReplay(enqIndex)    := false.B

      EnqConnect(enq.req.bits, entries(enqIndex)) // initial entry
    }

    enq.resp.bits.mBIndex := enqIndex
    enq.resp.bits.fail    := false.B
    enq.resp.valid        := canAccept // resp in 1 cycle
  }

  // redirect: release every allocated entry whose uop is flushed
  for (i <- 0 until uopSize){
    needCancel(i) := entries(i).uop.robIdx.needFlush(io.redirect) && allocated(i)
    when (needCancel(i)) {
      allocated(i)    := false.B
      freeMaskVec(i)  := true.B
      uopFinish(i)    := false.B
      needRSReplay(i) := false.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  // pipelineWriteback
  // handle the situation where multiple ports are going to write the same uop queue entry
  // mergePortMatrix(i)(j): port j is merged into port i (j == i, or a later valid port
  //                        targeting the same entry).
  // mergedByPrevPortVec(i): an earlier valid port targets the same entry as port i, so
  //                         port i's update is performed by that earlier port.
  val mergePortMatrix     = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
  val mergedByPrevPortVec = Wire(Vec(pipeWidth, Bool()))
  for (i <- 0 until pipeWidth; j <- 0 until pipeWidth) {
    mergePortMatrix(i)(j) := (j == i).B ||
      ((j > i).B &&
        io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
        io.fromPipeline(j).valid)
  }
  for (i <- 0 until pipeWidth) {
    mergedByPrevPortVec(i) := (i != 0).B && Cat((0 until i).map(j =>
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid)).orR
  }
  dontTouch(mergePortMatrix)
  dontTouch(mergedByPrevPortVec)

  /**
    * For exception handling: when several ports write back in the same cycle, reduce them
    * to the valid candidate with the smallest `sel` value (the oldest element).
    *
    * The reduction is a binary tree built at elaboration time. For two or more inputs the
    * result is a triple of single-element sequences; for zero or one input the arguments are
    * returned unchanged.
    *
    * The `sel` value of the winner is propagated together with its bits, so the upper levels
    * of the tree compare real element indices. Returning a constant zero instead makes the
    * `sel(0) < sel(1)` comparison of the next level always false, which lets the right-hand
    * subtree win whenever both sides are valid.
    *
    * @param valid per-candidate valid flag
    * @param bits  per-candidate payload
    * @param sel   per-candidate ordering key; smaller means older
    * @return (valid, bits, sel) of the selected candidate
    */
  def selectOldest[T <: VecPipelineFeedbackIO](valid: Seq[Bool], bits: Seq[T], sel: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
    assert(valid.length == bits.length)
    assert(valid.length == sel.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits, sel)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits  := bits(i)
      }
      // Candidate 0 wins when it is the only valid one, or when both are valid and it is
      // strictly older; in every other case candidate 1 is forwarded.
      val takeFirst = valid(0) && (!valid(1) || sel(0) < sel(1))
      val oldest    = Mux(takeFirst, res(0), res(1))
      val oldestSel = Mux(takeFirst, sel(0), sel(1))
      (Seq(oldest.valid), Seq(oldest.bits), Seq(oldestSel))
    } else {
      val leftSize  = valid.length / 2
      val rightSize = valid.length - leftSize
      val left  = selectOldest(valid.take(leftSize), bits.take(leftSize), sel.take(leftSize))
      val right = selectOldest(valid.takeRight(rightSize), bits.takeRight(rightSize), sel.takeRight(rightSize))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
    }
  }

  val pipeValid        = io.fromPipeline.map(_.valid)
  val pipeBits         = io.fromPipeline.map(x => x.bits)
  val wbElemIdx        = pipeBits.map(_.elemIdx)
  val wbMbIndex        = pipeBits.map(_.mBIndex)
  // Element index reduced modulo the entry's vlmax (the mask form assumes vlmax is a power of two).
  val wbElemIdxInField = wbElemIdx.zip(wbMbIndex).map(x => x._1 & (entries(x._2).vlmax - 1.U))

  // this port have exception or merged port have exception
  val portHasExcp = mergePortMatrix.map{ mergedPorts =>
    (0 until pipeWidth).map{ j =>
      mergedPorts(j) && io.fromPipeline(j).bits.exceptionVec.asUInt.orR && io.fromPipeline(j).bits.mask.orR
    }.reduce(_ || _)
  }

  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    val entry      = entries(wbMbIndex(i))
    val entryVeew  = entry.uop.vpu.veew
    val entryIsUS  = LSUOpType.isUStride(entry.uop.fuOpType)
    val entryExcp  = entry.exceptionVec.asUInt.orR && entry.mask.orR

    // NOTE(review): the candidates are qualified by mergePortMatrix only, not by whether the
    // individual port carries an exception — confirm this is the intended selection.
    val sel             = selectOldest(mergePortMatrix(i), pipeBits, wbElemIdxInField)
    val selPort         = sel._2
    val selElemInfield  = selPort(0).elemIdx & (entries(wbMbIndex(i)).vlmax - 1.U)
    val selExceptionVec = selPort(0).exceptionVec

    // For the first uop of a unit-stride access the reported address is advanced to the
    // first unmasked element.
    val USFirstUopOffset = (searchVFirstUnMask(selPort(0).mask) << entryVeew).asUInt
    val isUSFirstUop     = !selPort(0).elemIdx.orR
    val vaddr            = selPort(0).vaddr + Mux(entryIsUS && isUSFirstUop, USFirstUopOffset, 0.U)

    // select oldest port to raise exception
    // Update when the entry has no exception yet, or when the new one is not younger than
    // the one already recorded.
    val replacesRecorded = (entries(wbMbIndex(i)).vstart >= selElemInfield) && entryExcp && portHasExcp(i)
    val firstException   = !entryExcp && portHasExcp(i)
    when((replacesRecorded || firstException) && pipewb.valid && !mergedByPrevPortVec(i)){
      when(!entries(wbMbIndex(i)).fof || selElemInfield === 0.U){
        // For fof loads, if element 0 raises an exception, vl is not modified, and the trap is taken.
        entries(wbMbIndex(i)).vstart       := selElemInfield
        entries(wbMbIndex(i)).exceptionVec := selExceptionVec
        entries(wbMbIndex(i)).vaddr        := vaddr
      }.otherwise{
        // fof load faulting on a non-zero element: no trap, vl is trimmed instead.
        entries(wbMbIndex(i)).vl           := selElemInfield
      }
    }
  }

  // for pipeline writeback
  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    val wbIndex        = pipewb.bits.mBIndex
    // Number of flows retired by this port, including the later ports merged into it.
    val flowNumOffset  = Mux(pipewb.bits.usSecondInv,
                             2.U,
                             PopCount(mergePortMatrix(i)))
    val sourceTypeNext = entries(wbIndex).sourceType | pipewb.bits.sourceType

    // if is VLoad, need latch 1 cycle to merge data. only flowNum and wbIndex need to latch
    val latchWbValid    = if(isVStore) pipewb.valid else RegNext(pipewb.valid)
    val latchWbIndex    = if(isVStore) wbIndex      else RegEnable(wbIndex, pipewb.valid)
    val latchFlowNum    = if(isVStore) flowNumOffset else RegEnable(flowNumOffset, pipewb.valid)
    val latchMergeByPre = if(isVStore) mergedByPrevPortVec(i) else RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    when(latchWbValid && !latchMergeByPre){
      entries(latchWbIndex).flowNum := entries(latchWbIndex).flowNum - latchFlowNum
    }

    when(pipewb.valid){
      entries(wbIndex).sourceType := sourceTypeNext
      entries(wbIndex).flushState := pipewb.bits.flushState
    }
    when(pipewb.valid && !pipewb.bits.hit){
      needRSReplay(wbIndex) := true.B
    }
    pipewb.ready := true.B
    // Unsigned wrap-around check: subtracting more flows than remain makes the result larger.
    XSError((entries(latchWbIndex).flowNum - latchFlowNum > entries(latchWbIndex).flowNum) && latchWbValid && !latchMergeByPre, "FlowWriteback overflow!!\n")
    XSError(!allocated(latchWbIndex) && latchWbValid, "Writeback error flow!!\n")
  }
  // for inorder mem access
  io.toSplit := DontCare

  // uopwriteback(deq)
  for (i <- 0 until uopSize){
    when(allocated(i) && entries(i).allReady()){
      uopFinish(i) := true.B
    }
  }
  val selPolicy = SelectOne("circ", uopFinish, deqWidth) // select one entry to deq
  for(((port, lsqport), i) <- (io.uopWriteback zip io.toLsq).zipWithIndex){
    val canGo                = port.ready
    val (selValid, selOHVec) = selPolicy.getNthOH(i + 1)
    val entryIdx             = OHToUInt(selOHVec)
    val selEntry             = entries(entryIdx)
    val selFire              = selValid && canGo
    when(selFire){
      freeMaskVec(entryIdx)  := true.B
      allocated(entryIdx)    := false.B
      uopFinish(entryIdx)    := false.B
      needRSReplay(entryIdx) := false.B
    }
    // writeback connect
    port.valid    := selFire && allocated(entryIdx) && !needRSReplay(entryIdx) && !selEntry.uop.robIdx.needFlush(io.redirect)
    port.bits     := DeqConnect(selEntry)
    // to lsq
    lsqport.bits  := ToLsqConnect(selEntry) // when uopwriteback, free MBuffer entry, write to lsq
    lsqport.valid := selFire && allocated(entryIdx) && !needRSReplay(entryIdx)
    // to RS
    io.feedback(i).valid                 := selFire && allocated(entryIdx)
    io.feedback(i).bits.hit              := !needRSReplay(entryIdx)
    io.feedback(i).bits.robIdx           := selEntry.uop.robIdx
    io.feedback(i).bits.sourceType       := selEntry.sourceType
    io.feedback(i).bits.flushState       := selEntry.flushState
    io.feedback(i).bits.dataInvalidSqIdx := DontCare
    io.feedback(i).bits.uopIdx.get       := selEntry.uop.uopIdx
  }

  QueuePerf(uopSize, freeList.io.validCount, freeList.io.validCount === 0.U)
}

/**
  * Vector load merge buffer: on top of the shared bookkeeping it merges the data returned
  * by the load pipelines into the entry's `data` register, one cycle after writeback.
  */
class VLMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=false){
  override lazy val uopSize = VlMergeBufferSize
  println(s"VLMergeBuffer Size: ${VlMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecLoadPipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VLoad MergeBuffer freelist"
  ))

  // merge data
  val flowWbElemIdx     = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val flowWbElemIdxInVd = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val pipewbValidReg    = Wire(Vec(pipeWidth, Bool()))
  val wbIndexReg        = Wire(Vec(pipeWidth, UInt(vlmBindexBits.W)))
  val mergeDataReg      = Wire(Vec(pipeWidth, UInt(VLEN.W)))

  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    /** step0 **/
    val wbIndex          = pipewb.bits.mBIndex
    val alignedType      = pipewb.bits.alignedType
    val elemIdxInsideVd  = pipewb.bits.elemIdxInsideVd
    flowWbElemIdx(i)     := pipewb.bits.elemIdx
    flowWbElemIdxInVd(i) := elemIdxInsideVd.get

    // Forward data merged in the previous cycle (still in flight towards `entries`) when it
    // targets the same entry; lower-numbered ports take priority. Built for every pipeline
    // port rather than a fixed set of three.
    val forwardedData = (0 until pipeWidth).map{ j =>
      (pipewbValidReg(j) && (wbIndexReg(j) === wbIndex)) -> mergeDataReg(j)
    }
    val oldData = PriorityMux(
      forwardedData :+ (true.B -> entries(wbIndex).data) // default use entries_data
    )
    val mergedData = mergeDataWithElemIdx(
      oldData = oldData,
      newData = io.fromPipeline.map(_.bits.vecdata.get),
      alignedType = alignedType(1,0),
      elemIdx = flowWbElemIdxInVd,
      valids = mergePortMatrix(i)
    )
    /* this only for unit-stride load data merge
     * cycle0: broaden 128-bits to 256-bits (max 6 to 1)
     * cycle1: select 128-bits data from 256-bits (16 to 1)
     */
    val (brodenMergeData, brodenMergeMask) = mergeDataByIndex(
      data    = io.fromPipeline.map(_.bits.vecdata.get).drop(i),
      mask    = io.fromPipeline.map(_.bits.mask).drop(i),
      index   = io.fromPipeline(i).bits.elemIdxInsideVd.get,
      valids  = mergePortMatrix(i).drop(i)
    )
    /** step1 **/
    pipewbValidReg(i)       := RegNext(pipewb.valid)
    wbIndexReg(i)           := RegEnable(wbIndex, pipewb.valid)
    mergeDataReg(i)         := RegEnable(mergedData, pipewb.valid) // for not Unit-stride
    val brodenMergeDataReg  = RegEnable(brodenMergeData, pipewb.valid) // only for Unit-stride
    val brodenMergeMaskReg  = RegEnable(brodenMergeMask, pipewb.valid)
    val mergedByPrevPortReg = RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    val regOffsetReg        = RegEnable(pipewb.bits.reg_offset.get, pipewb.valid) // only for Unit-stride
    val isusMerge           = RegEnable(alignedType(2), pipewb.valid)

    // NOTE(review): the literals 128 and 16 presumably equal VLEN and VLENB — confirm before
    // changing either parameter.
    val usSelData   = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ byteOffset => getNoAlignedSlice(brodenMergeDataReg, byteOffset, 128) })
    val usSelMask   = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ byteOffset => brodenMergeMaskReg(16 + byteOffset - 1, byteOffset) })
    val usMergeData = mergeDataByByte(entries(wbIndexReg(i)).data, usSelData, usSelMask)
    when(pipewbValidReg(i) && !mergedByPrevPortReg){
      entries(wbIndexReg(i)).data := Mux(isusMerge, usMergeData, mergeDataReg(i)) // if aligned(2) == 1, is Unit-Stride inst
    }
  }
}

/**
  * Vector store merge buffer: only the shared bookkeeping is needed, and the writeback
  * payload carries no data.
  */
class VSMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=true){
  override lazy val uopSize = VsMergeBufferSize
  println(s"VSMergeBuffer Size: ${VsMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecStorePipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VStore MergeBuffer freelist"
  ))

  /** Store flavour of the writeback payload: data, mask and vd indices are don't-care. */
  override def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink               = Wire(new MemExuOutput(isVector = true))
    sink.data             := DontCare
    sink.mask.get         := DontCare
    sink.uop              := source.uop
    sink.uop.exceptionVec := source.exceptionVec
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := DontCare
    sink.vdIdx.get        := DontCare
    sink.uop.vpu.vstart   := source.vstart
    sink
  }
}