/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import utility._
import xiangshan.ExceptionNO._

/** Circular pointer that addresses the whole instruction buffer (`IBufSize` entries). */
class IBufPtr(implicit p: Parameters) extends CircularQueuePtr[IBufPtr](
  p => p(XSCoreParamsKey).IBufSize
)

/** Circular pointer that addresses one entry inside a single bank (`IBufSize / IBufNBank` entries). */
class IBufInBankPtr(implicit p: Parameters) extends CircularQueuePtr[IBufInBankPtr](
  p => p(XSCoreParamsKey).IBufSize / p(XSCoreParamsKey).IBufNBank
)

/** Circular pointer that selects one of the `IBufNBank` banks. */
class IBufBankPtr(implicit p: Parameters) extends CircularQueuePtr[IBufBankPtr](
  p => p(XSCoreParamsKey).IBufNBank
)

/**
  * IO bundle of [[IBuffer]].
  *
  *  - `flush` resets all queue pointers and invalidates the output register.
  *  - `ControlRedirect`, `ControlBTBMissBubble`, `TAGEMissBubble`, `SCMissBubble`, `ITTAGEMissBubble`,
  *    `RASMissBubble` and `MemVioRedirect` are only consulted while `flush` is high, to pick the
  *    top-down bubble reason recorded for that flush.
  *  - `in` carries the fetch packet; `out` drives `DecodeWidth` decode slots.
  *  - `full` is the inverse of the internal enqueue-allowed register.
  *  - `decodeCanAccept` gates reloading of the output register.
  *  - `stallReason` reports a reason per decode slot; when `backReason` is valid it overrides every slot.
  */
class IBufferIO(implicit p: Parameters) extends XSBundle {
  val flush = Input(Bool())
  val ControlRedirect = Input(Bool())
  val ControlBTBMissBubble = Input(Bool())
  val TAGEMissBubble = Input(Bool())
  val SCMissBubble = Input(Bool())
  val ITTAGEMissBubble = Input(Bool())
  val RASMissBubble = Input(Bool())
  val MemVioRedirect = Input(Bool())
  val in = Flipped(DecoupledIO(new FetchToIBuffer))
  val out = Vec(DecodeWidth, DecoupledIO(new CtrlFlow))
  val full = Output(Bool())
  val decodeCanAccept = Input(Bool())
  val stallReason = new StallReasonIO(DecodeWidth)
}

/** Payload stored per instruction in the buffer, between fetch and decode. */
class IBufEntry(implicit p: Parameters) extends XSBundle {
  val inst = UInt(32.W)
  val pc = UInt(VAddrBits.W)
  val foldpc = UInt(MemPredPCWidth.W)
  val pd = new PreDecodeInfo
  val pred_taken = Bool()
  val ftqPtr = new FtqPtr
  val ftqOffset = UInt(log2Ceil(PredictWidth).W)
  val ipf = Bool()
  val igpf = Bool()
  val acf = Bool()
  val crossPageIPFFix = Bool()
  val triggered = new TriggerCf
  val gpaddr = UInt(GPAddrBits.W)

  /**
    * Drives every field of this entry from slot `i` of a fetch packet.
    *
    * @param fetch the fetch packet to read from
    * @param i     elaboration-time index of the slot inside the packet
    * @return this entry, so the call can be chained on a freshly created `Wire`
    */
  def fromFetch(fetch: FetchToIBuffer, i: Int): IBufEntry = {
    inst := fetch.instrs(i)
    pc := fetch.pc(i)
    foldpc := fetch.foldpc(i)
    gpaddr := fetch.gpaddr(i)
    pd := fetch.pd(i)
    // The valid bit of the per-slot FTQ offset is stored as the predicted-taken flag.
    pred_taken := fetch.ftqOffset(i).valid
    ftqPtr := fetch.ftqPtr
    ftqOffset := fetch.ftqOffset(i).bits
    ipf := fetch.ipf(i)
    igpf := fetch.igpf(i)
    acf := fetch.acf(i)
    crossPageIPFFix := fetch.crossPageIPFFix(i)
    triggered := fetch.triggered(i)
    this
  }

  /**
    * Expands this entry into the [[CtrlFlow]] bundle handed to decode.
    *
    * The exception vector is cleared and then only the instruction page fault, instruction guest
    * page fault and instruction access fault bits are driven from this entry. The store-set /
    * load-wait related fields are left as `DontCare` here.
    *
    * @return a new wire holding the converted control-flow bundle
    */
  def toCtrlFlow: CtrlFlow = {
    val cf = Wire(new CtrlFlow)
    cf.instr := inst
    cf.pc := pc
    cf.foldpc := foldpc
    cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
    cf.exceptionVec(instrPageFault) := ipf
    cf.exceptionVec(instrGuestPageFault) := igpf
    cf.exceptionVec(instrAccessFault) := acf
    cf.trigger := triggered
    cf.pd := pd
    cf.pred_taken := pred_taken
    cf.crossPageIPFFix := crossPageIPFFix
    cf.storeSetHit := DontCare
    cf.waitForRobIdx := DontCare
    cf.loadWaitBit := DontCare
    cf.loadWaitStrict := DontCare
    cf.ssid := DontCare
    cf.ftqPtr := ftqPtr
    cf.ftqOffset := ftqOffset
    cf.gpaddr := gpaddr
    cf
  }
}

/**
  * Instruction buffer between fetch and decode.
  *
  * Storage is a register array of `IBufSize` entries viewed as `IBufNBank` interleaved banks.
  * Up to `PredictWidth` instructions are enqueued per cycle and up to `DecodeWidth` are presented
  * to decode through a registered output stage. When the buffer is empty and decode can accept,
  * the first `DecodeWidth` incoming instructions bypass the storage and go straight to the
  * output register.
  */
class IBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper with HasPerfEvents {
  val io = IO(new IBufferIO)

  // io alias
  private val decodeCanAccept = io.decodeCanAccept

  // Parameter Check
  private val bankSize = IBufSize / IBufNBank
  require(IBufSize % IBufNBank == 0, s"IBufNBank should divide IBufSize, IBufNBank: $IBufNBank, IBufSize: $IBufSize")
  require(IBufNBank >= DecodeWidth,
    s"IBufNBank should be equal or larger than DecodeWidth, IBufNBank: $IBufNBank, DecodeWidth: $DecodeWidth")

  // IBuffer is organized as raw registers.
  // This is because IBuffer is a huge queue, so read & write port logic should be precisely controlled
  //   . + + E E E - .
  //   . + + E E E - .
  //   . . + E E E - .
  //   . . + E E E E -
  // As shown above, + means enqueue, - means dequeue, E is current content
  // When dequeuing, the read port is organized like a banked FIFO
  // Dequeue reads no more than 1 entry from each bank sequentially; this can be exploited to reduce area
  // Enqueue writes cannot benefit from this characteristic unless an SRAM is used
  // For details, see Enqueue and Dequeue below
  private val ibuf: Vec[IBufEntry] = RegInit(VecInit.fill(IBufSize)(0.U.asTypeOf(new IBufEntry)))
  // Entry `k` of the flat array lives in bank `k % IBufNBank` at in-bank offset `k / IBufNBank`.
  private val bankedIBufView: Vec[Vec[IBufEntry]] = VecInit.tabulate(IBufNBank)(
    bankID => VecInit.tabulate(bankSize)(
      inBankOffset => ibuf(bankID + inBankOffset * IBufNBank)
    )
  )


  // Bypass wire
  private val bypassEntries = WireDefault(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))
  // Normal read wire
  private val deqEntries = WireDefault(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))
  // Output register
  private val outputEntries = RegInit(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))

  // Between Bank
  private val deqBankPtrVec: Vec[IBufBankPtr] = RegInit(VecInit.tabulate(DecodeWidth)(_.U.asTypeOf(new IBufBankPtr)))
  private val deqBankPtr: IBufBankPtr = deqBankPtrVec(0)
  private val deqBankPtrVecNext = Wire(deqBankPtrVec.cloneType)
  // Inside Bank
  private val deqInBankPtr: Vec[IBufInBankPtr] = RegInit(VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr)))
  private val deqInBankPtrNext = Wire(deqInBankPtr.cloneType)

  val deqPtr = RegInit(0.U.asTypeOf(new IBufPtr))
  val deqPtrNext = Wire(deqPtr.cloneType)

  val enqPtrVec = RegInit(VecInit.tabulate(PredictWidth)(_.U.asTypeOf(new IBufPtr)))
  val enqPtr = enqPtrVec(0)

  val numTryEnq = WireDefault(0.U)
  val numEnq = Mux(io.in.fire, numTryEnq, 0.U)

  val useBypass = enqPtr === deqPtr && decodeCanAccept // empty and decode can accept insts
  // Record the insts in output entries are from bypass or deq.
  // Update deqPtr if they are from deq
  val currentOutUseBypass = RegInit(false.B)

  // The number of decode accepted insts.
  // Since decode promises accepting insts in order, use priority encoder to simplify the accumulation.
  private val numOut: UInt = PriorityMuxDefault(io.out.map(x => !x.ready) zip (0 until DecodeWidth).map(_.U), DecodeWidth.U)
  // Bypassed instructions never occupied storage, so accepting them must not move the dequeue pointers.
  private val numDeq = Mux(currentOutUseBypass, 0.U, numOut)

  // count current number of valid entries
  val numValid = distanceBetween(enqPtr, deqPtr)
  val numValidAfterDeq = numValid - numDeq
  // count next number of valid entries
  val numValidNext = numValid + numEnq - numDeq
  val allowEnq = RegInit(true.B)
  val numFromFetch = Mux(io.in.valid, PopCount(io.in.bits.enqEnable), 0.U)
  val numBypass = PopCount(bypassEntries.map(_.valid))

  allowEnq := (IBufSize - PredictWidth).U >= numValidNext // Disable when almost full

  // enqOffset(i) = number of valid slots strictly before slot i in the incoming packet
  val enqOffset = VecInit.tabulate(PredictWidth)(i => PopCount(io.in.bits.valid.asBools.take(i)))
  val enqData = VecInit.tabulate(PredictWidth)(i => Wire(new IBufEntry).fromFetch(io.in.bits, i))

  // when using bypass, bypassed entries do not enqueue
  when(useBypass) {
    when(numFromFetch >= DecodeWidth.U) {
      numTryEnq := numFromFetch - DecodeWidth.U
    } .otherwise {
      numTryEnq := 0.U
    }
  } .otherwise {
    numTryEnq := numFromFetch
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Bypass
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  bypassEntries.zipWithIndex.foreach {
    case (entry, idx) =>
      // Select the incoming slot whose enqueue offset equals this bypass position
      val validOH = Range(0, PredictWidth).map {
        i =>
          io.in.bits.valid(i) &&
            io.in.bits.enqEnable(i) &&
            enqOffset(i) === idx.asUInt
      } // Should be OneHot
      entry.valid := validOH.reduce(_ || _) && io.in.fire && !io.flush
      entry.bits := Mux1H(validOH, enqData)

      // Debug Assertion
      XSError(io.in.valid && PopCount(validOH) > 1.asUInt, "validOH is not OneHot")
  }

  // => Decode Output
  // clean register output
  io.out zip outputEntries foreach {
    case (port, reg) =>
      port.valid := reg.valid
      port.bits := reg.bits.toCtrlFlow
  }
  // The output register is only reloaded when decode can accept; otherwise it holds its value.
  // The bypass connection comes last so that it overrides the normal dequeue path.
  outputEntries zip bypassEntries zip deqEntries foreach {
    case ((out, bypass), deq) =>
      when(decodeCanAccept) {
        out := deq
        currentOutUseBypass := false.B
        when(useBypass && io.in.valid) {
          out := bypass
          currentOutUseBypass := true.B
        }
      }
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Enqueue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  io.in.ready := allowEnq
  // Data
  ibuf.zipWithIndex.foreach {
    case (entry, idx) =>
      // Select
      val validOH = Range(0, PredictWidth).map {
        i =>
          // With bypass active, the first DecodeWidth valid slots skip storage, so the remaining
          // slots are written starting from enqPtrVec(0).
          val useBypassMatch = enqOffset(i) >= DecodeWidth.U &&
            enqPtrVec(enqOffset(i) - DecodeWidth.U).value === idx.asUInt
          val normalMatch = enqPtrVec(enqOffset(i)).value === idx.asUInt
          val m = Mux(useBypass, useBypassMatch, normalMatch) // when using bypass, bypassed entries do not enqueue

          io.in.bits.valid(i) && io.in.bits.enqEnable(i) && m
      } // Should be OneHot
      val wen = validOH.reduce(_ || _) && io.in.fire && !io.flush

      // Write port
      // Each IBuffer entry has a PredictWidth -> 1 Mux
      val writeEntry = Mux1H(validOH, enqData)
      entry := Mux(wen, writeEntry, entry)

      // Debug Assertion
      XSError(PopCount(validOH) > 1.asUInt, "validOH is not OneHot")
  }
  // Pointer maintenance
  when (io.in.fire && !io.flush) {
    enqPtrVec := VecInit(enqPtrVec.map(_ + numTryEnq))
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Dequeue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  val validVec = Mux(numValidAfterDeq >= DecodeWidth.U,
    ((1 << DecodeWidth) - 1).U,
    UIntToMask(numValidAfterDeq(log2Ceil(DecodeWidth) - 1, 0), DecodeWidth)
  )
  // Data
  // Read port
  // 2-stage, IBufNBank * (bankSize -> 1) + IBufNBank -> 1
  // Should be better than IBufSize -> 1 in area, with no significant latency increase
  private val readStage1: Vec[IBufEntry] = VecInit.tabulate(IBufNBank)(
    bankID => Mux1H(UIntToOH(deqInBankPtrNext(bankID).value), bankedIBufView(bankID))
  )
  for (i <- 0 until DecodeWidth) {
    deqEntries(i).valid := validVec(i)
    deqEntries(i).bits := Mux1H(UIntToOH(deqBankPtrVecNext(i).value), readStage1)
  }
  // Pointer maintenance
  deqBankPtrVecNext := VecInit(deqBankPtrVec.map(_ + numDeq))
  deqPtrNext := deqPtr + numDeq
  deqInBankPtrNext.zip(deqInBankPtr).zipWithIndex.foreach {
    case ((ptrNext, ptr), idx) =>
      // validVec[k] == bankValid[deqBankPtr + k]
      // So bankValid[n] == validVec[n - deqBankPtr]
      // bankOffset is (idx - deqBankPtr) modulo IBufNBank, kept at full width.
      val bankOffset = Mux(idx.asUInt >= deqBankPtr.value,
        idx.asUInt - deqBankPtr.value,
        ((idx + IBufNBank).asUInt - deqBankPtr.value)(log2Ceil(IBufNBank) - 1, 0)
      )
      // The range check must use the untruncated offset: IBufNBank may need more index bits than
      // DecodeWidth, and truncating first would alias out-of-range banks onto low output slots.
      // The truncated value is only used to index io.out.
      val validIdx = bankOffset(log2Ceil(DecodeWidth) - 1, 0)
      val bankAdvance = Mux(bankOffset >= DecodeWidth.U,
        false.B,
        io.out(validIdx).ready // `ready` depends on `valid`, so we need only `ready`, not fire
      ) && !currentOutUseBypass
      ptrNext := Mux(bankAdvance, ptr + 1.U, ptr)
  }

  // Flush
  // This block is placed after the enqueue/dequeue logic so that its connections take priority.
  when (io.flush) {
    allowEnq := true.B
    enqPtrVec := enqPtrVec.indices.map(_.U.asTypeOf(new IBufPtr))
    deqBankPtrVec := deqBankPtrVec.indices.map(_.U.asTypeOf(new IBufBankPtr))
    deqInBankPtr := VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr))
    deqPtr := 0.U.asTypeOf(new IBufPtr())
    outputEntries.foreach(_.valid := false.B)
  }.otherwise {
    deqPtr := deqPtrNext
    deqInBankPtr := deqInBankPtrNext
    deqBankPtrVec := deqBankPtrVecNext
  }
  io.full := !allowEnq

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // TopDown
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  topdown_stage := io.in.bits.topdown_info
  // On a flush, additionally mark the bubble reason that matches the redirect source.
  when(io.flush) {
    when(io.ControlRedirect) {
      when(io.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      }.elsewhen(io.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      }.elsewhen(io.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
      }.elsewhen(io.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      }.elsewhen(io.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }
    }.elsewhen(io.MemVioRedirect) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    }.otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  }


  val dequeueInsufficient = Wire(Bool())
  val matchBubble = Wire(UInt(log2Up(TopDownCounters.NumStallReasons.id).W))
  val deqValidCount = PopCount(validVec.asBools)
  val deqWasteCount = DecodeWidth.U - deqValidCount
  dequeueInsufficient := deqValidCount < DecodeWidth.U
  // Index of the highest-numbered reason that is set (priority-encode the reversed vector).
  matchBubble := (TopDownCounters.NumStallReasons.id - 1).U - PriorityEncoder(topdown_stage.reasons.reverse)

  // Default: no stall reason. Later connections override for the wasted (highest-index) slots.
  io.stallReason.reason.foreach(_ := 0.U)
  for (i <- 0 until DecodeWidth) {
    when(i.U < deqWasteCount) {
      io.stallReason.reason(DecodeWidth - i - 1) := matchBubble
    }
  }

  when(!(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR)) {
    // should set reason for FetchFragmentationStall
    // topdown_stage.reasons(TopDownCounters.FetchFragmentationStall.id) := true.B
    for (i <- 0 until DecodeWidth) {
      when(i.U < deqWasteCount) {
        io.stallReason.reason(DecodeWidth - i - 1) := TopDownCounters.FetchFragBubble.id.U
      }
    }
  }

  // A valid back-pressure reason overrides every slot.
  when(io.stallReason.backReason.valid) {
    io.stallReason.reason.foreach(_ := io.stallReason.backReason.bits)
  }

  // Debug info
  XSError(
    deqPtr.value =/= deqBankPtr.value + deqInBankPtr(deqBankPtr.value).value * IBufNBank.asUInt,
    "Dequeue PTR mismatch"
  )
  XSError(isBefore(enqPtr, deqPtr) && !isFull(enqPtr, deqPtr), "\ndeqPtr is older than enqPtr!\n")

  XSDebug(io.flush, "IBuffer Flushed\n")

  when(io.in.fire) {
    XSDebug("Enque:\n")
    XSDebug(p"MASK=${Binary(io.in.bits.valid)}\n")
    for (i <- 0 until PredictWidth) {
      XSDebug(p"PC=${Hexadecimal(io.in.bits.pc(i))} ${Hexadecimal(io.in.bits.instrs(i))}\n")
    }
  }

  for (i <- 0 until DecodeWidth) {
    XSDebug(io.out(i).fire,
      p"deq: ${Hexadecimal(io.out(i).bits.instr)} PC=${Hexadecimal(io.out(i).bits.pc)}" +
      p"v=${io.out(i).valid} r=${io.out(i).ready} " +
      p"excpVec=${Binary(io.out(i).bits.exceptionVec.asUInt)} crossPageIPF=${io.out(i).bits.crossPageIPFFix}\n")
  }

  XSDebug(p"numValid: ${numValid}\n")
  XSDebug(p"EnqNum: ${numEnq}\n")
  XSDebug(p"DeqNum: ${numDeq}\n")

  // afterInit: set once the first fetch packet has been accepted.
  // headBubble: set by a flush and cleared once the buffer holds at least one entry again.
  val afterInit = RegInit(false.B)
  val headBubble = RegInit(false.B)
  when (io.in.fire) { afterInit := true.B }
  when (io.flush) {
    headBubble := true.B
  } .elsewhen(numValid =/= 0.U) {
    headBubble := false.B
  }
  val instrHungry = afterInit && (numValid === 0.U) && !headBubble

  QueuePerf(IBufSize, numValid, !allowEnq)
  XSPerfAccumulate("flush", io.flush)
  XSPerfAccumulate("hungry", instrHungry)

  val ibuffer_IDWidth_hvButNotFull = afterInit && (numValid =/= 0.U) && (numValid < DecodeWidth.U) && !headBubble
  XSPerfAccumulate("ibuffer_IDWidth_hvButNotFull", ibuffer_IDWidth_hvButNotFull)
  /*
  XSPerfAccumulate("ICacheMissBubble", Mux(matchBubbleVec(TopDownCounters.ICacheMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITLBMissBubble", Mux(matchBubbleVec(TopDownCounters.ITLBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ControlRedirectBubble", Mux(matchBubbleVec(TopDownCounters.ControlRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("MemVioRedirectBubble", Mux(matchBubbleVec(TopDownCounters.MemVioRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OtherRedirectBubble", Mux(matchBubbleVec(TopDownCounters.OtherRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("BTBMissBubble", Mux(matchBubbleVec(TopDownCounters.BTBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OverrideBubble", Mux(matchBubbleVec(TopDownCounters.OverrideBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqUpdateBubble", Mux(matchBubbleVec(TopDownCounters.FtqUpdateBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqFullStall", Mux(matchBubbleVec(TopDownCounters.FtqFullStall.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FetchFragmentBubble",
  Mux(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR, 0.U, deqWasteCount))
  XSPerfAccumulate("TAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.TAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("SCMissBubble", Mux(matchBubbleVec(TopDownCounters.SCMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITTAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.ITTAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("RASMissBubble", Mux(matchBubbleVec(TopDownCounters.RASMissBubble.id), deqWasteCount, 0.U))
  */

  // NOTE(review): `numValid.andR` is only true when every bit of numValid is 1 — confirm that this
  // really corresponds to "full" for the configured IBufSize.
  val perfEvents = Seq(
    ("IBuffer_Flushed ", io.flush),
    ("IBuffer_hungry ", instrHungry),
    ("IBuffer_1_4_valid", (numValid > (0*(IBufSize/4)).U) & (numValid < (1*(IBufSize/4)).U)),
    ("IBuffer_2_4_valid", (numValid >= (1*(IBufSize/4)).U) & (numValid < (2*(IBufSize/4)).U)),
    ("IBuffer_3_4_valid", (numValid >= (2*(IBufSize/4)).U) & (numValid < (3*(IBufSize/4)).U)),
    ("IBuffer_4_4_valid", (numValid >= (3*(IBufSize/4)).U) & (numValid < (4*(IBufSize/4)).U)),
    ("IBuffer_full ", numValid.andR),
    ("Front_Bubble ", PopCount((0 until DecodeWidth).map(i => io.out(i).ready && !io.out(i).valid)))
  )
  generatePerfEvent()
}