/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend.icache

import chisel3._
import chisel3.util._
import difftest._
import freechips.rocketchip.diplomacy.IdRange
import freechips.rocketchip.tilelink._
import freechips.rocketchip.tilelink.ClientStates._
import freechips.rocketchip.tilelink.TLPermissions._
import org.chipsalliance.cde.config.Parameters
import utility._
import utils._
import xiangshan._
import xiangshan.cache._

/** Common base for the modules of the ICache miss unit. */
abstract class ICacheMissUnitModule(implicit p: Parameters) extends XSModule
    with HasICacheParameters

/** Common base for the bundles of the ICache miss unit. */
abstract class ICacheMissUnitBundle(implicit p: Parameters) extends XSBundle
    with HasICacheParameters

/** Hardware module that routes 1 producer to one of n consumers.
  *
  * Priority is given to the lower-indexed consumer: `out(i).valid` is only raised when
  * no output with an index below `i` is ready. `in.ready` is high when any output is ready,
  * and `chosen` is the index of the lowest ready output.
  *
  * @param gen payload type carried by the decoupled ports
  * @param n   number of consumers, must be at least 2
  */
class Demultiplexer[T <: Data](val gen: T, val n: Int) extends Module {
  require(n >= 2)
  val io = IO(new Bundle {
    val in     = Flipped(DecoupledIO(gen))
    val out    = Vec(n, DecoupledIO(gen))
    val chosen = Output(UInt(log2Ceil(n).W))
  })

  // grant(i) is high when some output with a lower index than i is ready (grant(0) is never high)
  val grant = false.B +: (1 until n).map(i => (0 until i).map(io.out(_).ready).reduce(_ || _))
  for (i <- 0 until n) {
    io.out(i).bits  := io.in.bits
    io.out(i).valid := !grant(i) && io.in.valid
  }

  // equivalent to "any output is ready"
  io.in.ready := grant.last || io.out.last.ready
  io.chosen   := PriorityEncoder(VecInit(io.out.map(_.ready)))
}

/** Hardware module that forwards the decoupled input selected by `sel` to the single output.
  *
  * Only the selected input observes `out.ready`; every other input sees `ready` low.
  *
  * @param gen payload type carried by the decoupled ports
  * @param n   number of inputs, must be at least 2
  */
class MuxBundle[T <: Data](val gen: T, val n: Int) extends Module {
  require(n >= 2)
  val io = IO(new Bundle {
    val sel = Input(UInt(log2Ceil(n).W))
    val in  = Flipped(Vec(n, DecoupledIO(gen)))
    val out = DecoupledIO(gen)
  })

  // defaults, overridden below by the selected input
  io.in <> DontCare
  io.out <> DontCare
  for (i <- 0 until n) {
    when(io.sel === i.U) {
      io.out <> io.in(i)
    }
    io.in(i).ready := (io.sel === i.U) && io.out.ready
  }
}

/** Miss request: block-aligned physical address plus virtual set index. */
class ICacheMissReq(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr = UInt((PAddrBits - blockOffBits).W)
  val vSetIdx  = UInt(idxBits.W)
}

/** Miss response: request information, a one-bit-per-way mask, the refilled block and its corrupt flag. */
class ICacheMissResp(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr = UInt((PAddrBits - blockOffBits).W)
  val vSetIdx  = UInt(idxBits.W)
  val waymask  = UInt(nWays.W)
  val data     = UInt(blockBits.W)
  val corrupt  = Bool()
}

/** Look-up port of an MSHR: `info` carries the probing request, `hit` is returned by the MSHR. */
class LookUpMSHR(implicit p: Parameters) extends ICacheBundle {
  val info = ValidIO(new ICacheMissReq)
  val hit  = Input(Bool())
}

/** Information an MSHR offers for the refill response (everything except the data). */
class MSHRResp(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr = UInt((PAddrBits - blockOffBits).W)
  val vSetIdx  = UInt(idxBits.W)
  // binary-encoded way index (log2Ceil(nWays) bits); ICacheMissUnit expands it with UIntToOH
  val waymask = UInt(log2Ceil(nWays).W)
}

/** TileLink A-channel request of an MSHR together with the set index it refills. */
class MSHRAcquire(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheBundle {
  val acquire = new TLBundleA(edge.bundle)
  val vSetIdx = UInt(idxBits.W)
}

/** One miss status holding register.
  *
  * Registers a miss request, issues a TileLink Get for the block, records the victim way
  * when the Get is accepted, and is released by `io.invalid`.
  *
  * @param edge    TileLink edge used to build the Get request
  * @param isFetch construction-time flag; not read inside this module
  * @param ID      TileLink source id used for the Get
  */
class ICacheMSHR(edge: TLEdgeOut, isFetch: Boolean, ID: Int)(implicit p: Parameters) extends ICacheMissUnitModule {
  val io = IO(new Bundle {
    val fencei    = Input(Bool())
    val flush     = Input(Bool())
    val invalid   = Input(Bool())
    val req       = Flipped(DecoupledIO(new ICacheMissReq))
    val acquire   = DecoupledIO(new MSHRAcquire(edge))
    val lookUps   = Flipped(Vec(2, new LookUpMSHR))
    val resp      = ValidIO(new MSHRResp)
    val victimWay = Input(UInt(log2Ceil(nWays).W))
  })

  val valid = RegInit(Bool(), false.B)
  // when set, this MSHR doesn't respond to fetch and sram
  val flush  = RegInit(Bool(), false.B)
  val fencei = RegInit(Bool(), false.B)
  // this MSHR has been issued
  val issue = RegInit(Bool(), false.B)

  val blkPaddr = RegInit(UInt((PAddrBits - blockOffBits).W), 0.U)
  val vSetIdx  = RegInit(UInt(idxBits.W), 0.U)
  val waymask  = RegInit(UInt(log2Ceil(nWays).W), 0.U)

  // look up and return result at the same cycle
  val hits = io.lookUps.map(lookup =>
    valid && !fencei && !flush && (lookup.info.bits.vSetIdx === vSetIdx) &&
      (lookup.info.bits.blkPaddr === blkPaddr)
  )
  // Decoupling valid and bits: lookup.info.valid is intentionally not part of the hit condition
  (0 until 2).foreach(i => io.lookUps(i).hit := hits(i))

  // disable wake up when hit MSHR (fencei is low)
  // when(hit) {
  //   flush := false.B
  // }

  // invalid when the req hasn't been issued
  when(io.fencei || io.flush) {
    fencei := true.B
    flush  := true.B
    when(!issue) {
      valid := false.B
    }
  }

  // receive request and register
  io.req.ready := !valid && !io.flush && !io.fencei
  when(io.req.fire) {
    valid    := true.B
    flush    := false.B
    issue    := false.B
    fencei   := false.B
    blkPaddr := io.req.bits.blkPaddr
    vSetIdx  := io.req.bits.vSetIdx
  }

  // send request to L2
  io.acquire.valid := valid && !issue && !io.flush && !io.fencei
  val getBlock = edge.Get(
    fromSource = ID.U,
    toAddress = Cat(blkPaddr, 0.U(blockOffBits.W)),
    lgSize = log2Up(cacheParams.blockBytes).U
  )._2
  io.acquire.bits.acquire := getBlock
  io.acquire.bits.acquire.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPUInst.id.U)
  io.acquire.bits.vSetIdx := vSetIdx

  // get victim way when acquire fire
  when(io.acquire.fire) {
    issue   := true.B
    waymask := io.victimWay
  }

  // invalid request when grant finish
  when(io.invalid) {
    valid := false.B
  }

  // offer the information other than data for write sram and response fetch
  io.resp.valid         := valid && (!flush && !fencei)
  io.resp.bits.blkPaddr := blkPaddr
  io.resp.bits.vSetIdx  := vSetIdx
  io.resp.bits.waymask  := waymask
}

/** IO of [[ICacheMissUnit]]. */
class ICacheMissBundle(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheBundle {
  // difftest
  // NOTE(review): declared as a single bit but drives difftest.coreid in ICacheMissUnit;
  // confirm this width is sufficient for configurations with more than two harts.
  val hartId = Input(Bool())
  // control
  val fencei = Input(Bool())
  val flush  = Input(Bool())
  // fetch
  val fetch_req  = Flipped(DecoupledIO(new ICacheMissReq))
  val fetch_resp = ValidIO(new ICacheMissResp)
  // prefetch
  val prefetch_req = Flipped(DecoupledIO(new ICacheMissReq))
  // SRAM Write Req
  val meta_write = DecoupledIO(new ICacheMetaWriteBundle)
  val data_write = DecoupledIO(new ICacheDataWriteBundle)
  // get victim from replacer
  val victim = new ReplacerVictim
  // Tilelink
  val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
  val mem_grant   = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
}

/** ICache miss unit.
  *
  * Accepts fetch and prefetch miss requests, allocates them into MSHRs (dropping requests
  * that already hit an MSHR), issues TileLink Gets, collects the grant beats into a cacheline
  * register, and then responds to fetch and writes the meta/data SRAMs.
  *
  * @param edge TileLink edge used for the A/D channels
  */
class ICacheMissUnit(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheMissUnitModule {
  val io = IO(new ICacheMissBundle(edge))

  /**
    ******************************************************************************
    * fetch have higher priority
    * fetch MSHR: lower index have a higher priority
    * prefetch MSHR: the prefetchMSHRs earlier have a higher priority
    *                    ---------       --------------       -----------
    * ---fetch req------>| Demux |-----> | fetch MSHR |------>| Arbiter |---acquire--->
    *                    ---------       --------------       -----------
    *                                    | fetch MSHR |            ^
    *                                    --------------            |
    *                                                              |
    *                                   -----------------          |
    *                                   | prefetch MSHR |          |
    *                    ---------      -----------------     -----------
    * ---prefetch req--->| Demux |----> | prefetch MSHR |---->| Arbiter |
    *                    ---------      -----------------     -----------
    *                                   |    .......    |
    *                                   -----------------
    ******************************************************************************
    */

  val fetchDemux    = Module(new Demultiplexer(new ICacheMissReq, nFetchMshr))
  val prefetchDemux = Module(new Demultiplexer(new ICacheMissReq, nPrefetchMshr))
  val prefetchArb   = Module(new MuxBundle(new MSHRAcquire(edge), nPrefetchMshr))
  val acquireArb    = Module(new Arbiter(new MSHRAcquire(edge), nFetchMshr + 1))

  // To avoid duplicate request reception:
  // a request that hits an MSHR is accepted (ready) but not forwarded to the demux (valid masked).
  val fetchHit, prefetchHit = Wire(Bool())
  fetchDemux.io.in <> io.fetch_req
  fetchDemux.io.in.valid := io.fetch_req.valid && !fetchHit
  io.fetch_req.ready     := fetchDemux.io.in.ready || fetchHit
  prefetchDemux.io.in <> io.prefetch_req
  prefetchDemux.io.in.valid := io.prefetch_req.valid && !prefetchHit
  io.prefetch_req.ready     := prefetchDemux.io.in.ready || prefetchHit
  // the prefetch path takes the last (lowest-priority) input of the acquire arbiter
  acquireArb.io.in.last <> prefetchArb.io.out

  // mem_acquire connect
  io.mem_acquire.valid    := acquireArb.io.out.valid
  io.mem_acquire.bits     := acquireArb.io.out.bits.acquire
  acquireArb.io.out.ready := io.mem_acquire.ready

  // fetch MSHRs use source ids [0, nFetchMshr) and are never flushed by io.flush
  val fetchMSHRs = (0 until nFetchMshr).map { i =>
    val mshr = Module(new ICacheMSHR(edge, true, i))
    mshr.io.flush  := false.B
    mshr.io.fencei := io.fencei
    mshr.io.req <> fetchDemux.io.out(i)
    mshr.io.lookUps(0).info.valid := io.fetch_req.valid
    mshr.io.lookUps(0).info.bits  := io.fetch_req.bits
    mshr.io.lookUps(1).info.valid := io.prefetch_req.valid
    mshr.io.lookUps(1).info.bits  := io.prefetch_req.bits
    mshr.io.victimWay             := io.victim.way
    acquireArb.io.in(i) <> mshr.io.acquire
    mshr
  }

  // prefetch MSHRs use source ids [nFetchMshr, nFetchMshr + nPrefetchMshr)
  val prefetchMSHRs = (0 until nPrefetchMshr).map { i =>
    val mshr = Module(new ICacheMSHR(edge, false, nFetchMshr + i))
    mshr.io.flush  := io.flush
    mshr.io.fencei := io.fencei
    mshr.io.req <> prefetchDemux.io.out(i)
    mshr.io.lookUps(0).info.valid := io.fetch_req.valid
    mshr.io.lookUps(0).info.bits  := io.fetch_req.bits
    mshr.io.lookUps(1).info.valid := io.prefetch_req.valid
    mshr.io.lookUps(1).info.bits  := io.prefetch_req.bits
    mshr.io.victimWay             := io.victim.way
    prefetchArb.io.in(i) <> mshr.io.acquire
    mshr
  }

  /**
    ******************************************************************************
    * MSHR look up
    * - look up all mshr
    ******************************************************************************
    */
  val allMSHRs = fetchMSHRs ++ prefetchMSHRs
  // a prefetch request to the same block as the fetch request presented in this cycle is also a duplicate
  val prefetchHitFetchReq = (io.prefetch_req.bits.blkPaddr === io.fetch_req.bits.blkPaddr) &&
    (io.prefetch_req.bits.vSetIdx === io.fetch_req.bits.vSetIdx) &&
    io.fetch_req.valid
  fetchHit    := allMSHRs.map(mshr => mshr.io.lookUps(0).hit).reduce(_ || _)
  prefetchHit := allMSHRs.map(mshr => mshr.io.lookUps(1).hit).reduce(_ || _) || prefetchHitFetchReq

  /**
    ******************************************************************************
    * prefetchMSHRs priority
    * - The requests that enter the prefetchMSHRs earlier have a higher priority in issuing.
    * - The order of enqueuing is recorded in FIFO when request enters MSHRs.
    * - The requests are dispatched in the order they are recorded in FIFO.
    ******************************************************************************
    */
  // When the FIFO is full, enqueue and dequeue operations do not occur at the same cycle.
  // So the depth of the FIFO is set to match the number of MSHRs.
  // val priorityFIFO = Module(new Queue(UInt(log2Ceil(nPrefetchMshr).W), nPrefetchMshr, hasFlush=true))
  val priorityFIFO = Module(new FIFOReg(UInt(log2Ceil(nPrefetchMshr).W), nPrefetchMshr, hasFlush = true))
  priorityFIFO.io.flush.get := io.flush || io.fencei
  priorityFIFO.io.enq.valid := prefetchDemux.io.in.fire
  priorityFIFO.io.enq.bits  := prefetchDemux.io.chosen
  priorityFIFO.io.deq.ready := prefetchArb.io.out.fire
  prefetchArb.io.sel        := priorityFIFO.io.deq.bits
  assert(
    !(priorityFIFO.io.enq.fire ^ prefetchDemux.io.in.fire),
    "priorityFIFO.io.enq and io.prefetch_req must fire at the same cycle"
  )
  assert(
    !(priorityFIFO.io.deq.fire ^ prefetchArb.io.out.fire),
    "priorityFIFO.io.deq and prefetchArb.io.out must fire at the same cycle"
  )

  /**
    ******************************************************************************
    * Tilelink D channel (grant)
    ******************************************************************************
    */
  // cacheline register
  val readBeatCnt = RegInit(UInt(log2Up(refillCycles).W), 0.U)
  val respDataReg = RegInit(VecInit(Seq.fill(refillCycles)(0.U(beatBits.W))))

  val wait_last = readBeatCnt === (refillCycles - 1).U
  when(io.mem_grant.fire && edge.hasData(io.mem_grant.bits)) {
    respDataReg(readBeatCnt) := io.mem_grant.bits.data
    readBeatCnt              := Mux(wait_last, 0.U, readBeatCnt + 1.U)
  }

  // last beat of the transaction has been received
  val last_fire = io.mem_grant.fire && edge.hasData(io.mem_grant.bits) && wait_last

  // cross-check the local beat counter against the edge's own "done" tracking
  val (_, _, refill_done, _) = edge.addr_inc(io.mem_grant)
  assert(!(refill_done ^ last_fire), "refill not done!")
  io.mem_grant.ready := true.B

  // the response / SRAM write happens one cycle after the last beat
  val last_fire_r = RegNext(last_fire)
  val id_r        = RegNext(io.mem_grant.bits.source)

  // if any beat is corrupt, the whole response (to mainPipe/metaArray/dataArray) is corrupt
  val corrupt_r = RegInit(false.B)
  when(io.mem_grant.fire && edge.hasData(io.mem_grant.bits) && io.mem_grant.bits.corrupt) {
    // set when any beat is corrupt; has priority so a corrupt first beat of the next
    // transaction arriving in the clearing cycle is not lost
    corrupt_r := true.B
  }.elsewhen(last_fire_r) {
    // Clear once the transaction has finished, whether or not a response is actually sent.
    // Clearing on io.fetch_resp.fire (last_fire_r && mshr_resp.valid) is not enough: when the
    // MSHR has been flushed/fencei'd, mshr_resp.valid is low, corrupt_r would never be cleared
    // and the next (clean) refill would wrongly be reported as corrupt.
    corrupt_r := false.B
  }

  /**
    ******************************************************************************
    * invalid mshr when finish transition
    ******************************************************************************
    */
  (0 until (nFetchMshr + nPrefetchMshr)).foreach(i => allMSHRs(i).io.invalid := last_fire_r && (id_r === i.U))

  /**
    ******************************************************************************
    * response fetch and write SRAM
    ******************************************************************************
    */
  // get request information from MSHRs
  val allMSHRs_resp = VecInit(allMSHRs.map(mshr => mshr.io.resp))
  val mshr_resp     = allMSHRs_resp(id_r)

  // get waymask from replacer when acquire fire
  io.victim.vSetIdx.valid := acquireArb.io.out.fire
  io.victim.vSetIdx.bits  := acquireArb.io.out.bits.vSetIdx
  // the MSHR stores a binary way index; expand it to a one-hot mask
  val waymask = UIntToOH(mshr_resp.bits.waymask)
  // NOTE: when flush/fencei, missUnit will still send response to mainPipe/prefetchPipe
  //       this is intentional to fix timing (io.flush -> mainPipe/prefetchPipe s2_miss -> s2_ready -> ftq ready)
  //       unnecessary response will be dropped by mainPipe/prefetchPipe/wayLookup since their sx_valid is set to false
  val fetch_resp_valid = mshr_resp.valid && last_fire_r
  // NOTE: but we should not write meta/dataArray when flush/fencei
  val write_sram_valid = fetch_resp_valid && !corrupt_r && !io.flush && !io.fencei

  // write SRAM
  io.meta_write.bits.generate(
    tag = getPhyTagFromBlk(mshr_resp.bits.blkPaddr),
    idx = mshr_resp.bits.vSetIdx,
    waymask = waymask,
    bankIdx = mshr_resp.bits.vSetIdx(0)
  )
  io.data_write.bits.generate(
    data = respDataReg.asUInt,
    idx = mshr_resp.bits.vSetIdx,
    waymask = waymask,
    bankIdx = mshr_resp.bits.vSetIdx(0)
  )

  io.meta_write.valid := write_sram_valid
  io.data_write.valid := write_sram_valid

  // response fetch
  io.fetch_resp.valid         := fetch_resp_valid
  io.fetch_resp.bits.blkPaddr := mshr_resp.bits.blkPaddr
  io.fetch_resp.bits.vSetIdx  := mshr_resp.bits.vSetIdx
  io.fetch_resp.bits.waymask  := waymask
  io.fetch_resp.bits.data     := respDataReg.asUInt
  io.fetch_resp.bits.corrupt  := corrupt_r

  /**
    ******************************************************************************
    * performance counter
    ******************************************************************************
    */
  // Duplicate requests will be excluded.
  XSPerfAccumulate("enq_fetch_req", fetchDemux.io.in.fire)
  XSPerfAccumulate("enq_prefetch_req", prefetchDemux.io.in.fire)

  /**
    ******************************************************************************
    * ChiselDB: record ICache SRAM write log
    ******************************************************************************
    */
  class ICacheSRAMDB(implicit p: Parameters) extends ICacheBundle {
    val blkPaddr = UInt((PAddrBits - blockOffBits).W)
    val vSetIdx  = UInt(idxBits.W)
    val waymask  = UInt(log2Ceil(nWays).W)
  }

  val isWriteICacheSRAMTable =
    WireInit(Constantin.createRecord("isWriteICacheSRAMTable" + p(XSCoreParamsKey).HartId.toString))
  val ICacheSRAMTable = ChiselDB.createTable("ICacheSRAMTable" + p(XSCoreParamsKey).HartId.toString, new ICacheSRAMDB)

  val ICacheSRAMDBDumpData = Wire(new ICacheSRAMDB)
  ICacheSRAMDBDumpData.blkPaddr := mshr_resp.bits.blkPaddr
  ICacheSRAMDBDumpData.vSetIdx  := mshr_resp.bits.vSetIdx
  ICacheSRAMDBDumpData.waymask  := OHToUInt(waymask)
  ICacheSRAMTable.log(
    data = ICacheSRAMDBDumpData,
    en = write_sram_valid,
    clock = clock,
    reset = reset
  )

  /**
    ******************************************************************************
    * Difftest
    ******************************************************************************
    */
  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffRefillEvent, dontCare = true)
    difftest.coreid := io.hartId
    difftest.index  := 0.U
    difftest.valid  := write_sram_valid
    difftest.addr   := Cat(mshr_resp.bits.blkPaddr, 0.U(blockOffBits.W))
    difftest.data   := respDataReg.asTypeOf(difftest.data)
    difftest.idtfr  := DontCare
  }
}