/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}

/**
  * Flush handshake for the uncache buffer.
  *
  * Directions are declared from the requester's point of view; `UncacheIO`
  * instantiates this bundle `Flipped`, so inside the uncache module `valid`
  * is an input and `empty` is an output.
  */
class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

/**
  * Payload stored for one uncache buffer entry: the request fields copied
  * from the LSQ request plus the error flag recorded from the bus response.
  */
class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()

  // set from the `denied` field of the TileLink D-channel response
  val resp_nderr = Bool()

  /* NOTE: if it support the internal forward logic, here can uncomment */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  /** Loads this entry from an incoming request and clears the error flag. */
  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    id := x.id
    nc := x.nc
    atomic := x.atomic
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  /**
    * Records a D-channel response in this entry.
    * `data` is overwritten only when the entry's command is M_XRD;
    * `resp_nderr` always takes the response's `denied` bit.
    */
  def update(x: TLBundleD): Unit = {
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
    resp_nderr := x.denied
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  /**
    * Builds the response returned to the LSQ from this entry.
    * Fields not assigned explicitly below are left as DontCare.
    */
  def toUncacheWordResp(): UncacheWordResp = {
    // val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // )).asUInt
    val resp_fwd_data = data
    val r = Wire(new UncacheWordResp)
    r := DontCare
    r.data := resp_fwd_data
    r.id := id
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}

/**
  * Per-entry status flags of the uncache buffer.
  */
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  /** Clears every flag. */
  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame
  // entry is allocated and none of the inflight / waitSame / waitReturn flags is set
  def can2Uncache(): Bool = valid && !inflight && !waitSame && !waitReturn
  // entry is allocated and flagged as waiting to be returned
  def can2Lsq(): Bool = valid && waitReturn

  def setValid(x: Bool): Unit = { valid := x}
  def setInflight(x: Bool): Unit = { inflight := x}
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x}

  /** Bus response received: inflight -> waitReturn. Asserts that the entry was inflight. */
  def updateUncacheResp(): Unit = {
    assert(inflight, "The request was not sent and a response was received")
    inflight := false.B
    waitReturn := true.B
  }
  /** Response handed back: clears every flag, releasing the entry. */
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}

/**
  * IO of the uncache module: hart id, outstanding-mode enable, flush
  * handshake, the LSQ request/response channel and one forward query port
  * per load pipeline.
  */
class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// convert DCacheIO to TileLink
// for Now, we only deal with TL-UL

/**
  * Diplomatic wrapper: declares a single TileLink client named "uncache"
  * whose source ids span [0, UncacheBufferSize), i.e. one id per buffer entry.
  */
class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    ))
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
/**
  * Implementation of the uncache buffer.
  *
  * Requests from the LSQ are allocated into `entries`/`states`, sent on the
  * TileLink A channel (Get for non-store commands, Put for M_XWR), updated by
  * the D-channel response and finally returned on `io.lsq.resp`. The buffer
  * entry index is used as the TileLink source id.
  *
  * NOTE: the `states` registers are written from several `when` blocks; the
  * textual order of those blocks matters (last connection wins), so keep the
  * section order when editing.
  */
class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  println(s"Uncahe Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  // NOTE(review): `fence` is not read or written anywhere else in this file.
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  // builds a Vec with one element per buffer entry
  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR

  // drain buffer
  val empty = Wire(Bool())
  val f0_needDrain = Wire(Bool())
  // registered drain request; while set, new requests are not accepted (see req_ready)
  val do_uarch_drain = RegNext(f0_needDrain)

  // forward declarations of the "Uncache Req" stage signals, used by "Enter Buffer"
  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  // Tracks the single request through req -> A channel -> D channel -> resp.
  // In this file uState is only consulted to gate sending when
  // io.enableOutstanding is low, and by a perf counter.
  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
    1. same addr
    2. same cmd
    3. valid
    FIXME lyq: not merge now due to the following issues
    1. load cannot be merged
    2. how to merge store and response precisely
  */

  val e0_fire = req.fire
  val e0_req = req.bits
  /**
    TODO lyq: block or wait or forward?
    NOW: strict block by same address; otherwise: exhaustive consideration is needed.
      - ld->ld wait
      - ld->st forward
      - st->ld forward
      - st->st block
  */
  // some valid entry already holds the same physical address
  val e0_existSame = sizeMap(j => e0_req.addr === entries(j).addr && states(j).isValid()).asUInt.orR
  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_alloc = e0_canAlloc && !e0_existSame && e0_fire
  // accept a request only if a free entry exists, no valid entry has the
  // same address, and no drain is pending
  req_ready := e0_invalidVec.asUInt.orR && !e0_existSame && !do_uarch_drain

  when (e0_alloc) {
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)

    // judge whether wait same block: e0 & q0
    val waitSameVec = sizeMap(j =>
      e0_req.addr === entries(j).addr && states(j).isValid() && states(j).isInflight()
    )
    val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
    when (waitSameVec.reduce(_ || _) || waitQ0) {
      states(e0_allocIdx).setWaitSame(true.B)
    }

  }


  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will be always ready.
   ******************************************************************/

  // an entry may be sent when it is ready (can2Uncache) and either outstanding
  // mode is enabled or the non-outstanding FSM is in s_refill_req
  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_refill_req) &&
    states(i).can2Uncache()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  // transfer size is derived from the number of set mask bits;
  // only 1, 2, 4 or 8 bytes are legal
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 should judge whether wait same block
    // (the address compare also matches the sent entry itself)
    (0 until UncacheBufferSize).map(j =>
      when(q0_entry.addr === entries(j).addr && states(j).isValid() && !states(j).isWaitReturn()){
        states(j).setWaitSame(true.B)
      }
    )
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  // the D channel is always accepted; the source id selects the entry to update
  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // remove state of wait same block
    (0 until UncacheBufferSize).map(j =>
      when(entries(id).addr === entries(j).addr && states(j).isValid() && states(j).isWaitSame()){
        states(j).setWaitSame(false.B)
      }
    )
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain store queue and ubuffer
   * 2. when io.lsq.req.bits.atomic is true: not support temporarily
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty


  /******************************************************************
   * Load Data Forward
   *
   * 0. ld in ldu pipeline
   *    f0: tag match, fast resp
   *    f1: data resp
   *
   * 1. ld in buffer (in "Enter Buffer")
   *    ld(en) -> st(in): ld entry.update, state.updateUncacheResp
   *    st(en) -> ld(in): ld entry.update, state.updateUncacheResp
   *    NOW: strict block by same address; there is no such forward.
   *
   ******************************************************************/

  // only valid store entries take part in forwarding
  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  // request a drain when any port sees a vaddr/paddr match disagreement
  // and the buffer is not empty
  f0_needDrain := f0_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_vtagMatches = sizeMap(w => entries(w).vaddr === forward.vaddr)
    val f0_ptagMatches = sizeMap(w => entries(w).addr === forward.paddr)
    f0_tagMismatchVec(i) := forward.valid && sizeMap(w =>
      f0_vtagMatches(w) =/= f0_ptagMatches(w) && f0_validMask(w)
    ).asUInt.orR
    // NOTE(review): the printed values are RegNext'ed while the enclosing
    // condition is combinational, so they appear to lag the condition by
    // one cycle -- confirm intent.
    when (f0_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        RegNext(f0_ptagMatches.asUInt),
        RegNext(f0_vtagMatches.asUInt),
        RegNext(forward.vaddr),
        RegNext(forward.paddr)
      )
    }

    val f0_validTagMatches = sizeMap(w => f0_ptagMatches(w) && f0_validMask(w) && forward.valid)

    val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
    val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
    val f0_fwdMask = shiftMaskToHigh(
      forward.paddr,
      Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_fwdData = shiftDataToHigh(
      forward.paddr,
      Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
    ).asTypeOf(Vec(VDataBytes, UInt(8.W)))

    // f1: registered copy of the f0 result
    val f1_fwdValid = RegNext(forward.valid)
    val f1_fwdMask = RegEnable(f0_fwdMask, forward.valid)
    val f1_fwdData = RegEnable(f0_fwdData, forward.valid)

    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f0_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_fwdMask(j)

      forward.forwardMask(j) := false.B
      forward.forwardData(j) := DontCare
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
        forward.forwardData(j) := f1_fwdData(j)
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  // print the response payload (previously printed req.bits.data by mistake)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  when(mem_acquire.valid){
    XSDebug("mem_acquire valid, ready=%d ", mem_acquire.ready)
    mem_acquire.bits.dump
  }
  when (mem_grant.fire) {
    XSDebug("mem_grant fire ")
    mem_grant.bits.dump
  }

  /* Performance Counters */
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f0_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f0_tagMismatchVec))
  )

  generatePerfEvent()
  // End
}