/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.{TLBundleB, TLEdgeOut, TLMessages, TLPermissions}
import utils.HasTLDump
import utility.{XSDebug, XSPerfAccumulate, HasPerfEvents}

/** Internal form of a probe request, built by ProbeQueue from a TileLink B-channel beat
  * and stored by a ProbeEntry until it has been handed to the main pipe.
  */
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  val source = UInt()
  val opcode = UInt()
  val addr   = UInt(PAddrBits.W)
  val vaddr  = UInt(VAddrBits.W) // l2 uses vaddr index to probe l1
  val param  = UInt(TLPermissions.bdWidth.W)
  val needData = Bool()

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  /** Print source / opcode / addr / param through XSDebug whenever `cond` is asserted. */
  def dump(cond: Bool) = {
    XSDebug(cond, "ProbeReq source: %d opcode: %d addr: %x param: %d\n",
      source, opcode, addr, param)
  }
}

/** Response routed back to the probe entries; carries only the ID of the entry it targets. */
class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}

/** One slot of the probe queue.
  *
  * State machine:
  *   s_invalid   - free; accepts a request on io.req
  *   s_pipe_req  - presents the stored request on io.pipe_req, unless the probed block
  *                 matched the LR/SC locked block in the previous cycle
  *   s_wait_resp - waits for an io.pipe_resp whose id equals io.id, then frees the slot
  */
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new ProbeReq))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val pipe_resp = Input(Valid(new ProbeResp))
    val lrsc_locked_block = Input(Valid(UInt()))
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready      := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits  := DontCare

  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits  := req.addr

  XSDebug(state =/= s_invalid, "state: %d\n", state)

  XSDebug(state =/= s_invalid, "ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // In the allocation cycle `req` has not been written yet, so compare against the
  // incoming address; afterwards compare against the stored one.
  val lrsc_blocked = Mux(
    io.req.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(io.req.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr)
  )

  // Registered copy of lrsc_blocked, used for better timing.
  // It is deliberately declared outside the `when (state === s_pipe_req)` block: a RegNext
  // created inside a `when` scope has its update gated by that condition, which would drop
  // the comparison made in the allocation cycle (state === s_invalid) and leave a stale
  // value visible in the first s_pipe_req cycle.
  val lrsc_blocked_r = RegNext(lrsc_blocked)

  when (state === s_pipe_req) {
    // Note that probe req will be blocked in the next cycle if a lr updates lrsc_locked_block addr
    // in this way, we can RegNext(lrsc_blocked) for better timing
    io.pipe_req.valid := !lrsc_blocked_r

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr   := req.addr
    pipe_req.vaddr  := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.error := false.B
    pipe_req.id := io.id

    when (io.pipe_req.fire) {
      state := s_wait_resp
    }
  }

  when (state === s_wait_resp) {
    // only the response tagged with this entry's ID releases the slot
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire)
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr))
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}

/** Queue of cfg.nProbeEntries ProbeEntry slots.
  *
  * Accepts probes on io.mem_probe, allocates the lowest-indexed free entry, arbitrates
  * between the entries' pipe requests, and forwards the winner on io.pipe_req through a
  * one-entry register stage. The forwarded request is held back while the registered
  * reservation-set check (resvsetProbeBlock) is asserted.
  */
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val lrsc_locked_block = Input(Valid(UInt()))
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if(DCacheAboveIndexOffset > DCacheTagOffset) {
    // have alias problem, extra alias bits needed for index
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  req.needData := io.mem_probe.bits.data(0)
  req.id := DontCare

  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i)   := entry.io.req.ready
    entry.io.req.bits  := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp
    // every entry sees the same response; each one filters on its own ID
    entry.io.pipe_resp.valid := io.pipe_req.fire
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  // delay probe req for 1 cycle
  val selected_req_valid = RegInit(false.B)
  val selected_req_bits = RegEnable(pipe_req_arb.io.out.bits, pipe_req_arb.io.out.fire)
  // While a new request is being latched, compare against the arbiter output;
  // otherwise compare against the request already held in the register stage.
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(pipe_req_arb.io.out.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(selected_req_bits.addr) && selected_req_valid
  )
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  // When we update update_resv_set, block all probe req in the next cycle
  // It should give Probe reservation set addr compare an independent cycle,
  // which will lead to better timing
  pipe_req_arb.io.out.ready := !selected_req_valid || io.pipe_req.fire
  io.pipe_req.valid := selected_req_valid && !resvsetProbeBlock
  io.pipe_req.bits := selected_req_bits
  when(io.pipe_req.fire){
    selected_req_valid := false.B
  }
  // placed last so that a refill in the same cycle as a drain keeps the stage valid
  when(pipe_req_arb.io.out.fire){
    selected_req_valid := true.B
  }

  // sanity checks on incoming probes
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not further issue probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && get_block(e.io.block_addr.bits) === get_block(io.mem_probe.bits.address))).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  XSDebug(io.mem_probe.fire, "mem_probe: ")
  io.mem_probe.bits.dump(io.mem_probe.fire)

// io.pipe_req.bits.dump(io.pipe_req.fire)

  XSDebug(io.lrsc_locked_block.valid, "lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  XSPerfAccumulate("ProbeL1DCache", io.mem_probe.fire)

  // Occupancy buckets: [0, n/4], (n/4, n/2], (n/2, 3n/4], (3n/4, n].
  // The first bucket is inclusive of n/4 so that every occupancy value lands in exactly one bucket.
  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_probq_req      ", io.pipe_req.fire),
    ("dcache_probq_1_4_valid", (perfValidCount <= (cfg.nProbeEntries.U/4.U))),
    ("dcache_probq_2_4_valid", (perfValidCount > (cfg.nProbeEntries.U/4.U)) & (perfValidCount <= (cfg.nProbeEntries.U/2.U))),
    ("dcache_probq_3_4_valid", (perfValidCount > (cfg.nProbeEntries.U/2.U)) & (perfValidCount <= (cfg.nProbeEntries.U*3.U/4.U))),
    ("dcache_probq_4_4_valid", (perfValidCount > (cfg.nProbeEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}