/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._

import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleB, TLMessages, TLPermissions}

import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}

// A probe request from L2, translated into the DCache's internal format
class ProbeReq(implicit p: Parameters) extends DCacheBundle {
  val source = UInt()
  val opcode = UInt()
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W) // L2 uses the vaddr index to probe L1
  val param = UInt(TLPermissions.bdWidth.W)
  val needData = Bool()

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  def dump() = {
    XSDebug("ProbeReq source: %d opcode: %d addr: %x param: %d\n",
      source, opcode, addr, param)
  }
}

class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}

// Tracks a single in-flight probe from allocation until the main pipe responds
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new ProbeReq))
    val pipe_req = DecoupledIO(new MainPipeReq)
    val pipe_resp = Input(Valid(new ProbeResp))
    val lrsc_locked_block = Input(Valid(UInt()))
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits := DontCare

  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  when (state =/= s_invalid) {
    XSDebug("ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
  }

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire()) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // is the block we are (or are about to start) probing held by an LR/SC reservation?
  val lrsc_blocked = Mux(
    io.req.fire(),
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === io.req.bits.addr,
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr
  )

  when (state === s_pipe_req) {
    // Note: when an LR updates the lrsc_locked_block addr, the probe req is only
    // blocked in the next cycle; this lets us use RegNext(lrsc_blocked) for better timing.
    io.pipe_req.valid := !RegNext(lrsc_blocked)

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr := req.addr
    pipe_req.vaddr := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.id := io.id

    when (io.pipe_req.fire()) {
      state := s_wait_resp
    }
  }

  when (state === s_wait_resp) {
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire())
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr)
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
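// For reference, the entry's life cycle (a descriptive summary added for
// clarity; it restates the transitions coded above, nothing more):
//
//   s_invalid   --io.req.fire()----------------------> s_pipe_req
//   s_pipe_req  --io.pipe_req.fire()-----------------> s_wait_resp
//                 (valid is held low while the previous cycle saw lrsc_blocked)
//   s_wait_resp --pipe_resp.valid with matching id---> s_invalid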
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump {
  val io = IO(new Bundle {
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    val pipe_req = DecoupledIO(new MainPipeReq)
    val lrsc_locked_block = Input(Valid(UInt()))
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for the incoming request
  val primary_ready = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate the TileLink B channel message into an inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // L2 carries 2 extra vaddr bits in the data field, used to build the vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if (DCacheAboveIndexOffset > DCacheTagOffset) {
    // we have an alias problem: extra alias bits are needed for the index
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0) // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  req.needData := io.mem_probe.bits.data(0)
  req.id := DontCare

  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i) := entry.io.req.ready
    entry.io.req.bits := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp
    entry.io.pipe_resp.valid := io.pipe_req.fire()
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  // delay the selected probe req for 1 cycle
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire(),
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === pipe_req_arb.io.out.bits.addr,
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === io.pipe_req.bits.addr && io.pipe_req.valid
  )
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  PipelineConnect(pipe_req_arb.io.out, io.pipe_req, io.pipe_req.fire() && !resvsetProbeBlock, false.B, resvsetProbeBlock)
  // When update_resv_set is asserted, block all probe reqs in the next cycle.
  // This gives the probe vs. reservation-set address comparison a dedicated
  // cycle, which leads to better timing.
  when (resvsetProbeBlock) {
    io.pipe_req.valid := false.B
  }
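  // Timing sketch (an illustrative note added here; the cycle labels are ours,
  // not from the original source):
  //   cycle T:   update_resv_set fires, or the selected probe's address matches
  //              lrsc_locked_block
  //   cycle T+1: resvsetProbeBlock (a RegNext of the above) forces
  //              io.pipe_req.valid low, so no probe enters the main pipe while
  //              the reservation set may still be changing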
  // sanity checks on incoming probe requests
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not issue another probe for the same block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.mem_probe.bits.address)).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  when (io.mem_probe.fire()) {
    XSDebug("mem_probe: ")
    io.mem_probe.bits.dump
  }

// when (io.pipe_req.fire()) {
//   io.pipe_req.bits.dump()
// }

  when (io.lrsc_locked_block.valid) {
    XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  }

  // performance events: bucket the probe queue occupancy into quarters
  val perfinfo = IO(new Bundle() {
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  val num_valid_entries = PopCount(entries.map(e => e.io.block_addr.valid))
  val perfEvents = Seq(
    ("dcache_probq_req      ", io.pipe_req.fire()),
    ("dcache_probq_1/4_valid", num_valid_entries < (cfg.nProbeEntries.U / 4.U)),
    ("dcache_probq_2/4_valid", num_valid_entries > (cfg.nProbeEntries.U / 4.U) && num_valid_entries <= (cfg.nProbeEntries.U / 2.U)),
    ("dcache_probq_3/4_valid", num_valid_entries > (cfg.nProbeEntries.U / 2.U) && num_valid_entries <= (cfg.nProbeEntries.U * 3.U / 4.U)),
    ("dcache_probq_4/4_valid", num_valid_entries > (cfg.nProbeEntries.U * 3.U / 4.U))
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}
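// Usage sketch (illustrative only, not part of the original source): inside the
// DCache top level, with the TileLink edge `edge` and TL bundle `bus` in scope,
// the queue would be wired up roughly as below; the mainPipe signal names are
// assumptions for this sketch:
//
//   val probeQueue = Module(new ProbeQueue(edge))
//   probeQueue.io.mem_probe <> bus.b    // probes arrive on the B channel
//   mainPipe.io.probe_req <> probeQueue.io.pipe_req
//   probeQueue.io.lrsc_locked_block := mainPipe.io.lrsc_locked_block
//   probeQueue.io.update_resv_set := mainPipe.io.update_resv_set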