1/*************************************************************************************** 2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3* Copyright (c) 2020-2021 Peng Cheng Laboratory 4* 5* XiangShan is licensed under Mulan PSL v2. 6* You can use this software according to the terms and conditions of the Mulan PSL v2. 7* You may obtain a copy of Mulan PSL v2 at: 8* http://license.coscl.org.cn/MulanPSL2 9* 10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13* 14* See the Mulan PSL v2 for more details. 15***************************************************************************************/ 16 17package xiangshan.cache 18 19import chisel3._ 20import chisel3.util._ 21import org.chipsalliance.cde.config.Parameters 22import utils._ 23import utility._ 24import xiangshan._ 25import xiangshan.mem._ 26import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes} 27import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters} 28 29class UncacheFlushBundle extends Bundle { 30 val valid = Output(Bool()) 31 val empty = Input(Bool()) 32} 33 34class UncacheEntry(implicit p: Parameters) extends DCacheBundle { 35 val cmd = UInt(M_SZ.W) 36 val addr = UInt(PAddrBits.W) 37 val vaddr = UInt(VAddrBits.W) 38 val data = UInt(XLEN.W) 39 val mask = UInt(DataBytes.W) 40 val id = UInt(uncacheIdxBits.W) 41 val nc = Bool() 42 val atomic = Bool() 43 44 val resp_nderr = Bool() 45 46 /* NOTE: if it support the internal forward logic, here can uncomment */ 47 // val fwd_data = UInt(XLEN.W) 48 // val fwd_mask = UInt(DataBytes.W) 49 50 def set(x: UncacheWordReq): Unit = { 51 cmd := x.cmd 52 addr := x.addr 53 vaddr := x.vaddr 54 data := x.data 55 mask := x.mask 56 id := x.id 57 nc := x.nc 58 atomic := x.atomic 59 resp_nderr := false.B 60 // fwd_data := 0.U 61 // fwd_mask := 0.U 62 } 63 64 def update(x: TLBundleD): Unit = { 65 when(cmd === MemoryOpConstants.M_XRD) { 66 data := x.data 67 } 68 resp_nderr := x.denied 69 } 70 71 // def update(forwardData: UInt, forwardMask: UInt): Unit = { 72 // fwd_data := forwardData 73 // fwd_mask := forwardMask 74 // } 75 76 def toUncacheWordResp(): UncacheWordResp = { 77 // val resp_fwd_data = VecInit((0 until DataBytes).map(j => 78 // Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j)) 79 // )).asUInt 80 val resp_fwd_data = data 81 val r = Wire(new UncacheWordResp) 82 r := DontCare 83 r.data := resp_fwd_data 84 r.id := id 85 r.nderr := resp_nderr 86 r.nc := nc 87 r.is2lq := cmd === MemoryOpConstants.M_XRD 88 r.miss := false.B 89 r.replay := false.B 90 r.tag_error := false.B 91 r.error := false.B 92 r 93 } 94} 95 96class UncacheEntryState(implicit p: Parameters) extends DCacheBundle { 97 // valid (-> waitSame) -> inflight -> waitReturn 98 val valid = Bool() 99 val inflight = Bool() // uncache -> L2 100 val waitSame = Bool() 101 val waitReturn = Bool() // uncache -> LSQ 102 103 def init: Unit = { 104 valid := false.B 105 inflight := false.B 106 waitSame := false.B 107 waitReturn := false.B 108 } 109 110 def isValid(): Bool = valid 111 def isInflight(): Bool = inflight 112 def isWaitReturn(): Bool = waitReturn 113 def isWaitSame(): Bool = waitSame 114 def can2Uncache(): Bool = valid && !inflight && !waitSame && !waitReturn 115 def can2Lsq(): Bool = valid && waitReturn 116 117 def setValid(x: Bool): Unit = { valid := x} 118 def setInflight(x: Bool): Unit = { inflight := x} 119 def setWaitReturn(x: Bool): Unit = { waitReturn := x } 120 def setWaitSame(x: Bool): Unit = { waitSame := x} 121 122 def updateUncacheResp(): Unit = { 123 assert(inflight, "The request was not sent and a response was received") 124 inflight := false.B 125 waitReturn := true.B 126 } 127 def updateReturn(): Unit = { 128 valid := false.B 129 inflight := false.B 130 waitSame := false.B 131 waitReturn := false.B 132 } 133} 134 135class UncacheIO(implicit p: Parameters) extends DCacheBundle { 136 val hartId = Input(UInt()) 137 val enableOutstanding = Input(Bool()) 138 val flush = Flipped(new UncacheFlushBundle) 139 val lsq = Flipped(new UncacheWordIO) 140 val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) 141} 142 143// convert DCacheIO to TileLink 144// for Now, we only deal with TL-UL 145 146class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter { 147 override def shouldBeInlined: Boolean = false 148 def idRange: Int = UncacheBufferSize 149 150 val clientParameters = TLMasterPortParameters.v1( 151 clients = Seq(TLMasterParameters.v1( 152 "uncache", 153 sourceId = IdRange(0, idRange) 154 )) 155 ) 156 val clientNode = TLClientNode(Seq(clientParameters)) 157 158 lazy val module = new UncacheImp(this) 159} 160 161/* Uncache Buffer */ 162class UncacheImp(outer: Uncache)extends LazyModuleImp(outer) 163 with HasTLDump 164 with HasXSParameter 165 with HasPerfEvents 166{ 167 private val INDEX_WIDTH = log2Up(UncacheBufferSize) 168 println(s"Uncahe Buffer Size: $UncacheBufferSize entries") 169 val io = IO(new UncacheIO) 170 171 val (bus, edge) = outer.clientNode.out.head 172 173 val req = io.lsq.req 174 val resp = io.lsq.resp 175 val mem_acquire = bus.a 176 val mem_grant = bus.d 177 val req_ready = WireInit(false.B) 178 179 // assign default values to output signals 180 bus.b.ready := false.B 181 bus.c.valid := false.B 182 bus.c.bits := DontCare 183 bus.d.ready := false.B 184 bus.e.valid := false.B 185 bus.e.bits := DontCare 186 io.lsq.req.ready := req_ready 187 io.lsq.resp.valid := false.B 188 io.lsq.resp.bits := DontCare 189 190 191 /****************************************************************** 192 * Data Structure 193 ******************************************************************/ 194 195 val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry)) 196 val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState)))) 197 val fence = RegInit(Bool(), false.B) 198 val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4) 199 val uState = RegInit(s_idle) 200 201 def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f)) 202 def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR 203 def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR 204 def addrMatch(x: UncacheEntry, y: UncacheWordReq): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3) 205 def addrMatch(x: UncacheWordReq, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3) 206 def addrMatch(x: UncacheEntry, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3) 207 def addrMatch(x: UInt, y: UInt): Bool = x(PAddrBits - 1, 3) === y(PAddrBits - 1, 3) 208 209 // drain buffer 210 val empty = Wire(Bool()) 211 val f1_needDrain = Wire(Bool()) 212 val do_uarch_drain = RegNext(f1_needDrain) 213 214 val q0_entry = Wire(new UncacheEntry) 215 val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W)) 216 val q0_canSent = Wire(Bool()) 217 218 219 /****************************************************************** 220 * uState for non-outstanding 221 ******************************************************************/ 222 223 switch(uState){ 224 is(s_idle){ 225 when(req.fire){ 226 uState := s_refill_req 227 } 228 } 229 is(s_refill_req){ 230 when(mem_acquire.fire){ 231 uState := s_refill_resp 232 } 233 } 234 is(s_refill_resp){ 235 when(mem_grant.fire){ 236 uState := s_send_resp 237 } 238 } 239 is(s_send_resp){ 240 when(resp.fire){ 241 uState := s_idle 242 } 243 } 244 } 245 246 247 /****************************************************************** 248 * Enter Buffer 249 * Version 0 (better timing) 250 * e0 judge: alloc/merge write vec 251 * e1 alloc 252 * 253 * Version 1 (better performance) 254 * solved in one cycle for achieving the original performance. 255 ******************************************************************/ 256 257 /** 258 TODO lyq: how to merge 259 1. same addr 260 2. same cmd 261 3. valid 262 FIXME lyq: not merge now due to the following issues 263 1. load cann't be merged 264 2. how to merge store and response precisely 265 */ 266 267 val e0_fire = req.fire 268 val e0_req_valid = req.valid 269 val e0_req = req.bits 270 /** 271 TODO lyq: block or wait or forward? 272 NOW: strict block by same address; otherwise: exhaustive consideration is needed. 273 - ld->ld wait 274 - ld->st forward 275 - st->ld forward 276 - st->st block 277 */ 278 val e0_existSame = sizeMap(j => e0_req_valid && states(j).isValid() && addrMatch(e0_req, entries(j))).asUInt.orR 279 val e0_invalidVec = sizeMap(i => !states(i).isValid()) 280 val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec) 281 val e0_alloc = e0_canAlloc && !e0_existSame && e0_fire 282 req_ready := e0_invalidVec.asUInt.orR && !e0_existSame && !do_uarch_drain 283 284 when (e0_alloc) { 285 entries(e0_allocIdx).set(e0_req) 286 states(e0_allocIdx).setValid(true.B) 287 288 // judge whether wait same block: e0 & q0 289 val waitSameVec = sizeMap(j => 290 e0_req_valid && states(j).isValid() && states(j).isInflight() && addrMatch(e0_req, entries(j)) 291 ) 292 val waitQ0 = q0_canSent && addrMatch(e0_req, q0_entry) 293 when (waitSameVec.reduce(_ || _) || waitQ0) { 294 states(e0_allocIdx).setWaitSame(true.B) 295 } 296 297 } 298 299 300 /****************************************************************** 301 * Uncache Req 302 * Version 0 (better timing) 303 * q0: choose which one is sent 304 * q0: sent 305 * 306 * Version 1 (better performance) 307 * solved in one cycle for achieving the original performance. 308 * NOTE: "Enter Buffer" & "Uncache Req" not a continuous pipeline, 309 * because there is no guarantee that mem_aquire will be always ready. 310 ******************************************************************/ 311 312 val q0_canSentVec = sizeMap(i => 313 (io.enableOutstanding || uState === s_refill_req) && 314 states(i).can2Uncache() 315 ) 316 val q0_res = PriorityEncoderWithFlag(q0_canSentVec) 317 q0_canSentIdx := q0_res._1 318 q0_canSent := q0_res._2 319 q0_entry := entries(q0_canSentIdx) 320 321 val size = PopCount(q0_entry.mask) 322 val (lgSize, legal) = PriorityMuxWithFlag(Seq( 323 1.U -> 0.U, 324 2.U -> 1.U, 325 4.U -> 2.U, 326 8.U -> 3.U 327 ).map(m => (size===m._1) -> m._2)) 328 assert(!(q0_canSent && !legal)) 329 330 val q0_load = edge.Get( 331 fromSource = q0_canSentIdx, 332 toAddress = q0_entry.addr, 333 lgSize = lgSize 334 )._2 335 336 val q0_store = edge.Put( 337 fromSource = q0_canSentIdx, 338 toAddress = q0_entry.addr, 339 lgSize = lgSize, 340 data = q0_entry.data, 341 mask = q0_entry.mask 342 )._2 343 344 val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR 345 346 mem_acquire.valid := q0_canSent 347 mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load) 348 when(mem_acquire.fire){ 349 states(q0_canSentIdx).setInflight(true.B) 350 351 // q0 should judge whether wait same block 352 (0 until UncacheBufferSize).map(j => 353 when(states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){ 354 states(j).setWaitSame(true.B) 355 } 356 ) 357 } 358 359 360 /****************************************************************** 361 * Uncache Resp 362 ******************************************************************/ 363 364 val (_, _, refill_done, _) = edge.addr_inc(mem_grant) 365 366 mem_grant.ready := true.B 367 when (mem_grant.fire) { 368 val id = mem_grant.bits.source 369 entries(id).update(mem_grant.bits) 370 states(id).updateUncacheResp() 371 assert(refill_done, "Uncache response should be one beat only!") 372 373 // remove state of wait same block 374 (0 until UncacheBufferSize).map(j => 375 when(states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){ 376 states(j).setWaitSame(false.B) 377 } 378 ) 379 } 380 381 382 /****************************************************************** 383 * Return to LSQ 384 ******************************************************************/ 385 386 val r0_canSentVec = sizeMap(i => states(i).can2Lsq()) 387 val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec) 388 resp.valid := r0_canSent 389 resp.bits := entries(r0_canSentIdx).toUncacheWordResp() 390 when(resp.fire){ 391 states(r0_canSentIdx).updateReturn() 392 } 393 394 395 /****************************************************************** 396 * Buffer Flush 397 * 1. when io.flush.valid is true: drain store queue and ubuffer 398 * 2. when io.lsq.req.bits.atomic is true: not support temporarily 399 ******************************************************************/ 400 empty := !VecInit(states.map(_.isValid())).asUInt.orR 401 io.flush.empty := empty 402 403 404 /****************************************************************** 405 * Load Data Forward 406 * 407 * 0. ld in ldu pipeline 408 * f0: vaddr match, mask & data select, fast resp 409 * f1: paddr match, resp 410 * 411 * 1. ld in buffer (in "Enter Buffer") 412 * ld(en) -> st(in): ld entry.update, state.updateUncacheResp 413 * st(en) -> ld(in): ld entry.update, state.updateUncacheResp 414 * NOW: strict block by same address; there is no such forward. 415 * 416 ******************************************************************/ 417 418 val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid()) 419 val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask)) 420 val f0_fwdDataCandidates = VecInit(entries.map(e => e.data)) 421 val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool())) 422 f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty 423 424 for ((forward, i) <- io.forward.zipWithIndex) { 425 val f0_fwdValid = forward.valid 426 val f1_fwdValid = RegNext(f0_fwdValid) 427 428 // f0 vaddr match 429 val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr)) 430 val f0_validTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid) 431 // f0 select 432 val f0_fwdMask = shiftMaskToHigh( 433 forward.vaddr, 434 Mux1H(f0_validTagMatches, f0_fwdMaskCandidates) 435 ).asTypeOf(Vec(VDataBytes, Bool())) 436 val f0_fwdData = shiftDataToHigh( 437 forward.vaddr, 438 Mux1H(f0_validTagMatches, f0_fwdDataCandidates) 439 ).asTypeOf(Vec(VDataBytes, UInt(8.W))) 440 441 // f1 paddr match 442 val f1_fwdMask = RegEnable(f0_fwdMask, f0_fwdValid) 443 val f1_fwdData = RegEnable(f0_fwdData, f0_fwdValid) 444 // forward.paddr from dtlb, which is far from uncache 445 val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), RegEnable(forward.paddr, f0_fwdValid))) 446 f1_tagMismatchVec(i) := sizeMap(w => 447 RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid 448 ).asUInt.orR 449 when(f1_tagMismatchVec(i)) { 450 XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n", 451 f1_ptagMatches.asUInt, 452 RegEnable(f0_vtagMatches.asUInt, f0_fwdValid), 453 RegEnable(forward.vaddr, f0_fwdValid), 454 RegEnable(forward.paddr, f0_fwdValid) 455 ) 456 } 457 // f1 output 458 forward.addrInvalid := false.B // addr in ubuffer is always ready 459 forward.dataInvalid := false.B // data in ubuffer is always ready 460 forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match 461 for (j <- 0 until VDataBytes) { 462 forward.forwardMaskFast(j) := f0_fwdMask(j) 463 464 forward.forwardData(j) := f1_fwdData(j) 465 forward.forwardMask(j) := false.B 466 when(f1_fwdMask(j) && f1_fwdValid) { 467 forward.forwardMask(j) := true.B 468 } 469 } 470 471 } 472 473 474 /****************************************************************** 475 * Debug / Performance 476 ******************************************************************/ 477 478 /* Debug Counters */ 479 // print all input/output requests for debug purpose 480 // print req/resp 481 XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n", 482 req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask) 483 XSDebug(resp.fire, "data: %x\n", req.bits.data) 484 // print tilelink messages 485 XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready) 486 mem_acquire.bits.dump(mem_acquire.valid) 487 488 XSDebug(mem_grant.fire, "mem_grant fire ") 489 mem_grant.bits.dump(mem_grant.fire) 490 491 /* Performance Counters */ 492 XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc) 493 XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc) 494 XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc) 495 XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc) 496 XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire) 497 XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))) 498 XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec)) 499 500 val perfEvents = Seq( 501 ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc), 502 ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc), 503 ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc), 504 ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc), 505 ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire), 506 ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))), 507 ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec)) 508 ) 509 510 generatePerfEvent() 511 // End 512} 513