/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}

class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()

  val resp_nderr = Bool()

  /* NOTE: if the internal forward logic is ever supported, these fields can be uncommented. */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    id := x.id
    nc := x.nc
    atomic := x.atomic
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  def update(x: TLBundleD): Unit = {
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
    resp_nderr := x.denied
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  def toUncacheWordResp(): UncacheWordResp = {
    // val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // )).asUInt
    val resp_fwd_data = data
    val r = Wire(new UncacheWordResp)
    r := DontCare
    r.data := resp_fwd_data
    r.id := id
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}

class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame
  def can2Uncache(): Bool = valid && !inflight && !waitSame && !waitReturn
  def can2Lsq(): Bool = valid && waitReturn
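
  /**
    * Entry lifecycle sketch (editorial note, derived from the flags above):
    *   allocate:        valid := true  (waitSame := true while another access to the
    *                                    same address is still in flight)
    *   issue to bus:    inflight := true
    *   bus response:    inflight := false, waitReturn := true  (updateUncacheResp)
    *   return to LSQ:   all flags cleared                      (updateReturn)
    */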
  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  def updateUncacheResp(): Unit = {
    assert(inflight, "Received an uncache response for a request that was never sent")
    inflight := false.B
    waitReturn := true.B
  }
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}

class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// convert DCacheIO to TileLink
// For now, we only deal with TL-UL

class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    ))
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR

  // drain buffer
  val empty = Wire(Bool())
  val f0_needDrain = Wire(Bool())
  val do_uarch_drain = RegNext(f0_needDrain)

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/
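
  // Editorial sketch (assumed reading of the FSM below): when io.enableOutstanding is
  // low, at most one uncache request is handled at a time:
  //   idle -> send on channel A -> wait for channel D -> return resp to LSQ -> idle.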
  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    handled in a single cycle to achieve the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
      1. same addr
      2. same cmd
      3. valid
    FIXME lyq: no merging for now due to the following issues
      1. loads can't be merged
      2. how to merge a store and its response precisely
  */

  val e0_fire = req.fire
  val e0_req = req.bits
  /**
    TODO lyq: block, wait, or forward?
    NOW: strictly block on the same address; otherwise exhaustive consideration is needed.
      - ld -> ld  wait
      - ld -> st  forward
      - st -> ld  forward
      - st -> st  block
  */
  val e0_existSame = sizeMap(j => e0_req.addr === entries(j).addr && states(j).isValid()).asUInt.orR
  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_alloc = e0_canAlloc && !e0_existSame && e0_fire
  req_ready := e0_invalidVec.asUInt.orR && !e0_existSame && !do_uarch_drain

  when (e0_alloc) {
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)

    // decide whether to wait for an in-flight access to the same block: e0 & q0
    val waitSameVec = sizeMap(j =>
      e0_req.addr === entries(j).addr && states(j).isValid() && states(j).isInflight()
    )
    val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
    when (waitSameVec.reduce(_ || _) || waitQ0) {
      states(e0_allocIdx).setWaitSame(true.B)
    }

  }


  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    handled in a single cycle to achieve the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" are not a continuous pipeline,
   *      because there is no guarantee that mem_acquire will always be ready.
   ******************************************************************/
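
  // Editorial sketch (assumption about intent, consistent with the assertion below):
  // q0 priority-encodes the first entry that can be issued, derives the TileLink
  // access size from PopCount of the byte mask (which must be 1/2/4/8 bytes), and
  // picks Get for loads or Put for stores. For power-of-two sizes the explicit
  // lookup would be equivalent to a hypothetical
  //   val lgSizeAlt = Log2(PopCount(q0_entry.mask))
  // but the PriorityMuxWithFlag form also yields the `legal` flag used by the assertion.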
  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_refill_req) &&
    states(i).can2Uncache()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size === m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource = q0_canSentIdx,
    toAddress = q0_entry.addr,
    lgSize = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource = q0_canSentIdx,
    toAddress = q0_entry.addr,
    lgSize = lgSize,
    data = q0_entry.data,
    mask = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // mark other valid entries to the same block so they wait for this request
    (0 until UncacheBufferSize).foreach(j =>
      when(q0_entry.addr === entries(j).addr && states(j).isValid() && !states(j).isWaitReturn()){
        states(j).setWaitSame(true.B)
      }
    )
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // clear the wait-same-block state of entries matching this address
    (0 until UncacheBufferSize).foreach(j =>
      when(entries(id).addr === entries(j).addr && states(j).isValid() && states(j).isWaitSame()){
        states(j).setWaitSame(false.B)
      }
    )
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   *  1. when io.flush.valid is true: drain the store queue and the uncache buffer
   *  2. when io.lsq.req.bits.atomic is true: not supported for now
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty


  /******************************************************************
   * Load Data Forward
   *
   *  0. ld in ldu pipeline
   *      f0: tag match, fast resp
   *      f1: data resp
   *
   *  1. ld in buffer (in "Enter Buffer")
   *      ld(en) -> st(in): ld entry.update, state.updateUncacheResp
   *      st(en) -> ld(in): ld entry.update, state.updateUncacheResp
   *      NOW: strictly blocked on the same address, so there is no such forward.
   *
   ******************************************************************/
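
  // Editorial note (assumed summary of the two-stage forward below): f0 CAM-matches
  // the load's vaddr/paddr against valid store entries and produces the fast forward
  // mask; f1 registers the mask/data for the actual forward. A disagreement between
  // the vaddr and paddr matches is reported via matchInvalid and triggers a buffer
  // drain (new allocations are blocked until the buffer empties).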
  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f0_needDrain := f0_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_vtagMatches = sizeMap(w => entries(w).vaddr === forward.vaddr)
    val f0_ptagMatches = sizeMap(w => entries(w).addr === forward.paddr)
    f0_tagMismatchVec(i) := forward.valid && sizeMap(w =>
      f0_vtagMatches(w) =/= f0_ptagMatches(w) && f0_validMask(w)
    ).asUInt.orR
    when (f0_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        RegNext(f0_ptagMatches.asUInt),
        RegNext(f0_vtagMatches.asUInt),
        RegNext(forward.vaddr),
        RegNext(forward.paddr)
      )
    }

    val f0_validTagMatches = sizeMap(w => f0_ptagMatches(w) && f0_validMask(w) && forward.valid)

    val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
    val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
    val f0_fwdMask = shiftMaskToHigh(
      forward.paddr,
      Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_fwdData = shiftDataToHigh(
      forward.paddr,
      Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
    ).asTypeOf(Vec(VDataBytes, UInt(8.W)))

    val f1_fwdValid = RegNext(forward.valid)
    val f1_fwdMask = RegEnable(f0_fwdMask, forward.valid)
    val f1_fwdData = RegEnable(f0_fwdData, forward.valid)

    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f0_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_fwdMask(j)

      forward.forwardMask(j) := false.B
      forward.forwardData(j) := DontCare
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
        forward.forwardData(j) := f1_fwdData(j)
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purposes
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  when(mem_acquire.valid){
    XSDebug("mem_acquire valid, ready=%d ", mem_acquire.ready)
    mem_acquire.bits.dump
  }
  when (mem_grant.fire) {
    XSDebug("mem_grant fire ")
    mem_grant.bits.dump
  }

  /* Performance Counters */
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f0_tagMismatchVec))
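
  // Editorial note: perfEvents presumably mirrors the XSPerfAccumulate counters above
  // so they can also be collected through the HasPerfEvents / generatePerfEvent() plumbing.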
  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f0_tagMismatchVec))
  )

  generatePerfEvent()
  // End
}