/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}

class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()

  // FIXME lyq: can data and resp_data be merged?
  val resp_data = UInt(XLEN.W)
  val resp_nderr = Bool()

  // FIXME lyq: confirm the forward logic; if there is no forwarding, these can be removed
  val fwd_data = UInt(XLEN.W)
  val fwd_mask = UInt(DataBytes.W)

  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    id := x.id
    nc := x.nc
    atomic := x.atomic
    resp_nderr := false.B
    resp_data := 0.U
    fwd_data := 0.U
    fwd_mask := 0.U
  }

  def update(x: TLBundleD): Unit = {
    resp_data := x.data
    resp_nderr := x.denied
  }

  def update(forwardData: UInt, forwardMask: UInt): Unit = {
    fwd_data := forwardData
    fwd_mask := forwardMask
  }

  def toUncacheWordResp(): UncacheWordResp = {
    // byte-wise merge: take forwarded bytes where fwd_mask is set, bus data elsewhere
    val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
      Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), resp_data(8*(j+1)-1, 8*j))
    )).asUInt
    val r = Wire(new UncacheWordResp)
    r := DontCare
    r.data := resp_fwd_data
    r.id := id
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}
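
/**
  * Worked example for `toUncacheWordResp` (illustrative values only, assuming
  * DataBytes = 8):
  *
  *   resp_data = 0x1111_2222_3333_4444   // returned by the bus
  *   fwd_data  = 0xAAAA_BBBB_CCCC_DDDD   // forwarded from a newer store
  *   fwd_mask  = 0b0000_1111             // only the low four bytes forwarded
  *
  * Byte j of r.data takes fwd_data when fwd_mask(j) is set and resp_data
  * otherwise, giving r.data = 0x1111_2222_CCCC_DDDD.
  */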

class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // FIXME lyq: should the state be multiple Bools or a UInt()?
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame
  def can2Uncache(): Bool = valid && !inflight && !waitSame && !waitReturn
  def can2Lsq(): Bool = valid && waitReturn

  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  def updateUncacheResp(): Unit = {
    assert(inflight, "received a response for a request that was never sent")
    inflight := false.B
    waitReturn := true.B
  }
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}

class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// Converts DCacheIO to TileLink.
// For now, we only deal with TL-UL.

class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    ))
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
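
  /**
    * Entry lifecycle, as encoded by UncacheEntryState above:
    *   1. alloc (e0):   valid := true; waitSame is also set if an in-flight
    *                    (or about-to-issue) request to the same address exists.
    *   2. issue (q0):   inflight := true when mem_acquire fires.
    *   3. bus response: inflight := false, waitReturn := true (updateUncacheResp).
    *   4. return (r0):  all flags cleared when resp fires back to the LSQ
    *                    (updateReturn); the entry is then free for reallocation.
    */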

  // drain buffer
  val empty = Wire(Bool())
  val f0_needDrain = Wire(Bool())
  val do_uarch_drain = RegNext(f0_needDrain)

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    solved in one cycle to achieve the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
      1. same addr
      2. same cmd
      3. valid
    FIXME lyq: not merged for now, due to the following issues
      1. loads can't be merged
      2. how to merge a store with its response precisely
  */

  val e0_fire = req.fire
  val e0_req = req.bits
  /**
    TODO lyq: prohibit, wait, or forward?
    NOW: strictly block on a same-address match; otherwise, an exhaustive
    case analysis is needed:
      - ld -> ld  wait
      - ld -> st  forward
      - st -> ld  forward
      - st -> st  block
  */
  val e0_existSameVec = sizeMap(j =>
    e0_req.addr === entries(j).addr && states(j).isValid()
  )
  val e0_invalidVec = sizeMap(i => !states(i).isValid() && !e0_existSameVec(i))
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_alloc = e0_canAlloc && e0_fire
  req_ready := e0_invalidVec.asUInt.orR && !do_uarch_drain

  when (e0_alloc) {
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)

    // judge whether to wait on the same block: e0 & q0
    val waitSameVec = sizeMap(j =>
      e0_req.addr === entries(j).addr && states(j).isValid() && states(j).isInflight()
    )
    val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
    when (waitSameVec.reduce(_ || _) || waitQ0) {
      states(e0_allocIdx).setWaitSame(true.B)
    }
  }
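
  /**
    * Same-address ordering example: suppose store A to address X is in flight
    * and store B to X is then allocated at e0. B is marked waitSame, so
    * can2Uncache() keeps it out of the q0 arbitration. When A's grant returns,
    * the same-address scan in the response logic clears B's waitSame and B may
    * then issue. This preserves st -> st ordering without merging.
    */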

  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which entry is sent
   *    q0: send it
   *
   *  Version 1 (better performance)
   *    solved in one cycle to achieve the original performance.
   *  NOTE: "Enter Buffer" & "Uncache Req" do not form a continuous pipeline,
   *    because there is no guarantee that mem_acquire will always be ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_refill_req) &&
    states(i).can2Uncache()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  // map the number of active mask bytes to a TileLink lgSize:
  // PopCount(mask) = 1/2/4/8  ->  lgSize = 0/1/2/3, i.e. 2^lgSize bytes
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource = q0_canSentIdx,
    toAddress = q0_entry.addr,
    lgSize = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource = q0_canSentIdx,
    toAddress = q0_entry.addr,
    lgSize = lgSize,
    data = q0_entry.data,
    mask = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 must also mark valid same-block entries as waitSame
    (0 until UncacheBufferSize).foreach(j =>
      when(q0_entry.addr === entries(j).addr && states(j).isValid() && !states(j).isWaitReturn()){
        states(j).setWaitSame(true.B)
      }
    )
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // clear the waitSame state of same-block entries
    (0 until UncacheBufferSize).foreach(j =>
      when(entries(id).addr === entries(j).addr && states(j).isValid() && states(j).isWaitSame()){
        states(j).setWaitSame(false.B)
      }
    )
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   *  // FIXME lyq: how to handle:
   *  1. when io.flush.valid is true
   *  2. when io.lsq.req.bits.atomic is true
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty
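
  /**
    * Flush handshake (a sketch of the intended use, inferred from
    * UncacheFlushBundle's port directions): the requester asserts
    * io.flush.valid and then waits until io.flush.empty is true, i.e. until
    * no buffer entry is still valid. Note that io.flush.valid is not
    * consumed anywhere in the current implementation (see the FIXME above).
    */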

  /******************************************************************
   * Load Data Forward
   *
   *  0. ld in the ldu pipeline
   *    f0: tag match, fast resp
   *    f1: data resp
   *
   *  1. ld in the buffer (in "Enter Buffer")
   *    ld(en) -> st(in): ld entry.update, state.updateUncacheResp
   *    st(en) -> ld(in): ld entry.update, state.updateUncacheResp
   *    NOW: strictly blocked on a same-address match; no such forwarding occurs.
   *
   ******************************************************************/

  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f0_needDrain := f0_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_vtagMatches = sizeMap(w => entries(w).vaddr === forward.vaddr)
    val f0_ptagMatches = sizeMap(w => entries(w).addr === forward.paddr)
    // a vaddr/paddr match disagreement indicates aliasing; request a drain
    f0_tagMismatchVec(i) := forward.valid && sizeMap(w =>
      f0_vtagMatches(w) =/= f0_ptagMatches(w) && f0_validMask(w)
    ).asUInt.orR
    when (f0_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        RegNext(f0_ptagMatches.asUInt),
        RegNext(f0_vtagMatches.asUInt),
        RegNext(forward.vaddr),
        RegNext(forward.paddr)
      )
    }

    val f0_validTagMatches = sizeMap(w => f0_ptagMatches(w) && f0_validMask(w) && forward.valid)

    val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
    val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
    val f0_fwdMask = shiftMaskToHigh(
      forward.paddr,
      Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_fwdData = shiftDataToHigh(
      forward.paddr,
      Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
    ).asTypeOf(Vec(VDataBytes, UInt(8.W)))

    val f1_fwdValid = RegNext(forward.valid)
    val f1_fwdMask = RegEnable(f0_fwdMask, forward.valid)
    val f1_fwdData = RegEnable(f0_fwdData, forward.valid)

    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f0_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_fwdMask(j)

      forward.forwardMask(j) := false.B
      forward.forwardData(j) := DontCare
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
        forward.forwardData(j) := f1_fwdData(j)
      }
    }
  }
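
  /**
    * Forward timing, as implemented above: in cycle f0 the CAM match and
    * forwardMaskFast are produced combinationally; the matched mask/data are
    * registered (RegEnable) so that forwardMask and forwardData are returned
    * one cycle later, in f1, matching the "f0: tag match, fast resp / f1:
    * data resp" split described in the section header.
    */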

  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purposes
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  when(mem_acquire.valid){
    XSDebug("mem_acquire valid, ready=%d ", mem_acquire.ready)
    mem_acquire.bits.dump
  }
  when (mem_grant.fire) {
    XSDebug("mem_grant fire ")
    mem_grant.bits.dump
  }

  /* Performance Counters */
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  XSPerfAccumulate("vaddr_match_failed", PopCount(f0_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  )

  generatePerfEvent()
  // End
}