1/*************************************************************************************** 2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3* Copyright (c) 2020-2021 Peng Cheng Laboratory 4* 5* XiangShan is licensed under Mulan PSL v2. 6* You can use this software according to the terms and conditions of the Mulan PSL v2. 7* You may obtain a copy of Mulan PSL v2 at: 8* http://license.coscl.org.cn/MulanPSL2 9* 10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13* 14* See the Mulan PSL v2 for more details. 15***************************************************************************************/ 16 17package xiangshan.cache 18 19import chipsalliance.rocketchip.config.Parameters 20import chisel3._ 21import chisel3.util._ 22import freechips.rocketchip.tilelink.ClientStates._ 23import freechips.rocketchip.tilelink.MemoryOpCategories._ 24import freechips.rocketchip.tilelink.TLPermissions._ 25import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions} 26import utils._ 27import xiangshan.L1CacheErrorInfo 28 29class MainPipeReq(implicit p: Parameters) extends DCacheBundle { 30 val miss = Bool() // only amo miss will refill in main pipe 31 val miss_id = UInt(log2Up(cfg.nMissEntries).W) 32 val miss_param = UInt(TLPermissions.bdWidth.W) 33 val miss_dirty = Bool() 34 val miss_way_en = UInt(DCacheWays.W) 35 36 val probe = Bool() 37 val probe_param = UInt(TLPermissions.bdWidth.W) 38 val probe_need_data = Bool() 39 40 // request info 41 // reqs from Store, AMO use this 42 // probe does not use this 43 val source = UInt(sourceTypeWidth.W) 44 val cmd = UInt(M_SZ.W) 45 // if dcache size > 32KB, vaddr is also needed for store 46 // vaddr is used to get extra index bits 47 val vaddr = UInt(VAddrBits.W) 48 // must be aligned to block 49 val addr = 
UInt(PAddrBits.W) 50 51 // store 52 val store_data = UInt((cfg.blockBytes * 8).W) 53 val store_mask = UInt(cfg.blockBytes.W) 54 55 // which word does amo work on? 56 val word_idx = UInt(log2Up(cfg.blockBytes * 8 / DataBits).W) 57 val amo_data = UInt(DataBits.W) 58 val amo_mask = UInt((DataBits / 8).W) 59 60 // error 61 val error = Bool() 62 63 // replace 64 val replace = Bool() 65 val replace_way_en = UInt(DCacheWays.W) 66 67 val id = UInt(reqIdWidth.W) 68 69 def isLoad: Bool = source === LOAD_SOURCE.U 70 def isStore: Bool = source === STORE_SOURCE.U 71 def isAMO: Bool = source === AMO_SOURCE.U 72 73 def convertStoreReq(store: DCacheLineReq): MainPipeReq = { 74 val req = Wire(new MainPipeReq) 75 req := DontCare 76 req.miss := false.B 77 req.miss_dirty := false.B 78 req.probe := false.B 79 req.probe_need_data := false.B 80 req.source := STORE_SOURCE.U 81 req.cmd := store.cmd 82 req.addr := store.addr 83 req.vaddr := store.vaddr 84 req.store_data := store.data 85 req.store_mask := store.mask 86 req.replace := false.B 87 req.error := false.B 88 req.id := store.id 89 req 90 } 91} 92 93class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents { 94 val io = IO(new Bundle() { 95 // probe queue 96 val probe_req = Flipped(DecoupledIO(new MainPipeReq)) 97 // store miss go to miss queue 98 val miss_req = DecoupledIO(new MissReq) 99 // store buffer 100 val store_req = Flipped(DecoupledIO(new DCacheLineReq)) 101 val store_replay_resp = ValidIO(new DCacheLineResp) 102 val store_hit_resp = ValidIO(new DCacheLineResp) 103 val release_update = ValidIO(new ReleaseUpdate) 104 // atmoics 105 val atomic_req = Flipped(DecoupledIO(new MainPipeReq)) 106 val atomic_resp = ValidIO(new AtomicsResp) 107 // replace 108 val replace_req = Flipped(DecoupledIO(new MainPipeReq)) 109 val replace_resp = ValidIO(UInt(log2Up(cfg.nMissEntries).W)) 110 // write-back queue 111 val wb = DecoupledIO(new WritebackReq) 112 113 val data_read_intend = Output(Bool()) 114 val data_read = 
DecoupledIO(new L1BankedDataReadLineReq) 115 val data_resp = Input(Vec(DCacheBanks, new L1BankedDataReadResult())) 116 val readline_error_delayed = Input(Bool()) 117 val data_write = DecoupledIO(new L1BankedDataWriteReq) 118 119 val meta_read = DecoupledIO(new MetaReadReq) 120 val meta_resp = Input(Vec(nWays, new Meta)) 121 val meta_write = DecoupledIO(new MetaWriteReq) 122 val error_flag_resp = Input(Vec(nWays, Bool())) 123 val error_flag_write = DecoupledIO(new ErrorWriteReq) 124 125 val tag_read = DecoupledIO(new TagReadReq) 126 val tag_resp = Input(Vec(nWays, UInt(encTagBits.W))) 127 val tag_write = DecoupledIO(new TagWriteReq) 128 val tag_write_intend = Output(new Bool()) 129 130 // update state vec in replacement algo 131 val replace_access = ValidIO(new ReplacementAccessBundle) 132 // find the way to be replaced 133 val replace_way = new ReplacementWayReqIO 134 135 val status = new Bundle() { 136 val s0_set = ValidIO(UInt(idxBits.W)) 137 val s1, s2, s3 = ValidIO(new Bundle() { 138 val set = UInt(idxBits.W) 139 val way_en = UInt(nWays.W) 140 }) 141 } 142 143 // lrsc locked block should block probe 144 val lrsc_locked_block = Output(Valid(UInt(PAddrBits.W))) 145 val invalid_resv_set = Input(Bool()) 146 val update_resv_set = Output(Bool()) 147 val block_lr = Output(Bool()) 148 149 // ecc error 150 val error = Output(new L1CacheErrorInfo()) 151 }) 152 153 // meta array is made of regs, so meta write or read should always be ready 154 assert(RegNext(io.meta_read.ready)) 155 assert(RegNext(io.meta_write.ready)) 156 157 val s1_s0_set_conflict, s2_s0_set_conlict, s3_s0_set_conflict = Wire(Bool()) 158 val set_conflict = s1_s0_set_conflict || s2_s0_set_conlict || s3_s0_set_conflict 159 // check sbuffer store req set_conflict in parallel with req arbiter 160 // it will speed up the generation of store_req.ready, which is in crit. 
path 161 val s1_s0_set_conflict_store, s2_s0_set_conlict_store, s3_s0_set_conflict_store = Wire(Bool()) 162 val store_set_conflict = s1_s0_set_conflict_store || s2_s0_set_conlict_store || s3_s0_set_conflict_store 163 val s1_ready, s2_ready, s3_ready = Wire(Bool()) 164 165 // convert store req to main pipe req, and select a req from store and probe 166 val store_req = Wire(DecoupledIO(new MainPipeReq)) 167 store_req.bits := (new MainPipeReq).convertStoreReq(io.store_req.bits) 168 store_req.valid := io.store_req.valid 169 io.store_req.ready := store_req.ready 170 171 // s0: read meta and tag 172 val req = Wire(DecoupledIO(new MainPipeReq)) 173 arbiter( 174 in = Seq( 175 io.probe_req, 176 io.replace_req, 177 store_req, // Note: store_req.ready is now manually assigned for better timing 178 io.atomic_req 179 ), 180 out = req, 181 name = Some("main_pipe_req") 182 ) 183 184 val store_idx = get_idx(io.store_req.bits.vaddr) 185 // manually assign store_req.ready for better timing 186 // now store_req set conflict check is done in parallel with req arbiter 187 store_req.ready := io.meta_read.ready && io.tag_read.ready && s1_ready && !store_set_conflict && 188 !io.probe_req.valid && !io.replace_req.valid 189 val s0_req = req.bits 190 val s0_idx = get_idx(s0_req.vaddr) 191 val s0_need_tag = io.tag_read.valid 192 val s0_can_go = io.meta_read.ready && io.tag_read.ready && s1_ready && !set_conflict 193 val s0_fire = req.valid && s0_can_go 194 195 val bank_write = VecInit((0 until DCacheBanks).map(i => get_mask_of_bank(i, s0_req.store_mask).orR)).asUInt 196 val bank_full_write = VecInit((0 until DCacheBanks).map(i => get_mask_of_bank(i, s0_req.store_mask).andR)).asUInt 197 val banks_full_overwrite = bank_full_write.andR 198 199 val banked_store_rmask = bank_write & ~bank_full_write 200 val banked_full_rmask = ~0.U(DCacheBanks.W) 201 val banked_none_rmask = 0.U(DCacheBanks.W) 202 203 val store_need_data = !s0_req.probe && s0_req.isStore && banked_store_rmask.orR 204 val 
probe_need_data = s0_req.probe 205 val amo_need_data = !s0_req.probe && s0_req.isAMO 206 val miss_need_data = s0_req.miss 207 val replace_need_data = s0_req.replace 208 209 val banked_need_data = store_need_data || probe_need_data || amo_need_data || miss_need_data || replace_need_data 210 211 val s0_banked_rmask = Mux(store_need_data, banked_store_rmask, 212 Mux(probe_need_data || amo_need_data || miss_need_data || replace_need_data, 213 banked_full_rmask, 214 banked_none_rmask 215 )) 216 217 // generate wmask here and use it in stage 2 218 val banked_store_wmask = bank_write 219 val banked_full_wmask = ~0.U(DCacheBanks.W) 220 val banked_none_wmask = 0.U(DCacheBanks.W) 221 222 // s1: read data 223 val s1_valid = RegInit(false.B) 224 val s1_need_data = RegEnable(banked_need_data, s0_fire) 225 val s1_req = RegEnable(s0_req, s0_fire) 226 val s1_banked_rmask = RegEnable(s0_banked_rmask, s0_fire) 227 val s1_banked_store_wmask = RegEnable(banked_store_wmask, s0_fire) 228 val s1_need_tag = RegEnable(s0_need_tag, s0_fire) 229 val s1_can_go = s2_ready && (io.data_read.ready || !s1_need_data) 230 val s1_fire = s1_valid && s1_can_go 231 val s1_idx = get_idx(s1_req.vaddr) 232 233 // duplicate regs to reduce fanout 234 val s1_valid_dup = RegInit(VecInit(Seq.fill(6)(false.B))) 235 val s1_req_vaddr_dup_for_data_read = RegEnable(s0_req.vaddr, s0_fire) 236 val s1_idx_dup_for_replace_way = RegEnable(get_idx(s0_req.vaddr), s0_fire) 237 val s1_idx_dup_for_status = RegEnable(get_idx(s0_req.vaddr), s0_fire) 238 239 when (s0_fire) { 240 s1_valid := true.B 241 s1_valid_dup.foreach(_ := true.B) 242 }.elsewhen (s1_fire) { 243 s1_valid := false.B 244 s1_valid_dup.foreach(_ := false.B) 245 } 246 s1_ready := !s1_valid_dup(0) || s1_can_go 247 s1_s0_set_conflict := s1_valid_dup(1) && s0_idx === s1_idx 248 s1_s0_set_conflict_store := s1_valid_dup(2) && store_idx === s1_idx 249 250 val meta_resp = Wire(Vec(nWays, (new Meta).asUInt())) 251 val tag_resp = Wire(Vec(nWays, UInt(tagBits.W))) 252 val 
ecc_resp = Wire(Vec(nWays, UInt(eccTagBits.W))) 253 meta_resp := Mux(RegNext(s0_fire), VecInit(io.meta_resp.map(_.asUInt)), RegNext(meta_resp)) 254 tag_resp := Mux(RegNext(s0_fire), VecInit(io.tag_resp.map(r => r(tagBits - 1, 0))), RegNext(tag_resp)) 255 ecc_resp := Mux(RegNext(s0_fire), VecInit(io.tag_resp.map(r => r(encTagBits - 1, tagBits))), RegNext(ecc_resp)) 256 val enc_tag_resp = Wire(io.tag_resp.cloneType) 257 enc_tag_resp := Mux(RegNext(s0_fire), io.tag_resp, RegNext(enc_tag_resp)) 258 259 def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f)) 260 val s1_tag_eq_way = wayMap((w: Int) => tag_resp(w) === get_tag(s1_req.addr)).asUInt 261 val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && Meta(meta_resp(w)).coh.isValid()).asUInt 262 val s1_tag_match = s1_tag_match_way.orR 263 264 val s1_hit_tag = Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap(w => tag_resp(w))), get_tag(s1_req.addr)) 265 val s1_hit_coh = ClientMetadata(Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap(w => meta_resp(w))), 0.U)) 266 val s1_encTag = Mux1H(s1_tag_match_way, wayMap((w: Int) => enc_tag_resp(w))) 267 val s1_flag_error = Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap(w => io.error_flag_resp(w))), false.B) 268 val s1_l2_error = s1_req.error 269 270 // replacement policy 271 val s1_repl_way_en = WireInit(0.U(nWays.W)) 272 s1_repl_way_en := Mux(RegNext(s0_fire), UIntToOH(io.replace_way.way), RegNext(s1_repl_way_en)) 273 val s1_repl_tag = Mux1H(s1_repl_way_en, wayMap(w => tag_resp(w))) 274 val s1_repl_coh = Mux1H(s1_repl_way_en, wayMap(w => meta_resp(w))).asTypeOf(new ClientMetadata) 275 val s1_miss_tag = Mux1H(s1_req.miss_way_en, wayMap(w => tag_resp(w))) 276 val s1_miss_coh = Mux1H(s1_req.miss_way_en, wayMap(w => meta_resp(w))).asTypeOf(new ClientMetadata) 277 278 val s1_repl_way_raw = WireInit(0.U(log2Up(nWays).W)) 279 s1_repl_way_raw := Mux(RegNext(s0_fire), io.replace_way.way, RegNext(s1_repl_way_raw)) 280 281 val s1_need_replacement = (s1_req.miss || 
s1_req.isStore && !s1_req.probe) && !s1_tag_match 282 val s1_way_en = Mux( 283 s1_req.replace, 284 s1_req.replace_way_en, 285 Mux( 286 s1_req.miss, 287 s1_req.miss_way_en, 288 Mux( 289 s1_need_replacement, 290 s1_repl_way_en, 291 s1_tag_match_way 292 ) 293 ) 294 ) 295 assert(!RegNext(s1_fire && PopCount(s1_way_en) > 1.U)) 296 val s1_tag = Mux( 297 s1_req.replace, 298 get_tag(s1_req.addr), 299 Mux( 300 s1_req.miss, 301 s1_miss_tag, 302 Mux(s1_need_replacement, s1_repl_tag, s1_hit_tag) 303 ) 304 ) 305 val s1_coh = Mux( 306 s1_req.replace, 307 Mux1H(s1_req.replace_way_en, meta_resp.map(ClientMetadata(_))), 308 Mux( 309 s1_req.miss, 310 s1_miss_coh, 311 Mux(s1_need_replacement, s1_repl_coh, s1_hit_coh) 312 ) 313 ) 314 315 val s1_has_permission = s1_hit_coh.onAccess(s1_req.cmd)._1 316 val s1_hit = s1_tag_match && s1_has_permission 317 val s1_pregen_can_go_to_mq = !s1_req.replace && !s1_req.probe && !s1_req.miss && (s1_req.isStore || s1_req.isAMO) && !s1_hit 318 319 // s2: select data, return resp if this is a store miss 320 val s2_valid = RegInit(false.B) 321 val s2_req = RegEnable(s1_req, s1_fire) 322 val s2_tag_match = RegEnable(s1_tag_match, s1_fire) 323 val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_fire) 324 val s2_hit_coh = RegEnable(s1_hit_coh, s1_fire) 325 val (s2_has_permission, _, s2_new_hit_coh) = s2_hit_coh.onAccess(s2_req.cmd) 326 327 val s2_repl_tag = RegEnable(s1_repl_tag, s1_fire) 328 val s2_repl_coh = RegEnable(s1_repl_coh, s1_fire) 329 val s2_repl_way_en = RegEnable(s1_repl_way_en, s1_fire) 330 val s2_need_replacement = RegEnable(s1_need_replacement, s1_fire) 331 val s2_need_data = RegEnable(s1_need_data, s1_fire) 332 val s2_need_tag = RegEnable(s1_need_tag, s1_fire) 333 val s2_encTag = RegEnable(s1_encTag, s1_fire) 334 val s2_idx = get_idx(s2_req.vaddr) 335 336 // duplicate regs to reduce fanout 337 val s2_valid_dup = RegInit(VecInit(Seq.fill(8)(false.B))) 338 val s2_req_vaddr_dup_for_miss_req = RegEnable(s1_req.vaddr, s1_fire) 339 val 
s2_idx_dup_for_status = RegEnable(get_idx(s1_req.vaddr), s1_fire) 340 val s2_idx_dup_for_replace_access = RegEnable(get_idx(s1_req.vaddr), s1_fire) 341 342 val s2_req_replace_dup_1, 343 s2_req_replace_dup_2 = RegEnable(s1_req.replace, s1_fire) 344 345 val s2_can_go_to_mq_dup = (0 until 3).map(_ => RegEnable(s1_pregen_can_go_to_mq, s1_fire)) 346 347 val s2_way_en = RegEnable(s1_way_en, s1_fire) 348 val s2_tag = RegEnable(s1_tag, s1_fire) 349 val s2_coh = RegEnable(s1_coh, s1_fire) 350 val s2_banked_store_wmask = RegEnable(s1_banked_store_wmask, s1_fire) 351 val s2_flag_error = RegEnable(s1_flag_error, s1_fire) 352 val s2_tag_error = dcacheParameters.tagCode.decode(s2_encTag).error && s2_need_tag 353 val s2_l2_error = s2_req.error 354 val s2_error = s2_flag_error || s2_tag_error || s2_l2_error // data_error not included 355 356 val s2_may_report_data_error = s2_need_data && s2_coh.state =/= ClientStates.Nothing 357 358 val s2_hit = s2_tag_match && s2_has_permission 359 val s2_amo_hit = s2_hit && !s2_req.probe && !s2_req.miss && s2_req.isAMO 360 val s2_store_hit = s2_hit && !s2_req.probe && !s2_req.miss && s2_req.isStore 361 362 s2_s0_set_conlict := s2_valid_dup(0) && s0_idx === s2_idx 363 s2_s0_set_conlict_store := s2_valid_dup(1) && store_idx === s2_idx 364 365 // For a store req, it either hits and goes to s3, or miss and enter miss queue immediately 366 val s2_can_go_to_s3 = (s2_req_replace_dup_1 || s2_req.probe || s2_req.miss || (s2_req.isStore || s2_req.isAMO) && s2_hit) && s3_ready 367 val s2_can_go_to_mq = RegEnable(s1_pregen_can_go_to_mq, s1_fire) 368 assert(RegNext(!(s2_valid && s2_can_go_to_s3 && s2_can_go_to_mq))) 369 val s2_can_go = s2_can_go_to_s3 || s2_can_go_to_mq 370 val s2_fire = s2_valid && s2_can_go 371 val s2_fire_to_s3 = s2_valid_dup(2) && s2_can_go_to_s3 372 when (s1_fire) { 373 s2_valid := true.B 374 s2_valid_dup.foreach(_ := true.B) 375 }.elsewhen (s2_fire) { 376 s2_valid := false.B 377 s2_valid_dup.foreach(_ := false.B) 378 } 379 s2_ready := 
!s2_valid_dup(3) || s2_can_go 380 val replay = !io.miss_req.ready 381 382 val data_resp = Wire(io.data_resp.cloneType) 383 data_resp := Mux(RegNext(s1_fire), io.data_resp, RegNext(data_resp)) 384 val s2_store_data_merged = Wire(Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W))) 385 386 def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = { 387 val full_wmask = FillInterleaved(8, wmask) 388 ((~full_wmask & old_data) | (full_wmask & new_data)) 389 } 390 391 val s2_data = WireInit(VecInit((0 until DCacheBanks).map(i => { 392 data_resp(i).raw_data 393 }))) 394 395 for (i <- 0 until DCacheBanks) { 396 val old_data = s2_data(i) 397 val new_data = get_data_of_bank(i, s2_req.store_data) 398 // for amo hit, we should use read out SRAM data 399 // do not merge with store data 400 val wmask = Mux(s2_amo_hit, 0.U(wordBytes.W), get_mask_of_bank(i, s2_req.store_mask)) 401 s2_store_data_merged(i) := mergePutData(old_data, new_data, wmask) 402 } 403 404 val s2_data_word = s2_store_data_merged(s2_req.word_idx) 405 406 // s3: write data, meta and tag 407 val s3_valid = RegInit(false.B) 408 val s3_req = RegEnable(s2_req, s2_fire_to_s3) 409 // val s3_idx = get_idx(s3_req.vaddr) 410 val s3_tag = RegEnable(s2_tag, s2_fire_to_s3) 411 val s3_tag_match = RegEnable(s2_tag_match, s2_fire_to_s3) 412 val s3_coh = RegEnable(s2_coh, s2_fire_to_s3) 413 val s3_hit = RegEnable(s2_hit, s2_fire_to_s3) 414 val s3_amo_hit = RegEnable(s2_amo_hit, s2_fire_to_s3) 415 val s3_store_hit = RegEnable(s2_store_hit, s2_fire_to_s3) 416 val s3_hit_coh = RegEnable(s2_hit_coh, s2_fire_to_s3) 417 val s3_new_hit_coh = RegEnable(s2_new_hit_coh, s2_fire_to_s3) 418 val s3_way_en = RegEnable(s2_way_en, s2_fire_to_s3) 419 val s3_banked_store_wmask = RegEnable(s2_banked_store_wmask, s2_fire_to_s3) 420 val s3_store_data_merged = RegEnable(s2_store_data_merged, s2_fire_to_s3) 421 val s3_data_word = RegEnable(s2_data_word, s2_fire_to_s3) 422 val s3_data = RegEnable(s2_data, s2_fire_to_s3) 423 val s3_l2_error = 
s3_req.error 424 // data_error will be reported by data array 1 cycle after data read resp 425 val s3_data_error = Wire(Bool()) 426 s3_data_error := Mux(RegNext(RegNext(s1_fire)), // ecc check result is generated 2 cycle after read req 427 io.readline_error_delayed && RegNext(s2_may_report_data_error), 428 RegNext(s3_data_error) // do not update s3_data_error if !s1_fire 429 ) 430 // error signal for amo inst 431 // s3_error = s3_flag_error || s3_tag_error || s3_l2_error || s3_data_error 432 val s3_error = RegEnable(s2_error, s2_fire_to_s3) || s3_data_error 433 val (_, _, probe_new_coh) = s3_coh.onProbe(s3_req.probe_param) 434 val s3_need_replacement = RegEnable(s2_need_replacement, s2_fire_to_s3) 435 436 437 // duplicate regs to reduce fanout 438 val s3_valid_dup = RegInit(VecInit(Seq.fill(14)(false.B))) 439 val s3_way_en_dup = (0 until 4).map(_ => RegEnable(s2_way_en, s2_fire_to_s3)) 440 val s3_coh_dup = (0 until 6).map(_ => RegEnable(s2_coh, s2_fire_to_s3)) 441 val s3_tag_match_dup = RegEnable(s2_tag_match, s2_fire_to_s3) 442 443 val s3_req_vaddr_dup_for_wb, 444 s3_req_vaddr_dup_for_data_write = RegEnable(s2_req.vaddr, s2_fire_to_s3) 445 446 val s3_idx_dup = (0 until 6).map(_ => RegEnable(get_idx(s2_req.vaddr), s2_fire_to_s3)) 447 448 val s3_req_replace_dup = (0 until 8).map(_ => RegEnable(s2_req.replace, s2_fire_to_s3)) 449 val s3_req_cmd_dup = (0 until 6).map(_ => RegEnable(s2_req.cmd, s2_fire_to_s3)) 450 val s3_req_source_dup_1, s3_req_source_dup_2 = RegEnable(s2_req.source, s2_fire_to_s3) 451 val s3_req_addr_dup = (0 until 5).map(_ => RegEnable(s2_req.addr, s2_fire_to_s3)) 452 val s3_req_probe_dup = (0 until 10).map(_ => RegEnable(s2_req.probe, s2_fire_to_s3)) 453 val s3_req_miss_dup = (0 until 10).map(_ => RegEnable(s2_req.miss, s2_fire_to_s3)) 454 val s3_req_word_idx_dup = (0 until DCacheBanks).map(_ => RegEnable(s2_req.word_idx, s2_fire_to_s3)) 455 456 val s3_need_replacement_dup = RegEnable(s2_need_replacement, s2_fire_to_s3) 457 458 val s3_s_amoalu_dup 
= RegInit(VecInit(Seq.fill(3)(false.B))) 459 460 val s3_hit_coh_dup = RegEnable(s2_hit_coh, s2_fire_to_s3) 461 val s3_new_hit_coh_dup = (0 until 2).map(_ => RegEnable(s2_new_hit_coh, s2_fire_to_s3)) 462 val s3_amo_hit_dup = RegEnable(s2_amo_hit, s2_fire_to_s3) 463 val s3_store_hit_dup = (0 until 2).map(_ => RegEnable(s2_store_hit, s2_fire_to_s3)) 464 465 val lrsc_count_dup = RegInit(VecInit(Seq.fill(3)(0.U(log2Ceil(LRSCCycles).W)))) 466 val lrsc_valid_dup = lrsc_count_dup.map { case cnt => cnt > LRSCBackOff.U } 467 val lrsc_addr_dup = Reg(UInt()) 468 469 val s3_req_probe_param_dup = RegEnable(s2_req.probe_param, s2_fire_to_s3) 470 val (_, probe_shrink_param, _) = s3_coh.onProbe(s3_req_probe_param_dup) 471 472 473 val miss_update_meta = s3_req.miss 474 val probe_update_meta = s3_req_probe_dup(0) && s3_tag_match_dup && s3_coh_dup(0) =/= probe_new_coh 475 val store_update_meta = s3_req.isStore && !s3_req_probe_dup(1) && s3_hit_coh =/= s3_new_hit_coh_dup(0) 476 val amo_update_meta = s3_req.isAMO && !s3_req_probe_dup(2) && s3_hit_coh_dup =/= s3_new_hit_coh_dup(1) 477 val amo_wait_amoalu = s3_req.isAMO && s3_req_cmd_dup(0) =/= M_XLR && s3_req_cmd_dup(1) =/= M_XSC 478 val update_meta = (miss_update_meta || probe_update_meta || store_update_meta || amo_update_meta) && !s3_req_replace_dup(0) 479 480 def missCohGen(cmd: UInt, param: UInt, dirty: Bool) = { 481 val c = categorize(cmd) 482 MuxLookup(Cat(c, param, dirty), Nothing, Seq( 483 //(effect param) -> (next) 484 Cat(rd, toB, false.B) -> Branch, 485 Cat(rd, toB, true.B) -> Branch, 486 Cat(rd, toT, false.B) -> Trunk, 487 Cat(rd, toT, true.B) -> Dirty, 488 Cat(wi, toT, false.B) -> Trunk, 489 Cat(wi, toT, true.B) -> Dirty, 490 Cat(wr, toT, false.B) -> Dirty, 491 Cat(wr, toT, true.B) -> Dirty)) 492 } 493 val miss_new_coh = ClientMetadata(missCohGen(s3_req_cmd_dup(2), s3_req.miss_param, s3_req.miss_dirty)) 494 495 val new_coh = Mux( 496 miss_update_meta, 497 miss_new_coh, 498 Mux( 499 probe_update_meta, 500 probe_new_coh, 501 
Mux( 502 store_update_meta || amo_update_meta, 503 s3_new_hit_coh, 504 ClientMetadata.onReset 505 ) 506 ) 507 ) 508 509 // LR, SC and AMO 510 val debug_sc_fail_addr = RegInit(0.U) 511 val debug_sc_fail_cnt = RegInit(0.U(8.W)) 512 513 val lrsc_count = RegInit(0.U(log2Ceil(LRSCCycles).W)) 514 // val lrsc_valid = lrsc_count > LRSCBackOff.U 515 val lrsc_addr = Reg(UInt()) 516 val s3_lr = !s3_req_probe_dup(3) && s3_req.isAMO && s3_req_cmd_dup(3) === M_XLR 517 val s3_sc = !s3_req_probe_dup(4) && s3_req.isAMO && s3_req_cmd_dup(4) === M_XSC 518 val s3_lrsc_addr_match = lrsc_valid_dup(0) && lrsc_addr === get_block_addr(s3_req.addr) 519 val s3_sc_fail = s3_sc && !s3_lrsc_addr_match 520 val s3_sc_resp = Mux(s3_sc_fail, 1.U, 0.U) 521 522 val s3_can_do_amo = (s3_req_miss_dup(0) && !s3_req_probe_dup(5) && s3_req.isAMO) || s3_amo_hit 523 val s3_can_do_amo_write = s3_can_do_amo && isWrite(s3_req_cmd_dup(5)) && !s3_sc_fail 524 525 when (s3_valid_dup(0) && (s3_lr || s3_sc)) { 526 when (s3_can_do_amo && s3_lr) { 527 lrsc_count := (LRSCCycles - 1).U 528 lrsc_count_dup.foreach(_ := (LRSCCycles - 1).U) 529 lrsc_addr := get_block_addr(s3_req_addr_dup(0)) 530 lrsc_addr_dup := get_block_addr(s3_req_addr_dup(0)) 531 } .otherwise { 532 lrsc_count := 0.U 533 lrsc_count_dup.foreach(_ := 0.U) 534 } 535 } .elsewhen (io.invalid_resv_set) { 536 // when we release this block, 537 // we invalidate this reservation set 538 lrsc_count := 0.U 539 } .elsewhen (lrsc_count > 0.U) { 540 lrsc_count := lrsc_count - 1.U 541 lrsc_count_dup.foreach({case cnt => 542 cnt := cnt - 1.U 543 }) 544 } 545 546 io.lrsc_locked_block.valid := lrsc_valid_dup(1) 547 io.lrsc_locked_block.bits := lrsc_addr_dup 548 io.block_lr := RegNext(lrsc_count > 0.U) 549 550 // When we update update_resv_set, block all probe req in the next cycle 551 // It should give Probe reservation set addr compare an independent cycle, 552 // which will lead to better timing 553 io.update_resv_set := s3_valid_dup(1) && s3_lr && s3_can_do_amo 554 555 
// when we release this block, 556 // we invalidate this reservation set 557 when (io.invalid_resv_set) { 558 lrsc_count := 0.U 559 lrsc_count_dup.foreach(_ := 0.U) 560 } 561 562 when (s3_valid_dup(2)) { 563 when (s3_req_addr_dup(1) === debug_sc_fail_addr) { 564 when (s3_sc_fail) { 565 debug_sc_fail_cnt := debug_sc_fail_cnt + 1.U 566 } .elsewhen (s3_sc) { 567 debug_sc_fail_cnt := 0.U 568 } 569 } .otherwise { 570 when (s3_sc_fail) { 571 debug_sc_fail_addr := s3_req_addr_dup(2) 572 debug_sc_fail_cnt := 1.U 573 XSWarn(s3_sc_fail === 100.U, p"L1DCache failed too many SCs in a row 0x${Hexadecimal(debug_sc_fail_addr)}, check if sth went wrong\n") 574 } 575 } 576 } 577 // assert(debug_sc_fail_cnt < 100.U, "L1DCache failed too many SCs in a row") 578 579 val banked_amo_wmask = UIntToOH(s3_req.word_idx) 580// val banked_wmask = s3_banked_store_wmask 581 val banked_wmask = Mux( 582 s3_req_miss_dup(1), 583 banked_full_wmask, 584 Mux( 585 s3_store_hit, 586 s3_banked_store_wmask, 587 Mux( 588 s3_can_do_amo_write, 589 banked_amo_wmask, 590 banked_none_wmask 591 ) 592 ) 593 ) 594 val update_data = s3_req_miss_dup(2) || s3_store_hit_dup(0) || s3_can_do_amo_write 595 assert(!(s3_valid && banked_wmask.orR && !update_data)) 596 597 // generate write data 598 // AMO hits 599 val s3_s_amoalu = RegInit(false.B) 600 val do_amoalu = amo_wait_amoalu && s3_valid_dup(3) && !s3_s_amoalu 601 val amoalu = Module(new AMOALU(wordBits)) 602 amoalu.io.mask := s3_req.amo_mask 603 amoalu.io.cmd := s3_req.cmd 604 amoalu.io.lhs := s3_data_word 605 amoalu.io.rhs := s3_req.amo_data 606 607 // merge amo write data 608// val amo_bitmask = FillInterleaved(8, s3_req.amo_mask) 609 val s3_amo_data_merged = Wire(Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W))) 610 val s3_sc_data_merged = Wire(Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W))) 611 for (i <- 0 until DCacheBanks) { 612 val old_data = s3_store_data_merged(i) 613 val new_data = amoalu.io.out 614 val wmask = Mux( 615 s3_req_word_idx_dup(i) === i.U, 616 
~0.U(wordBytes.W), 617 0.U(wordBytes.W) 618 ) 619 s3_amo_data_merged(i) := mergePutData(old_data, new_data, wmask) 620// s3_sc_data_merged(i) := amo_bitmask & s3_req.amo_data | ~amo_bitmask & old_data 621 s3_sc_data_merged(i) := mergePutData(old_data, s3_req.amo_data, 622 Mux(s3_req_word_idx_dup(i) === i.U && !s3_sc_fail, s3_req.amo_mask, 0.U(wordBytes.W)) 623 ) 624 } 625 val s3_amo_data_merged_reg = RegEnable(s3_amo_data_merged, do_amoalu) 626 when(do_amoalu){ 627 s3_s_amoalu := true.B 628 s3_s_amoalu_dup.foreach(_ := true.B) 629 } 630 631 val miss_wb = s3_req_miss_dup(3) && s3_need_replacement && s3_coh_dup(1).state =/= ClientStates.Nothing 632 val miss_wb_dup = s3_req_miss_dup(3) && s3_need_replacement_dup && s3_coh_dup(1).state =/= ClientStates.Nothing 633 val probe_wb = s3_req.probe 634 val replace_wb = s3_req.replace 635 val need_wb = miss_wb_dup || probe_wb || replace_wb 636 637 val (_, miss_shrink_param, _) = s3_coh_dup(2).onCacheControl(M_FLUSH) 638 val writeback_param = Mux(probe_wb, probe_shrink_param, miss_shrink_param) 639 val writeback_data = if (dcacheParameters.alwaysReleaseData) { 640 s3_tag_match && s3_req_probe_dup(6) && s3_req.probe_need_data || 641 s3_coh_dup(3) === ClientStates.Dirty || (miss_wb || replace_wb) && s3_coh_dup(3).state =/= ClientStates.Nothing 642 } else { 643 s3_tag_match && s3_req_probe_dup(6) && s3_req.probe_need_data || s3_coh_dup(3) === ClientStates.Dirty 644 } 645 646 val s3_probe_can_go = s3_req_probe_dup(7) && io.wb.ready && (io.meta_write.ready || !probe_update_meta) 647 val s3_store_can_go = s3_req_source_dup_1 === STORE_SOURCE.U && !s3_req_probe_dup(8) && (io.meta_write.ready || !store_update_meta) && (io.data_write.ready || !update_data) 648 val s3_amo_can_go = s3_amo_hit_dup && (io.meta_write.ready || !amo_update_meta) && (io.data_write.ready || !update_data) && (s3_s_amoalu_dup(0) || !amo_wait_amoalu) 649 val s3_miss_can_go = s3_req_miss_dup(4) && 650 (io.meta_write.ready || !amo_update_meta) && 651 
(io.data_write.ready || !update_data) && 652 (s3_s_amoalu_dup(1) || !amo_wait_amoalu) && 653 io.tag_write.ready && 654 io.wb.ready 655 val s3_replace_nothing = s3_req_replace_dup(1) && s3_coh_dup(4).state === ClientStates.Nothing 656 val s3_replace_can_go = s3_req_replace_dup(2) && (s3_replace_nothing || io.wb.ready) 657 val s3_can_go = s3_probe_can_go || s3_store_can_go || s3_amo_can_go || s3_miss_can_go || s3_replace_can_go 658 val s3_update_data_cango = s3_store_can_go || s3_amo_can_go || s3_miss_can_go // used to speed up data_write gen 659 val s3_fire = s3_valid_dup(4) && s3_can_go 660 when (s2_fire_to_s3) { 661 s3_valid := true.B 662 s3_valid_dup.foreach(_ := true.B) 663 }.elsewhen (s3_fire) { 664 s3_valid := false.B 665 s3_valid_dup.foreach(_ := false.B) 666 } 667 s3_ready := !s3_valid_dup(5) || s3_can_go 668 s3_s0_set_conflict := s3_valid_dup(6) && s3_idx_dup(0) === s0_idx 669 s3_s0_set_conflict_store := s3_valid_dup(7) && s3_idx_dup(1) === store_idx 670 assert(RegNext(!s3_valid || !(s3_req_source_dup_2 === STORE_SOURCE.U && !s3_req.probe) || s3_hit)) // miss store should never come to s3 671 672 when(s3_fire) { 673 s3_s_amoalu := false.B 674 s3_s_amoalu_dup.foreach(_ := false.B) 675 } 676 677 req.ready := s0_can_go 678 679 io.meta_read.valid := req.valid && s1_ready && !set_conflict 680 io.meta_read.bits.idx := get_idx(s0_req.vaddr) 681 io.meta_read.bits.way_en := Mux(s0_req.replace, s0_req.replace_way_en, ~0.U(nWays.W)) 682 683 io.tag_read.valid := req.valid && s1_ready && !set_conflict && !s0_req.replace 684 io.tag_read.bits.idx := get_idx(s0_req.vaddr) 685 io.tag_read.bits.way_en := ~0.U(nWays.W) 686 687 io.data_read_intend := s1_valid_dup(3) && s1_need_data 688 io.data_read.valid := s1_valid_dup(4) && s1_need_data && s2_ready 689 io.data_read.bits.rmask := s1_banked_rmask 690 io.data_read.bits.way_en := s1_way_en 691 io.data_read.bits.addr := s1_req_vaddr_dup_for_data_read 692 693 io.miss_req.valid := s2_valid_dup(4) && s2_can_go_to_mq_dup(0) 694 val 
miss_req = io.miss_req.bits 695 miss_req := DontCare 696 miss_req.source := s2_req.source 697 miss_req.cmd := s2_req.cmd 698 miss_req.addr := s2_req.addr 699 miss_req.vaddr := s2_req_vaddr_dup_for_miss_req 700 miss_req.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en) 701 miss_req.store_data := s2_req.store_data 702 miss_req.store_mask := s2_req.store_mask 703 miss_req.word_idx := s2_req.word_idx 704 miss_req.amo_data := s2_req.amo_data 705 miss_req.amo_mask := s2_req.amo_mask 706 miss_req.req_coh := s2_hit_coh 707 miss_req.replace_coh := s2_repl_coh 708 miss_req.replace_tag := s2_repl_tag 709 miss_req.id := s2_req.id 710 miss_req.cancel := false.B 711 712 io.store_replay_resp.valid := s2_valid_dup(5) && s2_can_go_to_mq_dup(1) && replay && s2_req.isStore 713 io.store_replay_resp.bits.data := DontCare 714 io.store_replay_resp.bits.miss := true.B 715 io.store_replay_resp.bits.replay := true.B 716 io.store_replay_resp.bits.id := s2_req.id 717 718 io.store_hit_resp.valid := s3_valid_dup(8) && s3_store_can_go 719 io.store_hit_resp.bits.data := DontCare 720 io.store_hit_resp.bits.miss := false.B 721 io.store_hit_resp.bits.replay := false.B 722 io.store_hit_resp.bits.id := s3_req.id 723 724 io.release_update.valid := s3_valid_dup(9) && (s3_store_can_go || s3_amo_can_go) && s3_hit && update_data 725 io.release_update.bits.addr := s3_req_addr_dup(3) 726 io.release_update.bits.mask := Mux(s3_store_hit_dup(1), s3_banked_store_wmask, banked_amo_wmask) 727 io.release_update.bits.data := Mux( 728 amo_wait_amoalu, 729 s3_amo_data_merged_reg, 730 Mux( 731 s3_sc, 732 s3_sc_data_merged, 733 s3_store_data_merged 734 ) 735 ).asUInt 736 737 val atomic_hit_resp = Wire(new AtomicsResp) 738 atomic_hit_resp.data := Mux(s3_sc, s3_sc_resp, s3_data_word) 739 atomic_hit_resp.miss := false.B 740 atomic_hit_resp.miss_id := s3_req.miss_id 741 atomic_hit_resp.error := s3_error 742 atomic_hit_resp.replay := false.B 743 atomic_hit_resp.ack_miss_queue := s3_req_miss_dup(5) 744 
atomic_hit_resp.id := lrsc_valid_dup(2) 745 val atomic_replay_resp = Wire(new AtomicsResp) 746 atomic_replay_resp.data := DontCare 747 atomic_replay_resp.miss := true.B 748 atomic_replay_resp.miss_id := DontCare 749 atomic_replay_resp.error := false.B 750 atomic_replay_resp.replay := true.B 751 atomic_replay_resp.ack_miss_queue := false.B 752 atomic_replay_resp.id := DontCare 753 val atomic_replay_resp_valid = s2_valid_dup(6) && s2_can_go_to_mq_dup(2) && replay && s2_req.isAMO 754 val atomic_hit_resp_valid = s3_valid_dup(10) && (s3_amo_can_go || s3_miss_can_go && s3_req.isAMO) 755 io.atomic_resp.valid := atomic_replay_resp_valid || atomic_hit_resp_valid 756 io.atomic_resp.bits := Mux(atomic_replay_resp_valid, atomic_replay_resp, atomic_hit_resp) 757 758 io.replace_resp.valid := s3_fire && s3_req_replace_dup(3) 759 io.replace_resp.bits := s3_req.miss_id 760 761 io.meta_write.valid := s3_fire && update_meta 762 io.meta_write.bits.idx := s3_idx_dup(2) 763 io.meta_write.bits.way_en := s3_way_en_dup(0) 764 io.meta_write.bits.meta.coh := new_coh 765 766 io.error_flag_write.valid := s3_fire && update_meta && s3_l2_error 767 io.error_flag_write.bits.idx := s3_idx_dup(3) 768 io.error_flag_write.bits.way_en := s3_way_en_dup(1) 769 io.error_flag_write.bits.error := s3_l2_error 770 771 io.tag_write.valid := s3_fire && s3_req_miss_dup(6) 772 io.tag_write.bits.idx := s3_idx_dup(4) 773 io.tag_write.bits.way_en := s3_way_en_dup(2) 774 io.tag_write.bits.tag := get_tag(s3_req_addr_dup(4)) 775 776 io.tag_write_intend := s3_req_miss_dup(7) && s3_valid_dup(11) 777 XSPerfAccumulate("fake_tag_write_intend", io.tag_write_intend && !io.tag_write.valid) 778 XSPerfAccumulate("mainpipe_tag_write", io.tag_write.valid) 779 780 assert(!RegNext(io.tag_write.valid && !io.tag_write_intend)) 781 782 io.data_write.valid := s3_valid_dup(12) && s3_update_data_cango && update_data 783 io.data_write.bits.way_en := s3_way_en_dup(3) 784 io.data_write.bits.addr := s3_req_vaddr_dup_for_data_write 785 
// Data-write payload. Data selection, in priority order:
//   amo_wait_amoalu -> s3_amo_data_merged_reg
//   s3_sc           -> s3_sc_data_merged
//   otherwise       -> s3_store_data_merged
io.data_write.bits.wmask := banked_wmask
io.data_write.bits.data := Mux(
  amo_wait_amoalu,
  s3_amo_data_merged_reg,
  Mux(
    s3_sc,
    s3_sc_data_merged,
    s3_store_data_merged
  )
)
// Each condition is registered and checked one cycle late: a replace request
// must never coincide with a meta, tag or data write.
assert(RegNext(!io.meta_write.valid || !s3_req.replace))
assert(RegNext(!io.tag_write.valid || !s3_req.replace))
assert(RegNext(!io.data_write.valid || !s3_req.replace))

// Write-back request: stage 3 is valid, need_wb is set, and one of the three
// cases below holds.
io.wb.valid := s3_valid_dup(13) && (
  // replace
  s3_req_replace_dup(4) && !s3_replace_nothing ||
  // probe can go to wbq
  s3_req_probe_dup(9) && (io.meta_write.ready || !probe_update_meta) ||
  // amo miss can go to wbq
  s3_req_miss_dup(8) &&
    (io.meta_write.ready || !amo_update_meta) &&
    (io.data_write.ready || !update_data) &&
    (s3_s_amoalu_dup(2) || !amo_wait_amoalu) &&
    io.tag_write.ready
) && need_wb

// The write-back address is the block address formed from s3_tag concatenated
// with the untagged part of the request vaddr; the two *_dup_* fields receive
// the same expression built from a duplicated vaddr signal.
io.wb.bits.addr := get_block_addr(Cat(s3_tag, get_untag(s3_req.vaddr)))
io.wb.bits.addr_dup_0 := get_block_addr(Cat(s3_tag, get_untag(s3_req_vaddr_dup_for_wb)))
io.wb.bits.addr_dup_1 := get_block_addr(Cat(s3_tag, get_untag(s3_req_vaddr_dup_for_wb)))
io.wb.bits.param := writeback_param
// Marked voluntary for miss and replace requests (i.e. not for the probe case).
io.wb.bits.voluntary := s3_req_miss_dup(9) || s3_req_replace_dup(5)
io.wb.bits.hasData := writeback_data
io.wb.bits.dirty := s3_coh_dup(5) === ClientStates.Dirty
io.wb.bits.data := s3_data.asUInt
io.wb.bits.delay_release := s3_req_replace_dup(6)
io.wb.bits.miss_id := s3_req.miss_id

// Replacer access update, sent the cycle after stage 1 fires for a non-probe
// store or AMO; the one-hot way enable is converted to a way index.
io.replace_access.valid := RegNext(s1_fire && (s1_req.isAMO || s1_req.isStore) && !s1_req.probe)
io.replace_access.bits.set := s2_idx_dup_for_replace_access
io.replace_access.bits.way := RegNext(OHToUInt(s1_way_en))

// Replace-way query: valid the cycle after stage 0 fires.
io.replace_way.set.valid := RegNext(s0_fire)
io.replace_way.set.bits := s1_idx_dup_for_replace_way

// Pipeline status outputs: per-stage valid plus the set being handled.
// TODO: consider block policy of a finer granularity
io.status.s0_set.valid := req.valid
io.status.s0_set.bits := get_idx(s0_req.vaddr)
io.status.s1.valid := s1_valid_dup(5)
// Remaining pipeline status outputs: set and way per stage. The stage-2 and
// stage-3 valids are suppressed for replace requests.
io.status.s1.bits.set := s1_idx_dup_for_status
io.status.s1.bits.way_en := s1_way_en
io.status.s2.valid := s2_valid_dup(7) && !s2_req_replace_dup_2
io.status.s2.bits.set := s2_idx_dup_for_status
io.status.s2.bits.way_en := s2_way_en
io.status.s3.valid := s3_valid && !s3_req_replace_dup(7)
io.status.s3.bits.set := s3_idx_dup(5)
io.status.s3.bits.way_en := s3_way_en

// Report errors to the BEU and CSR, one cycle after the data read response.
// All fields default to zero; the connections below override the ones they name.
io.error := 0.U.asTypeOf(new L1CacheErrorInfo())
// Report the error and update the error CSR; valid only in the cycle after
// stage 2 fired.
io.error.valid := s3_error && RegNext(s2_fire)
// Only tag_error and data_error are reported to the BEU.
// l2_error should not be reported here (L2 will report it itself).
io.error.report_to_beu := (RegEnable(s2_tag_error, s2_fire) || s3_data_error) && RegNext(s2_fire)
// The stage-2 values below are captured when s2_fire is high.
io.error.paddr := RegEnable(s2_req.addr, s2_fire)
io.error.source.tag := RegEnable(s2_tag_error, s2_fire)
io.error.source.data := s3_data_error
io.error.source.l2 := RegEnable(s2_flag_error || s2_l2_error, s2_fire)
// Operation type of the failing request; store and atom exclude probes.
io.error.opType.store := RegEnable(s2_req.isStore && !s2_req.probe, s2_fire)
io.error.opType.probe := RegEnable(s2_req.probe, s2_fire)
io.error.opType.release := RegEnable(s2_req.replace, s2_fire)
io.error.opType.atom := RegEnable(s2_req.isAMO && !s2_req.probe, s2_fire)

// Performance events as (name, signal) pairs: stage-0 fires, and the number of
// the four listed stage signals that are high in a cycle.
val perfEvents = Seq(
  ("dcache_mp_req ", s0_fire ),
  ("dcache_mp_total_penalty", PopCount(VecInit(Seq(s0_fire, s1_valid, s2_valid, s3_valid))))
)
// Presumably consumes perfEvents via the HasPerfEvents mix-in; confirm there.
generatePerfEvent()
}