/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utility._
import utils._
import xiangshan._
import xiangshan.backend.Bundles._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.fu.FenceToSbuffer
import xiangshan.backend.fu.vector.Bundles._
import xiangshan.mem.prefetch.PrefetchReqBundle
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.cache.mmu._
import math._

object Bundles {

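  // LsPipelineBundle: the common request bundle carried through the load/store
  // pipeline stages. It couples the micro-op with its virtual/physical addresses,
  // data and byte mask, exception and TLB/dcache status, store-to-load forwarding
  // results, and prefetch / vector / misalignment / replay metadata.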
  class LsPipelineBundle(implicit p: Parameters) extends XSBundle
    with HasDCacheParameters
    with HasVLSUParameters {
    val uop = new DynInst
    val vaddr = UInt(VAddrBits.W)
    // For exception vaddr generation
    val fullva = UInt(XLEN.W)
    val vaNeedExt = Bool()
    val isHyper = Bool()
    val paddr = UInt(PAddrBits.W)
    val gpaddr = UInt(XLEN.W)
    val isForVSnonLeafPTE = Bool()
    // val func = UInt(6.W)
    val mask = UInt((VLEN/8).W)
    val data = UInt((VLEN+1).W)
    val wlineflag = Bool() // store writes the whole cache line

    val miss = Bool()
    val tlbMiss = Bool()
    val ptwBack = Bool()
    val af = Bool()
    val nc = Bool()
    val mmio = Bool()
    val memBackTypeMM = Bool() // 1: main memory, 0: IO
    val atomic = Bool()
    val hasException = Bool()

    val forwardMask = Vec(VLEN/8, Bool())
    val forwardData = Vec(VLEN/8, UInt(8.W))

    // prefetch
    val isPrefetch = Bool()
    val isHWPrefetch = Bool()
    def isSWPrefetch = isPrefetch && !isHWPrefetch

    // misalignBuffer
    val isFrmMisAlignBuf = Bool()

    // vector
    val isvec = Bool()
    val isLastElem = Bool()
    val is128bit = Bool()
    val uop_unit_stride_fof = Bool()
    val usSecondInv = Bool()
    val elemIdx = UInt(elemIdxBits.W)
    val alignedType = UInt(alignTypeBits.W)
    val mbIndex = UInt(max(vlmBindexBits, vsmBindexBits).W)
    // val rob_idx_valid = Vec(2,Bool())
    // val inner_idx = Vec(2,UInt(3.W))
    // val rob_idx = Vec(2,new RobPtr)
    val reg_offset = UInt(vOffsetBits.W)
    val elemIdxInsideVd = UInt(elemIdxBits.W)
    // val offset = Vec(2,UInt(4.W))
    val vecActive = Bool() // 1: vector active element or scalar mem operation, 0: vector inactive element
    val is_first_ele = Bool()
    val vecBaseVaddr = UInt(VAddrBits.W)
    val vecVaddrOffset = UInt(VAddrBits.W)
    val vecTriggerMask = UInt((VLEN/8).W)
    // val flowPtr = new VlflowPtr() // VLFlowQueue ptr
    // val sflowPtr = new VsFlowPtr() // VSFlowQueue ptr

    // For debug usage
    val isFirstIssue = Bool()
    val hasROBEntry = Bool()

    // For load replay
    val isLoadReplay = Bool()
    val isFastPath = Bool()
    val isFastReplay = Bool()
    val replayCarry = new ReplayCarry(nWays)

    // For dcache miss load
    val mshrid = UInt(log2Up(cfg.nMissEntries).W)
    val handledByMSHR = Bool()
    val replacementUpdated = Bool()
    val missDbUpdated = Bool()

    val forward_tlDchannel = Bool()
    val dcacheRequireReplay = Bool()
    val delayedLoadError = Bool()
    val lateKill = Bool()
    val feedbacked = Bool()
    val ldCancel = ValidUndirectioned(UInt(log2Ceil(LoadPipelineWidth).W))
    // loadQueueReplay index.
    val schedIndex = UInt(log2Up(LoadQueueReplaySize).W)
    // hardware prefetch and fast replay do not need to query the TLB
    val tlbNoQuery = Bool()

    // misalign
    val isMisalign = Bool()
    val isFinalSplit = Bool()
    val misalignWith16Byte = Bool()
    val misalignNeedWakeUp = Bool()
    val updateAddrValid = Bool()
  }

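  // LdPrefetchTrainBundle: LsPipelineBundle extended with dcache prefetch metadata
  // (meta_prefetch source and meta_access), used to train the L1 prefetcher.
  // fromLsPipelineBundle copies the relevant pipeline fields (optionally latched with
  // RegEnable) and asPrefetchReqBundle converts the result into a PrefetchReqBundle.
  //
  // Hypothetical usage sketch (signal names below are illustrative only, not defined
  // in this file):
  //
  //   val train = Wire(new LdPrefetchTrainBundle)
  //   train.fromLsPipelineBundle(s2_in, latch = true, enable = s2_fire)
  //   train.meta_prefetch := s2_dcacheMetaPrefetch
  //   train.meta_access   := s2_dcacheMetaAccess
  //   prefetchTrainReq    := train.asPrefetchReqBundle()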
  class LdPrefetchTrainBundle(implicit p: Parameters) extends LsPipelineBundle {
    val meta_prefetch = UInt(L1PfSourceBits.W)
    val meta_access = Bool()

    def fromLsPipelineBundle(input: LsPipelineBundle, latch: Boolean = false, enable: Bool = true.B) = {
      if (latch) vaddr := RegEnable(input.vaddr, enable) else vaddr := input.vaddr
      if (latch) fullva := RegEnable(input.fullva, enable) else fullva := input.fullva
      if (latch) vaNeedExt := RegEnable(input.vaNeedExt, enable) else vaNeedExt := input.vaNeedExt
      if (latch) isHyper := RegEnable(input.isHyper, enable) else isHyper := input.isHyper
      if (latch) paddr := RegEnable(input.paddr, enable) else paddr := input.paddr
      if (latch) gpaddr := RegEnable(input.gpaddr, enable) else gpaddr := input.gpaddr
      if (latch) isForVSnonLeafPTE := RegEnable(input.isForVSnonLeafPTE, enable) else isForVSnonLeafPTE := input.isForVSnonLeafPTE
      if (latch) mask := RegEnable(input.mask, enable) else mask := input.mask
      if (latch) data := RegEnable(input.data, enable) else data := input.data
      if (latch) uop := RegEnable(input.uop, enable) else uop := input.uop
      if (latch) wlineflag := RegEnable(input.wlineflag, enable) else wlineflag := input.wlineflag
      if (latch) miss := RegEnable(input.miss, enable) else miss := input.miss
      if (latch) tlbMiss := RegEnable(input.tlbMiss, enable) else tlbMiss := input.tlbMiss
      if (latch) ptwBack := RegEnable(input.ptwBack, enable) else ptwBack := input.ptwBack
      if (latch) af := RegEnable(input.af, enable) else af := input.af
      if (latch) nc := RegEnable(input.nc, enable) else nc := input.nc
      if (latch) mmio := RegEnable(input.mmio, enable) else mmio := input.mmio
      if (latch) memBackTypeMM := RegEnable(input.memBackTypeMM, enable) else memBackTypeMM := input.memBackTypeMM
      if (latch) forwardMask := RegEnable(input.forwardMask, enable) else forwardMask := input.forwardMask
      if (latch) forwardData := RegEnable(input.forwardData, enable) else forwardData := input.forwardData
      if (latch) isPrefetch := RegEnable(input.isPrefetch, enable) else isPrefetch := input.isPrefetch
      if (latch) isHWPrefetch := RegEnable(input.isHWPrefetch, enable) else isHWPrefetch := input.isHWPrefetch
      if (latch) isFrmMisAlignBuf := RegEnable(input.isFrmMisAlignBuf, enable) else isFrmMisAlignBuf := input.isFrmMisAlignBuf
      if (latch) isFirstIssue := RegEnable(input.isFirstIssue, enable) else isFirstIssue := input.isFirstIssue
      if (latch) hasROBEntry := RegEnable(input.hasROBEntry, enable) else hasROBEntry := input.hasROBEntry
      if (latch) dcacheRequireReplay := RegEnable(input.dcacheRequireReplay, enable) else dcacheRequireReplay := input.dcacheRequireReplay
      if (latch) schedIndex := RegEnable(input.schedIndex, enable) else schedIndex := input.schedIndex
      if (latch) tlbNoQuery := RegEnable(input.tlbNoQuery, enable) else tlbNoQuery := input.tlbNoQuery
      if (latch) isvec := RegEnable(input.isvec, enable) else isvec := input.isvec
      if (latch) isLastElem := RegEnable(input.isLastElem, enable) else isLastElem := input.isLastElem
      if (latch) is128bit := RegEnable(input.is128bit, enable) else is128bit := input.is128bit
      if (latch) vecActive := RegEnable(input.vecActive, enable) else vecActive := input.vecActive
      if (latch) is_first_ele := RegEnable(input.is_first_ele, enable) else is_first_ele := input.is_first_ele
      if (latch) uop_unit_stride_fof := RegEnable(input.uop_unit_stride_fof, enable) else uop_unit_stride_fof := input.uop_unit_stride_fof
      if (latch) usSecondInv := RegEnable(input.usSecondInv, enable) else usSecondInv := input.usSecondInv
      if (latch) reg_offset := RegEnable(input.reg_offset, enable) else reg_offset := input.reg_offset
      if (latch) elemIdx := RegEnable(input.elemIdx, enable) else elemIdx := input.elemIdx
      if (latch) alignedType := RegEnable(input.alignedType, enable) else alignedType := input.alignedType
      if (latch) mbIndex := RegEnable(input.mbIndex, enable) else mbIndex := input.mbIndex
      if (latch) elemIdxInsideVd := RegEnable(input.elemIdxInsideVd, enable) else elemIdxInsideVd := input.elemIdxInsideVd
      if (latch) vecBaseVaddr := RegEnable(input.vecBaseVaddr, enable) else vecBaseVaddr := input.vecBaseVaddr
      if (latch) vecVaddrOffset := RegEnable(input.vecVaddrOffset, enable) else vecVaddrOffset := input.vecVaddrOffset
      if (latch) vecTriggerMask := RegEnable(input.vecTriggerMask, enable) else vecTriggerMask := input.vecTriggerMask
      // if (latch) flowPtr := RegEnable(input.flowPtr, enable) else flowPtr := input.flowPtr
      // if (latch) sflowPtr := RegEnable(input.sflowPtr, enable) else sflowPtr := input.sflowPtr

      meta_prefetch := DontCare
      meta_access := DontCare
      forward_tlDchannel := DontCare
      mshrid := DontCare
      replayCarry := DontCare
      atomic := DontCare
      isLoadReplay := DontCare
      isFastPath := DontCare
      isFastReplay := DontCare
      handledByMSHR := DontCare
      replacementUpdated := DontCare
      missDbUpdated := DontCare
      delayedLoadError := DontCare
      lateKill := DontCare
      feedbacked := DontCare
      ldCancel := DontCare
    }

    def asPrefetchReqBundle(): PrefetchReqBundle = {
      val res = Wire(new PrefetchReqBundle)
      res.vaddr := this.vaddr
      res.paddr := this.paddr
      res.pc := this.uop.pc
      res.miss := this.miss
      res.pfHitStream := isFromStream(this.meta_prefetch)

      res
    }
  }

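  // StPrefetchTrainBundle: store-side prefetch training information; currently
  // identical in content to LdPrefetchTrainBundle, kept as a separate type.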
  class StPrefetchTrainBundle(implicit p: Parameters) extends LdPrefetchTrainBundle {}

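  // LqWriteBundle: load pipeline to load queue write bundle. In addition to the
  // LsPipelineBundle fields, it carries the replay information (rep_info) and
  // duplicated write-enable bits (data_wen_dup).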
  class LqWriteBundle(implicit p: Parameters) extends LsPipelineBundle {
    // load inst replay information
    val rep_info = new LoadToLsqReplayIO
    // queue entry data, except flag bits, will be updated if writeQueue is true;
    // the valid bit in LqWriteBundle will be ignored
    val data_wen_dup = Vec(6, Bool()) // dirty reg dup

    def fromLsPipelineBundle(input: LsPipelineBundle, latch: Boolean = false, enable: Bool = true.B) = {
      if(latch) vaddr := RegEnable(input.vaddr, enable) else vaddr := input.vaddr
      if(latch) fullva := RegEnable(input.fullva, enable) else fullva := input.fullva
      if(latch) vaNeedExt := RegEnable(input.vaNeedExt, enable) else vaNeedExt := input.vaNeedExt
      if(latch) isHyper := RegEnable(input.isHyper, enable) else isHyper := input.isHyper
      if(latch) paddr := RegEnable(input.paddr, enable) else paddr := input.paddr
      if(latch) gpaddr := RegEnable(input.gpaddr, enable) else gpaddr := input.gpaddr
      if(latch) isForVSnonLeafPTE := RegEnable(input.isForVSnonLeafPTE, enable) else isForVSnonLeafPTE := input.isForVSnonLeafPTE
      if(latch) mask := RegEnable(input.mask, enable) else mask := input.mask
      if(latch) data := RegEnable(input.data, enable) else data := input.data
      if(latch) uop := RegEnable(input.uop, enable) else uop := input.uop
      if(latch) wlineflag := RegEnable(input.wlineflag, enable) else wlineflag := input.wlineflag
      if(latch) miss := RegEnable(input.miss, enable) else miss := input.miss
      if(latch) tlbMiss := RegEnable(input.tlbMiss, enable) else tlbMiss := input.tlbMiss
      if(latch) ptwBack := RegEnable(input.ptwBack, enable) else ptwBack := input.ptwBack
      if(latch) mmio := RegEnable(input.mmio, enable) else mmio := input.mmio
      if(latch) atomic := RegEnable(input.atomic, enable) else atomic := input.atomic
      if(latch) forwardMask := RegEnable(input.forwardMask, enable) else forwardMask := input.forwardMask
      if(latch) forwardData := RegEnable(input.forwardData, enable) else forwardData := input.forwardData
      if(latch) isPrefetch := RegEnable(input.isPrefetch, enable) else isPrefetch := input.isPrefetch
      if(latch) isHWPrefetch := RegEnable(input.isHWPrefetch, enable) else isHWPrefetch := input.isHWPrefetch
      if(latch) isFrmMisAlignBuf := RegEnable(input.isFrmMisAlignBuf, enable) else isFrmMisAlignBuf := input.isFrmMisAlignBuf
      if(latch) isFirstIssue := RegEnable(input.isFirstIssue, enable) else isFirstIssue := input.isFirstIssue
      if(latch) hasROBEntry := RegEnable(input.hasROBEntry, enable) else hasROBEntry := input.hasROBEntry
      if(latch) isLoadReplay := RegEnable(input.isLoadReplay, enable) else isLoadReplay := input.isLoadReplay
      if(latch) isFastPath := RegEnable(input.isFastPath, enable) else isFastPath := input.isFastPath
      if(latch) isFastReplay := RegEnable(input.isFastReplay, enable) else isFastReplay := input.isFastReplay
      if(latch) mshrid := RegEnable(input.mshrid, enable) else mshrid := input.mshrid
      if(latch) forward_tlDchannel := RegEnable(input.forward_tlDchannel, enable) else forward_tlDchannel := input.forward_tlDchannel
      if(latch) replayCarry := RegEnable(input.replayCarry, enable) else replayCarry := input.replayCarry
      if(latch) dcacheRequireReplay := RegEnable(input.dcacheRequireReplay, enable) else dcacheRequireReplay := input.dcacheRequireReplay
      if(latch) schedIndex := RegEnable(input.schedIndex, enable) else schedIndex := input.schedIndex
      if(latch) handledByMSHR := RegEnable(input.handledByMSHR, enable) else handledByMSHR := input.handledByMSHR
      if(latch) replacementUpdated := RegEnable(input.replacementUpdated, enable) else replacementUpdated := input.replacementUpdated
      if(latch) missDbUpdated := RegEnable(input.missDbUpdated, enable) else missDbUpdated := input.missDbUpdated
      if(latch) delayedLoadError := RegEnable(input.delayedLoadError, enable) else delayedLoadError := input.delayedLoadError
      if(latch) lateKill := RegEnable(input.lateKill, enable) else lateKill := input.lateKill
      if(latch) feedbacked := RegEnable(input.feedbacked, enable) else feedbacked := input.feedbacked
      if(latch) isvec := RegEnable(input.isvec, enable) else isvec := input.isvec
      if(latch) is128bit := RegEnable(input.is128bit, enable) else is128bit := input.is128bit
      if(latch) vecActive := RegEnable(input.vecActive, enable) else vecActive := input.vecActive
      if(latch) uop_unit_stride_fof := RegEnable(input.uop_unit_stride_fof, enable) else uop_unit_stride_fof := input.uop_unit_stride_fof
      if(latch) reg_offset := RegEnable(input.reg_offset, enable) else reg_offset := input.reg_offset
      if(latch) mbIndex := RegEnable(input.mbIndex, enable) else mbIndex := input.mbIndex
      if(latch) elemIdxInsideVd := RegEnable(input.elemIdxInsideVd, enable) else elemIdxInsideVd := input.elemIdxInsideVd

      rep_info := DontCare
      data_wen_dup := DontCare
    }
  }

  class SqWriteBundle(implicit p: Parameters) extends LsPipelineBundle {
    val need_rep = Bool()
  }

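  // LoadForwardQueryIO: store-to-load forwarding query issued by the load pipeline.
  // The load sends its vaddr/paddr/mask/sqIdx and receives the forwarded bytes
  // (forwardMask/forwardData) together with the dataInvalid / matchInvalid /
  // addrInvalid status described below.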
  class LoadForwardQueryIO(implicit p: Parameters) extends XSBundle {
    val vaddr = Output(UInt(VAddrBits.W))
    val paddr = Output(UInt(PAddrBits.W))
    val mask = Output(UInt((VLEN/8).W))
    val uop = Output(new DynInst) // for replay
    val pc = Output(UInt(VAddrBits.W)) // for debug
    val valid = Output(Bool())

    val forwardMaskFast = Input(Vec((VLEN/8), Bool())) // resp to load_s1
    val forwardMask = Input(Vec((VLEN/8), Bool())) // resp to load_s2
    val forwardData = Input(Vec((VLEN/8), UInt(8.W))) // resp to load_s2

    // val lqIdx = Output(UInt(LoadQueueIdxWidth.W))
    val sqIdx = Output(new SqPtr)

    // dataInvalid suggests that store-to-load forwarding should happen,
    // but the data is not available yet. If dataInvalid, the load inst should
    // be replayed from RS. Feedback type should be RSFeedbackType.dataInvalid.
    val dataInvalid = Input(Bool()) // addr match, but data is not valid for now

    // matchInvalid suggests that in the store-to-load forward logic, the paddr CAM
    // result does not equal the vaddr CAM result. If matchInvalid, a microarchitectural
    // exception should be raised to flush the SQ and the committed sbuffer.
    val matchInvalid = Input(Bool()) // resp to load_s2

    // addrInvalid suggests that store-to-load forwarding should happen,
    // but the address (SSID) is not available yet. If addrInvalid, the load inst should
    // be replayed from RS. Feedback type should be RSFeedbackType.addrInvalid.
    val addrInvalid = Input(Bool())
  }

  // LoadForwardQueryIO used in the load pipeline
  //
  // Difference between PipeLoadForwardQueryIO and LoadForwardQueryIO:
  // PipeIO uses a predecoded sqIdxMask for better forwarding timing
  class PipeLoadForwardQueryIO(implicit p: Parameters) extends LoadForwardQueryIO {
    // val sqIdx = Output(new SqPtr) // for debug, should not be used in pipeline for timing reasons
    // sqIdxMask is calculated in an earlier stage for better timing
    val sqIdxMask = Output(UInt(StoreQueueSize.W))

    // dataInvalid: addr match, but data is not valid for now
    val dataInvalidFast = Input(Bool()) // resp to load_s1
    // val dataInvalid = Input(Bool()) // resp to load_s2
    val dataInvalidSqIdx = Input(new SqPtr) // resp to load_s2, sqIdx
    val addrInvalidSqIdx = Input(new SqPtr) // resp to load_s2, sqIdx
  }

  // Query load queue for ld-ld violation
  //
  // Req should be sent in load_s1
  // Resp will be generated 1 cycle later
  //
  // Note that the query req may be !ready, as the dcache is releasing a block.
  // If that happens, a replay from RS is needed.
  class LoadNukeQueryReq(implicit p: Parameters) extends XSBundle { // provide lqIdx
    val uop = new DynInst
    // mask: load's data mask.
    val mask = UInt((VLEN/8).W)

    // paddr: load's paddr.
    val paddr = UInt(PAddrBits.W)
    // data_valid: load data is valid.
    val data_valid = Bool()
    // is_nc: is an NC access.
    val is_nc = Bool()
  }

  class LoadNukeQueryResp(implicit p: Parameters) extends XSBundle {
    // rep_frm_fetch: ld-ld violation detected, replay from fetch.
    val rep_frm_fetch = Bool()
  }

  class LoadNukeQueryIO(implicit p: Parameters) extends XSBundle {
    val req = Decoupled(new LoadNukeQueryReq)
    val resp = Flipped(Valid(new LoadNukeQueryResp))
    val revoke = Output(Bool())
  }

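  // StoreNukeQueryIO: store-to-load violation ("nuke") query. Carries the store's
  // robIdx, paddr and data mask, which are matched against in-flight loads.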
  class StoreNukeQueryIO(implicit p: Parameters) extends XSBundle {
    // robIdx: requestor's (a store instruction) rob index for match logic.
    val robIdx = new RobPtr

    // paddr: requestor's (a store instruction) physical address for match logic.
    val paddr = UInt(PAddrBits.W)

    // mask: requestor's (a store instruction) data width mask for match logic.
    val mask = UInt((VLEN/8).W)

    // matchLine: if the store is a 128-bit vector access, the load unit needs to compare the 128-bit vaddr.
    val matchLine = Bool()
  }

  class StoreMaBufToSqControlIO(implicit p: Parameters) extends XSBundle {
    // from storeMisalignBuffer to storeQueue: controls its sbuffer write
    val toStoreQueue = Output(new XSBundle {
      // This entry crosses a page boundary
      val crossPageWithHit = Bool()
      val crossPageCanDeq = Bool()
      // paddr of the high page
      val paddr = UInt(PAddrBits.W)

      val withSameUop = Bool()
    })
    // from storeQueue to storeMisalignBuffer: provides detailed info of this store
    val toStoreMisalignBuffer = Input(new XSBundle {
      val sqPtr = new SqPtr
      val doDeq = Bool()

      val uop = new DynInst()
    })
  }

  class StoreMaBufToVecStoreMergeBufferIO(implicit p: Parameters) extends VLSUBundle {
    val mbIndex = Output(UInt(vsmBindexBits.W))
    val flush = Output(Bool())
  }

  // Store byte valid mask write bundle
  //
  // Store byte valid mask write to SQ takes 2 cycles
  class StoreMaskBundle(implicit p: Parameters) extends XSBundle {
    val sqIdx = new SqPtr
    val mask = UInt((VLEN/8).W)
  }

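  // LoadDataFromDcacheBundle: data sources for a load near the end of the pipeline.
  // mergeTLData selects forwarded TileLink D-channel or MSHR data over the dcache
  // response at load s2; mergeLsqFwdData then overlays bytes forwarded from the LSQ
  // at load s3.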
  class LoadDataFromDcacheBundle(implicit p: Parameters) extends DCacheBundle {
    // old dcache: optimize data sram read fanout
    // val bankedDcacheData = Vec(DCacheBanks, UInt(64.W))
    // val bank_oh = UInt(DCacheBanks.W)

    // new dcache
    val respDcacheData = UInt(VLEN.W)
    val forwardMask = Vec(VLEN/8, Bool())
    val forwardData = Vec(VLEN/8, UInt(8.W))
    val uop = new DynInst // for data selection, only fwen and fuOpType are used
    val addrOffset = UInt(4.W) // for data selection

    // forward tilelink D channel
    val forward_D = Bool()
    val forwardData_D = Vec(VLEN/8, UInt(8.W))

    // forward mshr data
    val forward_mshr = Bool()
    val forwardData_mshr = Vec(VLEN/8, UInt(8.W))

    val forward_result_valid = Bool()

    def mergeTLData(): UInt = {
      // merge TL D or MSHR data at load s2
      val dcache_data = respDcacheData
      val use_D = forward_D && forward_result_valid
      val use_mshr = forward_mshr && forward_result_valid
      Mux(
        use_D || use_mshr,
        Mux(
          use_D,
          forwardData_D.asUInt,
          forwardData_mshr.asUInt
        ),
        dcache_data
      )
    }

    def mergeLsqFwdData(dcacheData: UInt): UInt = {
      // merge dcache and lsq forward data at load s3
      val rdataVec = VecInit((0 until VLEN / 8).map(j =>
        Mux(forwardMask(j), forwardData(j), dcacheData(8*(j+1)-1, 8*j))
      ))
      rdataVec.asUInt
    }
  }

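  // Hypothetical usage sketch of the two-step data merge (signal names below are
  // illustrative only, not defined in this file):
  //
  //   // load s2: pick TL-D / MSHR forwarded data over the dcache response
  //   val s2_merged = s2_dataFromDcache.mergeTLData()
  //   // load s3: overlay bytes forwarded from the LSQ on the latched s2 result
  //   val s3_merged = s3_dataFromDcache.mergeLsqFwdData(RegEnable(s2_merged, s2_fire))
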
  // Load writeback data from load queue (refill)
  class LoadDataFromLQBundle(implicit p: Parameters) extends XSBundle {
    val lqData = UInt(64.W) // load queue has merged data
    val uop = new DynInst // for data selection, only fwen and fuOpType are used
    val addrOffset = UInt(3.W) // for data selection

    def mergedData(): UInt = {
      lqData
    }
  }

  // Bundle for load/store wait wake-up
  class MemWaitUpdateReq(implicit p: Parameters) extends XSBundle {
    val robIdx = Vec(backendParams.StaExuCnt, ValidIO(new RobPtr))
    val sqIdx = Vec(backendParams.StdCnt, ValidIO(new SqPtr))
  }
}