/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.vector.Bundles._


class VSplitPipeline(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitPipelineIO(isVStore))

  def us_whole_reg(fuOpType: UInt) = fuOpType === VlduType.vlr
  def us_mask(fuOpType: UInt) = fuOpType === VlduType.vlm
  def us_fof(fuOpType: UInt) = fuOpType === VlduType.vleff

  val s1_ready = WireInit(false.B)
  io.in.ready := s1_ready

  /**-----------------------------------------------------------
    * s0 stage
    * decode and generate AlignedType, uop mask, preIsSplit
    * ----------------------------------------------------------
    */
  val s0_vtype = io.in.bits.uop.vpu.vtype
  val s0_sew = s0_vtype.vsew
  val s0_eew = io.in.bits.uop.vpu.veew
  val s0_lmul = s0_vtype.vlmul
  // when loading a whole register or a unit-stride mask, emul is overridden below
  val s0_fuOpType = io.in.bits.uop.fuOpType
  val s0_mop = s0_fuOpType(6, 5)
  val s0_nf = Mux(us_whole_reg(s0_fuOpType), 0.U, io.in.bits.uop.vpu.nf)
  val s0_vm = io.in.bits.uop.vpu.vm
  val s0_emul = Mux(us_whole_reg(s0_fuOpType), GenUSWholeEmul(io.in.bits.uop.vpu.nf), Mux(us_mask(s0_fuOpType), 0.U(mulBits.W), EewLog2(s0_eew) - s0_sew + s0_lmul))
  val s0_preIsSplit = !(isUnitStride(s0_mop) && !us_fof(s0_fuOpType))

  val s0_valid = Wire(Bool())
  val s0_kill = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val s0_can_go = s1_ready
  val s0_fire = s0_valid && s0_can_go
  val s0_out = Wire(new VLSBundle(isVStore))

  val isUsWholeReg = isUnitStride(s0_mop) && us_whole_reg(s0_fuOpType)
  val isMaskReg = isUnitStride(s0_mop) && us_mask(s0_fuOpType)
  val isSegment = s0_nf =/= 0.U && !us_whole_reg(s0_fuOpType)
  val instType = Cat(isSegment, s0_mop)
  val uopIdx = io.in.bits.uop.vpu.vuopIdx
  val uopIdxInField = GenUopIdxInField(instType, s0_emul, s0_lmul, uopIdx)
  val vdIdxInField = GenVdIdxInField(instType, s0_emul, s0_lmul, uopIdxInField)
  val lmulLog2 = Mux(s0_lmul.asSInt >= 0.S, 0.U, s0_lmul)
  val emulLog2 = Mux(s0_emul.asSInt >= 0.S, 0.U, s0_emul)
  val numEewLog2 = emulLog2 - EewLog2(s0_eew)
  val numSewLog2 = lmulLog2 - s0_sew
  val numFlowsSameVdLog2 = Mux(
    isIndexed(instType),
    log2Up(VLENB).U - s0_sew(1, 0),
    log2Up(VLENB).U - s0_eew(1, 0)
  )
  // numUops = nf * max(lmul, emul)
  val lmulLog2Pos = Mux(s0_lmul.asSInt < 0.S, 0.U, s0_lmul)
  val emulLog2Pos = Mux(s0_emul.asSInt < 0.S, 0.U, s0_emul)
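  // Worked example (illustrative values; lmul/emul hold log2 of LMUL/EMUL): an indexed
  // segment access with nf = 1 (two fields), LMUL = 4 (lmulLog2Pos = 2) and EMUL = 1
  // (emulLog2Pos = 0) has lmul > emul and takes the lmul branch, giving
  //   numUops = (nf + 1) << lmulLog2Pos = 2 << 2 = 8
  // All other cases shift by emulLog2Pos instead.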
  val numUops = Mux(
    isIndexed(s0_mop) && s0_lmul.asSInt > s0_emul.asSInt,
    (s0_nf +& 1.U) << lmulLog2Pos,
    (s0_nf +& 1.U) << emulLog2Pos
  )

  val vvl = io.in.bits.src_vl.asTypeOf(VConfig()).vl
  val evl = Mux(isUsWholeReg, GenUSWholeRegVL(io.in.bits.uop.vpu.nf +& 1.U, s0_eew), Mux(isMaskReg, GenUSMaskRegVL(vvl), vvl))
  val vvstart = io.in.bits.uop.vpu.vstart
  val alignedType = Mux(isIndexed(instType), s0_sew(1, 0), s0_eew(1, 0))
  val broadenAlignedType = Mux(s0_preIsSplit, Cat("b0".U, alignedType), "b100".U) // plain unit-stride uses 128-bit memory accesses
  val flowsLog2 = GenRealFlowLog2(instType, s0_emul, s0_lmul, s0_eew, s0_sew)
  val flowsPrevThisUop = uopIdxInField << flowsLog2 // # of flows before this uop in a field
  val flowsPrevThisVd = vdIdxInField << numFlowsSameVdLog2 // # of flows before this vd in a field
  val flowsIncludeThisUop = (uopIdxInField +& 1.U) << flowsLog2 // # of flows in a field up to and including this uop
  val flowNum = io.in.bits.flowNum.get
  val srcMask = GenFlowMask(Mux(s0_vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vvstart, evl, true)

  val flowMask = ((srcMask &
    UIntToMask(flowsIncludeThisUop.asUInt, VLEN + 1) &
    (~UIntToMask(flowsPrevThisUop.asUInt, VLEN)).asUInt
  ) >> flowsPrevThisVd)(VLENB - 1, 0)
  val vlmax = GenVLMAX(s0_lmul, s0_sew)

  // connect
  s0_out := DontCare
  s0_out match {case x =>
    x.uop := io.in.bits.uop
    x.uop.vpu.vl := evl
    x.uop.uopIdx := uopIdx
    x.uop.numUops := numUops
    x.uop.lastUop := (uopIdx +& 1.U) === numUops
    x.flowMask := flowMask
    x.byteMask := GenUopByteMask(flowMask, broadenAlignedType)(VLENB - 1, 0)
    x.fof := isUnitStride(s0_mop) && us_fof(s0_fuOpType)
    x.baseAddr := io.in.bits.src_rs1
    x.stride := io.in.bits.src_stride
    x.flowNum := (1.U << flowNum)
    x.nfields := s0_nf +& 1.U
    x.vm := s0_vm
    x.usWholeReg := isUsWholeReg
    x.usMaskReg := isMaskReg
    x.eew := s0_eew
    x.sew := s0_sew
    x.emul := s0_emul
    x.lmul := s0_lmul
    x.vlmax := Mux(isUsWholeReg, evl, vlmax)
    x.instType := instType
    x.data := io.in.bits.src_vs3
    x.vdIdxInField := vdIdxInField
    x.preIsSplit := s0_preIsSplit
    x.alignedType := broadenAlignedType
  }
  s0_valid := io.in.valid && !s0_kill
  /**-------------------------------------
    * s1 stage
    * ------------------------------------
    * generate UopOffset
    */
  val s1_valid = RegInit(false.B)
  val s1_kill = Wire(Bool())
  val s1_in = Wire(new VLSBundle(isVStore))
  val s1_can_go = io.out.ready && io.toMergeBuffer.resp.valid
  val s1_fire = s1_valid && !s1_kill && s1_can_go

  s1_ready := s1_kill || !s1_valid || io.out.ready

  when(s0_fire){
    s1_valid := true.B
  }.elsewhen(s1_fire){
    s1_valid := false.B
  }.elsewhen(s1_kill){
    s1_valid := false.B
  }
  s1_in := RegEnable(s0_out, s0_fire)

  val s1_uopidx = s1_in.uop.vpu.vuopIdx
  val s1_nf = s1_in.uop.vpu.nf
  val s1_nfields = s1_in.nfields
  val s1_eew = s1_in.eew
  val s1_instType = s1_in.instType
  val s1_stride = s1_in.stride
  val s1_alignedType = Mux(isIndexed(s1_in.instType), s1_in.sew(1, 0), s1_in.eew(1, 0))
  val s1_notIndexedStride = Mux( // stride for strided/unit-stride instructions
    isStrided(s1_instType),
    s1_stride(XLEN - 1, 0), // for strided accesses, stride = x[rs2]
    s1_nfields << s1_eew(1, 0) // for unit-stride accesses, stride = eew * NFIELDS
  )
  val uopOffset = (s1_uopidx >> s1_nf) << s1_alignedType
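  // Worked example (illustrative values): for a unit-stride segment access with
  // NFIELDS = 3 (s1_nfields = 3) and eew = 32 bits (s1_eew(1, 0) = 2), consecutive
  // elements of one field sit s1_nfields << 2 = 12 bytes apart, which is the
  // s1_notIndexedStride that the Mux below selects for non-indexed accesses;
  // strided accesses use x[rs2] instead.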
  val stride = Mux(isIndexed(s1_instType), s1_stride, s1_notIndexedStride) // for indexed instructions, the index is applied at split time

  s1_kill := s1_in.uop.robIdx.needFlush(io.redirect)

  // query mergeBuffer
  io.toMergeBuffer.req.valid := s1_fire // a MergeBuffer entry is requested only when s1 can actually go
  io.toMergeBuffer.req.bits.flowNum := Mux(s1_in.preIsSplit, 1.U << flowNum, PopCount(s1_in.flowMask))
  io.toMergeBuffer.req.bits.data := s1_in.data
  io.toMergeBuffer.req.bits.uop := s1_in.uop
  io.toMergeBuffer.req.bits.mask := flowMask
  io.toMergeBuffer.req.bits.vaddr := DontCare
//  io.toMergeBuffer.req.bits.vdOffset :=

  // out connect
  io.out.valid := s1_valid
  io.out.bits := s1_in
  io.out.bits.uopOffset := uopOffset
  io.out.bits.stride := stride
  io.out.bits.mBIndex := io.toMergeBuffer.resp.bits.mBIndex
}

abstract class VSplitBuffer(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitBufferIO(isVStore))

  val bufferSize: Int

  class VSplitPtr(implicit p: Parameters) extends CircularQueuePtr[VSplitPtr](bufferSize){
  }

  object VSplitPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSplitPtr = {
      val ptr = Wire(new VSplitPtr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  val uopq = Reg(Vec(bufferSize, new VLSBundle(isVStore)))
  val valid = RegInit(VecInit(Seq.fill(bufferSize)(false.B)))
  val vstart = RegInit(VecInit(Seq.fill(bufferSize)(0.U(elemIdxBits.W)))) // index of the exception element
  val vl = RegInit(VecInit(Seq.fill(bufferSize)(0.U.asTypeOf(Valid(UInt(elemIdxBits.W)))))) // only for fof instructions that modify vl
  val srcMaskVec = Reg(Vec(bufferSize, UInt(VLEN.W)))
  // ptr
  val enqPtr = RegInit(0.U.asTypeOf(new VSplitPtr))
  val deqPtr = RegInit(0.U.asTypeOf(new VSplitPtr))
  // for split
  val splitIdx = RegInit(0.U(flowIdxBits.W))
  val strideOffsetReg = RegInit(0.U(VLEN.W))

  /**
    * Redirect
    */
  val flushed = WireInit(VecInit(Seq.fill(bufferSize)(false.B))) // entry was flushed by a redirect that arrived in the previous cycle
  val flushVec = (valid zip flushed).zip(uopq).map { case ((v, f), entry) => v && entry.uop.robIdx.needFlush(io.redirect) && !f }
  val flushEnq = io.in.fire && io.in.bits.uop.robIdx.needFlush(io.redirect)
  val flushNumReg = RegNext(PopCount(flushEnq +: flushVec))
  val redirectReg = RegNext(io.redirect)
  val flushVecReg = RegNext(WireInit(VecInit(flushVec)))

  // enqueue
  when (io.in.fire && !flushEnq) {
    val id = enqPtr.value
    uopq(id) := io.in.bits
    valid(id) := true.B
  }
  io.in.ready := isAfter(enqPtr, deqPtr)

  // split uops
  val issueValid = valid(deqPtr.value)
  val issueEntry = uopq(deqPtr.value)
  val issueMbIndex = uopq(deqPtr.value).mBIndex
  val issueFlowNum = issueEntry.flowNum
  val issueBaseAddr = issueEntry.baseAddr
  val issueUop = issueEntry.uop
  val issueUopIdx = issueUop.vpu.vuopIdx
  val issueInstType = issueEntry.instType
  val issueUopOffset = issueEntry.uopOffset
  val issueEew = issueEntry.eew
  val issueSew = issueEntry.sew
  val issueLmul = issueEntry.emul
  val issueEmul = issueEntry.lmul
  val issueAlignedType = issueEntry.alignedType
  val issuePreIsSplit = issueEntry.preIsSplit
  val issueByteMask = issueEntry.byteMask
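  // elemIdx numbers the element within the whole instruction (across uops) so a
  // faulting flow can report a precise vstart. Conceptually (illustrative only,
  // the exact computation lives in GenElemIdx): with 8 flows per uop, uopIdx = 1
  // and splitIdx = 3 name element 1 * 8 + 3 = 11.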
  val elemIdx = GenElemIdx(
    instType = issueInstType,
    emul = issueEmul,
    lmul = issueLmul,
    eew = issueEew,
    sew = issueSew,
    uopIdx = issueUopIdx,
    flowIdx = splitIdx
  ) // elemIdx inside an instruction, for exception reporting
  val indexedStride = IndexAddr( // index for indexed instructions
    index = issueEntry.stride,
    flow_inner_idx = ((splitIdx << issueEew(1, 0))(vOffsetBits - 1, 0) >> issueEew(1, 0)).asUInt,
    eew = issueEew
  )
  val issueStride = Mux(isIndexed(issueInstType), indexedStride, strideOffsetReg)
  val vaddr = issueBaseAddr + issueUopOffset + issueStride
  val mask = genVWmask128(vaddr, issueAlignedType) // scalar mask for the flow
  val flowMask = issueEntry.flowMask
  val vecActive = (flowMask & UIntToOH(splitIdx)).orR
  /*
   * Unit-stride is split into one flow or two flows:
   * if the uop's address is 128-bit aligned, split it into one flow, otherwise into two.
   */

  val usAligned128 = (vaddr(3, 0) === 0.U) // addr is 128-bit aligned
  val usSplitMask = genUSSplitMask(issueByteMask, splitIdx, vaddr(3, 0))
  val usNoSplit = (usAligned128 || !(vaddr(3, 0) +& PopCount(usSplitMask))(4)) && !issuePreIsSplit && (splitIdx === 0.U) // unit-stride uop does not need to be split into two flows
  val usSplitVaddr = genUSSplitAddr(vaddr, splitIdx)
  val regOffset = vaddr(3, 0) // offset in the 256-bit vd
  XSError((splitIdx > 1.U && usNoSplit) || (splitIdx > 1.U && !issuePreIsSplit), "Unit-Stride addr split error!\n")

  // data
  io.out.bits match { case x =>
    x.uop := issueUop
    x.vaddr := Mux(issuePreIsSplit, usSplitVaddr, vaddr)
    x.alignedType := issueAlignedType
    x.isvec := true.B
    x.mask := Mux(issuePreIsSplit, usSplitMask, mask)
    x.reg_offset := regOffset // for merging unit-stride flows
    x.vecActive := vecActive
    x.is_first_ele := DontCare
    x.usSecondInv := usNoSplit
    x.elemIdx := elemIdx
    x.uop_unit_stride_fof := DontCare
    x.isFirstIssue := DontCare
    x.mBIndex := issueMbIndex
  }

  // update enqPtr
  when (redirectReg.valid && flushNumReg =/= 0.U) {
    enqPtr := enqPtr - flushNumReg
  }.otherwise {
    when (io.in.fire) {
      enqPtr := enqPtr + 1.U
    }
  }

  // flush queue
  for (i <- 0 until bufferSize) {
    when(flushVecReg(i) && redirectReg.valid && flushNumReg =/= 0.U) {
      valid(i) := false.B
      flushed(i) := true.B
    }
  }

  /* Execute logic */
  /** Issue to the scalar pipeline **/
  val canIssue = Wire(Bool())
  val allowIssue = io.out.ready
  val doIssue = Wire(Bool())
  val issueCount = Mux(usNoSplit, 2.U, PopCount(doIssue)) // a unit-stride uop that needs no split counts as two flows

  // handshake
  val thisPtr = deqPtr.value
  canIssue := !issueUop.robIdx.needFlush(io.redirect) && deqPtr < enqPtr
  doIssue := canIssue && allowIssue
  when (!RegNext(io.redirect.valid) || distanceBetween(enqPtr, deqPtr) > flushNumReg) {
    when (splitIdx < (issueFlowNum - issueCount)) {
      // The uop has not been entirely split yet
      splitIdx := splitIdx + issueCount
      strideOffsetReg := strideOffsetReg + issueStride
    }.otherwise {
      when (doIssue) {
        // The uop is done splitting
        splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
        strideOffsetReg := 0.U
        deqPtr := deqPtr + 1.U
      }
    }
  }.otherwise {
    splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
    strideOffsetReg := 0.U
  }

  // out connect
  io.out.valid := canIssue && vecActive
}

class VSSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = true){
  override lazy val bufferSize = SplitBufferSize
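  // The store variant additionally carves the vs3 source data per flow: GenVSData
  // below extracts the element addressed by splitIdx, while genUSSplitData shifts
  // the whole 128-bit register according to the low address bits for the unit-stride
  // path; issuePreIsSplit then selects which of the two reaches the store queue,
  // mirroring the vaddr/mask selection above.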
  // split data
  val flowData = GenVSData(
    data = issueEntry.data.asUInt,
    elemIdx = splitIdx,
    alignedType = issueAlignedType
  )
  val usSplitData = genUSSplitData(issueEntry.data.asUInt, splitIdx, vaddr(3, 0))

  // send data to sq
  val vstd = io.vstd.get
  vstd.valid := canIssue
  vstd.bits.uop := issueUop
  vstd.bits.data := Mux(issuePreIsSplit, usSplitData, flowData)
  vstd.bits.debug := DontCare
  vstd.bits.vdIdx.get := DontCare
  vstd.bits.vdIdxInField.get := DontCare
}

class VLSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = false){
  override lazy val bufferSize = SplitBufferSize
}

class VSSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = true){
}

class VLSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = false){
}

class VLSplitImp(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitIO(isVStore=false))
  val splitPipeline = Module(new VLSplitPipelineImp())
  val splitBuffer = Module(new VLSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // Split Buffer
  splitBuffer.io.in <> splitPipeline.io.out
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
}

class VSSplitImp(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitIO(isVStore=true))
  val splitPipeline = Module(new VSSplitPipelineImp())
  val splitBuffer = Module(new VSSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // Split Buffer
  splitBuffer.io.in <> splitPipeline.io.out
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
  io.vstd.get <> splitBuffer.io.vstd.get
}