package xiangshan.backend.issue

import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.utils._

/** Compile-time parameters shared by every issue-queue bundle and module. */
trait IQConst{
  // number of entries held by one issue queue
  val iqSize = 8
  // width of an index into the queue
  val iqIdxWidth = log2Up(iqSize)
  val layer1Size = iqSize
  val layer2Size = iqSize/2
  // elaboration-time switch for the XSDebug printouts below
  val debug = false
}

sealed abstract class IQBundle extends XSBundle with IQConst
sealed abstract class IQModule extends XSModule with IQConst with NeedImpl

/** One candidate of the select tree: ready flag, ROQ index and queue slot index. */
sealed class CmpInputBundle extends IQBundle{
  val instRdy = Input(Bool())
  val roqIdx  = Input(UInt(RoqIdxWidth.W))
  val iqIdx   = Input(UInt(iqIdxWidth.W))

  /** Drives all three fields and returns this bundle so the call can be chained. */
  def apply(instRdy: Bool,roqIdx: UInt,iqIdx: UInt ) = {
    this.instRdy := instRdy
    this.roqIdx := roqIdx
    this.iqIdx := iqIdx
    this
  }
}


/**
  * Two-input node of the select tree.
  *
  * `out.instRdy` is the OR of both ready flags. `out` forwards `in1` when
  * `in1` is ready and either `in2` is not ready or `in1` has the smaller
  * `roqIdx`; otherwise it forwards `in2`.
  */
sealed class CompareCircuitUnit extends IQModule {
  val io = IO(new Bundle(){
    val in1 = new CmpInputBundle
    val in2 = new CmpInputBundle
    val out = Flipped(new CmpInputBundle)
  })

  val roqIdx1 = io.in1.roqIdx
  val roqIdx2 = io.in2.roqIdx
  val iqIdx1  = io.in1.iqIdx
  val iqIdx2  = io.in2.iqIdx

  val inst1Rdy = io.in1.instRdy
  val inst2Rdy = io.in2.instRdy

  // default: forward in2
  io.out.instRdy := inst1Rdy | inst2Rdy
  io.out.roqIdx := roqIdx2
  io.out.iqIdx := iqIdx2

  // Same condition as (r1 && !r2) || (r1 && r2 && idx1 < idx2), factored on r1.
  // NOTE(review): a plain `<` on roqIdx only orders by age if the index never
  // wraps around -- confirm against the ROQ implementation.
  when(inst1Rdy && (!inst2Rdy || (roqIdx1 < roqIdx2))){
    io.out.roqIdx := roqIdx1
    io.out.iqIdx := iqIdx1
  }
  // if(debug && (layer==3)) {
  //   printf("(%d)[CCU(L%did%d)] in1.ready:%d in1.index:%d || in1.ready:%d in1.index:%d || out.ready:%d out.index:%d\n",GTimer(),layer.asUInt,id.asUInt,inst1Rdy,iqIdx1,inst2Rdy,iqIdx2,io.out.instRdy,io.out.iqIdx)
  // }


}

/** Factory: instantiates one [[CompareCircuitUnit]], wires both inputs, returns its output. */
object CCU{
  def apply(in1: CmpInputBundle, in2: CmpInputBundle) = {
    val unit = Module(new CompareCircuitUnit)
    unit.io.in1 <> in1
    unit.io.in2 <> in2
    unit.io.out
  }
}

/** Reduces a non-empty sequence of candidates to one winner with a tree of [[CCU]] nodes. */
object ParallelSel {
  def apply(iq: Seq[CmpInputBundle]): CmpInputBundle = {
    // An empty sequence would otherwise hit the default case with size/2 == 0
    // and recurse until the stack overflows.
    require(iq.nonEmpty, "ParallelSel needs at least one candidate")
    iq match {
      case Seq(a) => a
      case Seq(a, b) => CCU(a, b)
      case _ =>
        apply(Seq(apply(iq take iq.size/2), apply(iq drop iq.size/2)))
    }
  }
}

/**
  * Issue queue with `iqSize` entries.
  *
  * Entries are written through `enqCtrl` (operand data follows one cycle later
  * on `enqData`), woken up by `wakeUpPorts` and optionally by `bypassUops`,
  * selected by a [[ParallelSel]] tree and sent out on `deq` one cycle after
  * selection.
  *
  * @param fuTypeInt  not referenced by the logic in this file
  * @param wakeupCnt  number of write-back ports in `io.wakeUpPorts`
  * @param bypassCnt  number of bypass ports; 0 removes all bypass IO and logic
  * @param fixedDelay asserted to be 1 whenever bypass is enabled
  */
class IssueQueue(val fuTypeInt: BigInt, val wakeupCnt: Int, val bypassCnt: Int = 0, val fixedDelay: Int = 1) extends IQModule {

  val useBypass = bypassCnt > 0

  val io = IO(new Bundle() {
    // flush Issue Queue
    val redirect = Flipped(ValidIO(new Redirect))

    // enq Ctrl sigs at dispatch-2
    val enqCtrl = Flipped(DecoupledIO(new MicroOp))
    // enq Data at next cycle (regfile has 1 cycle latency)
    val enqData = Flipped(ValidIO(new ExuInput))

    // broadcast selected uop to other issue queues which have bypasses
    val selectedUop = if(useBypass) ValidIO(new MicroOp) else null

    // send to exu
    val deq = DecoupledIO(new ExuInput)

    // listen to write back bus
    val wakeUpPorts = Vec(wakeupCnt, Flipped(ValidIO(new ExuOutput)))

    // use bypass uops to speculative wake-up
    val bypassUops = if(useBypass) Vec(bypassCnt, Flipped(ValidIO(new MicroOp))) else null
    val bypassData = if(useBypass) Vec(bypassCnt, Flipped(ValidIO(new ExuOutput))) else null
  })
  //---------------------------------------------------------
  // Issue Queue
  //---------------------------------------------------------

  //Tag Queue
  val ctrlFlow = Mem(iqSize,new CtrlFlow)
  val ctrlSig = Mem(iqSize,new CtrlSignals)
  val brMask  = RegInit(VecInit(Seq.fill(iqSize)(0.U(BrqSize.W))))
  val brTag  = RegInit(VecInit(Seq.fill(iqSize)(0.U(BrTagWidth.W))))
  val validReg = RegInit(VecInit(Seq.fill(iqSize)(false.B)))
  // set combinationally in the cycle an entry is flushed or dequeued, so that
  // `valid` already excludes that entry before validReg is updated
  val validWillFalse= WireInit(VecInit(Seq.fill(iqSize)(false.B)))
  val valid = validReg.asUInt & ~validWillFalse.asUInt
  val src1Rdy = RegInit(VecInit(Seq.fill(iqSize)(false.B)))
  val src2Rdy = RegInit(VecInit(Seq.fill(iqSize)(false.B)))
  val src3Rdy = RegInit(VecInit(Seq.fill(iqSize)(false.B)))
  val prfSrc1 = Reg(Vec(iqSize, UInt(PhyRegIdxWidth.W)))
  val prfSrc2 = Reg(Vec(iqSize, UInt(PhyRegIdxWidth.W)))
  val prfSrc3 = Reg(Vec(iqSize, UInt(PhyRegIdxWidth.W)))
  val prfDest = Reg(Vec(iqSize, UInt(PhyRegIdxWidth.W)))
  val oldPDest = Reg(Vec(iqSize, UInt(PhyRegIdxWidth.W)))
  val freelistAllocPtr = Reg(Vec(iqSize, UInt(PhyRegIdxWidth.W)))
  val roqIdx  = Reg(Vec(iqSize, UInt(RoqIdxWidth.W)))

  // an entry may be selected once it is valid and all three sources are ready
  val instRdy = WireInit(VecInit(List.tabulate(iqSize)(i => src1Rdy(i) && src2Rdy(i) && src3Rdy(i)&& valid(i))))


  //tag enqueue
  val iqEmty = !valid.asUInt.orR
  val iqFull = valid.asUInt.andR
  val iqAllowIn = !iqFull
  io.enqCtrl.ready := iqAllowIn

  //enqueue pointer: lowest-numbered free slot
  val emptySlot = ~valid.asUInt
  val enqueueSelect = PriorityEncoder(emptySlot)
  //assert(!(io.enqCtrl.valid && io.redirect.valid),"enqueue valid should be false when redirect valid")
  XSError(io.enqCtrl.valid && io.redirect.valid,"enqueue valid should be false when redirect valid")
  val srcEnqRdy = WireInit(VecInit(false.B, false.B, false.B))

  // a source that is not a register is ready immediately
  srcEnqRdy(0) := Mux(io.enqCtrl.bits.ctrl.src1Type =/= SrcType.reg , true.B ,io.enqCtrl.bits.src1State === SrcState.rdy)
  srcEnqRdy(1) := Mux(io.enqCtrl.bits.ctrl.src2Type =/= SrcType.reg , true.B ,io.enqCtrl.bits.src2State === SrcState.rdy)
  srcEnqRdy(2) := Mux(io.enqCtrl.bits.ctrl.src3Type =/= SrcType.reg , true.B ,io.enqCtrl.bits.src3State === SrcState.rdy)

  when (io.enqCtrl.fire()) {
    ctrlFlow(enqueueSelect) := io.enqCtrl.bits.cf
    ctrlSig(enqueueSelect) := io.enqCtrl.bits.ctrl
    brMask(enqueueSelect) := io.enqCtrl.bits.brMask
    brTag(enqueueSelect) := io.enqCtrl.bits.brTag
    validReg(enqueueSelect) := true.B
    src1Rdy(enqueueSelect) := srcEnqRdy(0)
    src2Rdy(enqueueSelect) := srcEnqRdy(1)
    src3Rdy(enqueueSelect) := srcEnqRdy(2)
    prfSrc1(enqueueSelect) := io.enqCtrl.bits.psrc1
    prfSrc2(enqueueSelect) := io.enqCtrl.bits.psrc2
    prfSrc3(enqueueSelect) := io.enqCtrl.bits.psrc3
    prfDest(enqueueSelect) := io.enqCtrl.bits.pdest
    oldPDest(enqueueSelect) := io.enqCtrl.bits.old_pdest
    freelistAllocPtr(enqueueSelect) := io.enqCtrl.bits.freelistAllocPtr
    roqIdx(enqueueSelect) := io.enqCtrl.bits.roqIdx
    if(debug) {XSDebug("[IQ enq]: enqSelect:%d | s1Rd:%d s2Rd:%d s3Rd:%d\n",enqueueSelect.asUInt,
      (io.enqCtrl.bits.src1State === SrcState.rdy),
      (io.enqCtrl.bits.src2State === SrcState.rdy),
      (io.enqCtrl.bits.src3State === SrcState.rdy))}

  }

  //Data Queue
  val src1Data = Reg(Vec(iqSize, UInt(XLEN.W)))
  val src2Data = Reg(Vec(iqSize, UInt(XLEN.W)))
  val src3Data = Reg(Vec(iqSize, UInt(XLEN.W)))


  // slot and fire flag of the previous cycle's enqueue
  val enqSelNext = RegNext(enqueueSelect)
  val enqFireNext = RegNext(io.enqCtrl.fire())

  // Read RegFile
  // Ready data will be written at the next cycle
  // NOTE(review): io.enqData.valid is not checked here -- confirm that is intended.
  when (enqFireNext) {
    when(src1Rdy(enqSelNext)){src1Data(enqSelNext) := io.enqData.bits.src1}
    when(src2Rdy(enqSelNext)){src2Data(enqSelNext) := io.enqData.bits.src2}
    when(src3Rdy(enqSelNext)){src3Data(enqSelNext) := io.enqData.bits.src3}
  }

  if(debug) {

    XSDebug("[Reg info-ENQ] enqSelNext:%d | enqFireNext:%d \n",enqSelNext,enqFireNext)
    XSDebug("[IQ content] valid vr vf| pc insruction | src1rdy src1 | src2Rdy src2 pdest \n")
    for(i <- 0 to (iqSize -1)){
      val ins = ctrlFlow(i).instr
      val pc = ctrlFlow(i).pc
      when(valid(i)){
        XSDebug("[IQ content][%d] %d%d%d |%x %x| %x %x | %x %x | %d valid|\n",i.asUInt, valid(i), validReg(i), validWillFalse(i), pc,ins,src1Rdy(i), src1Data(i), src2Rdy(i), src2Data(i),prfDest(i))
      } .elsewhen(validReg(i) && validWillFalse(i)){
        XSDebug("[IQ content][%d] %d%d%d |%x %x| %x %x | %x %x | %d valid will be False|\n",i.asUInt, valid(i), validReg(i), validWillFalse(i),pc,ins, src1Rdy(i), src1Data(i), src2Rdy(i), src2Data(i),prfDest(i))
      } .otherwise {
        XSDebug("[IQ content][%d] %d%d%d |%x %x| %x %x | %x %x | %d\n",i.asUInt, valid(i), validReg(i), validWillFalse(i),pc,ins, src1Rdy(i), src1Data(i), src2Rdy(i), src2Data(i),prfDest(i))
      }

    }
  }
  // From Common Data Bus(wakeUpPort)
  // chisel claims that firrtl will optimize Mux1H to and/or tree
  // TODO: ignore ALU'cdb srcRdy, for byPass has done it
  // NOTE(review): the bypass logic below is nested inside this condition, so it
  // is only elaborated when wakeupCnt > 0 as well.
  if(wakeupCnt > 0) {
    val cdbValid = List.tabulate(wakeupCnt)(i => io.wakeUpPorts(i).valid)
    val cdbData = List.tabulate(wakeupCnt)(i => io.wakeUpPorts(i).bits.data)
    val cdbPdest = List.tabulate(wakeupCnt)(i => io.wakeUpPorts(i).bits.uop.pdest)

    val srcNum = 3
    val prfSrc = List(prfSrc1, prfSrc2, prfSrc3)
    val srcRdy = List(src1Rdy, src2Rdy, src3Rdy)
    val srcData = List(src1Data, src2Data, src3Data)
    // srcHitVec(k)(i)(j): source k of entry i matches valid write-back port j
    val srcHitVec = List.tabulate(srcNum)(k =>
      List.tabulate(iqSize)(i =>
        List.tabulate(wakeupCnt)(j =>
          (prfSrc(k)(i) === cdbPdest(j)) && cdbValid(j))))
    val srcHit = List.tabulate(srcNum)(k =>
      List.tabulate(iqSize)(i =>
        ParallelOR(srcHitVec(k)(i)).asBool()))
        // VecInit(srcHitVec(k)(i)).asUInt.orR))
    for(k <- 0 until srcNum){
      for(i <- 0 until iqSize)( when (valid(i)) {
        when(!srcRdy(k)(i) && srcHit(k)(i)) {
          srcRdy(k)(i) := true.B
          // srcData(k)(i) := Mux1H(srcHitVec(k)(i), cdbData)
          srcData(k)(i) := ParallelMux(srcHitVec(k)(i) zip cdbData)
        }
      })
    }
    // From byPass [speculative] (just for ALU to listen to other ALU's res, include itself)
    // just need Tag(Ctrl). send out Tag when Tag is decided. other ALUIQ listen to them and decide Tag
    // byPassUops is one cycle before byPassDatas
    if (bypassCnt > 0) {
      val bypassPdest = List.tabulate(bypassCnt)(i => io.bypassUops(i).bits.pdest)
      val bypassValid = List.tabulate(bypassCnt)(i => io.bypassUops(i).valid) // may only need valid not fire()
      val bypassData = List.tabulate(bypassCnt)(i => io.bypassData(i).bits.data)
      // srcBpHitVec(k)(i)(j): source k of entry i matches valid bypass uop j
      val srcBpHitVec = List.tabulate(srcNum)(k =>
        List.tabulate(iqSize)(i =>
          List.tabulate(bypassCnt)(j =>
            (prfSrc(k)(i) === bypassPdest(j)) && bypassValid(j))))
      val srcBpHit = List.tabulate(srcNum)(k =>
        List.tabulate(iqSize)(i =>
          ParallelOR(srcBpHitVec(k)(i)).asBool()))
          // VecInit(srcBpHitVec(k)(i)).asUInt.orR))
      // hit information delayed by one cycle, used to pick up the bypass data
      val srcBpHitVecNext = List.tabulate(srcNum)(k =>
        List.tabulate(iqSize)(i =>
          List.tabulate(bypassCnt)(j => RegNext(srcBpHitVec(k)(i)(j)))))
      val srcBpHitNext = List.tabulate(srcNum)(k =>
        List.tabulate(iqSize)(i =>
          RegNext(srcBpHit(k)(i))))
      val srcBpData = List.tabulate(srcNum)(k =>
        List.tabulate(iqSize)(i =>
          ParallelMux(srcBpHitVecNext(k)(i) zip bypassData)))
          // Mux1H(srcBpHitVecNext(k)(i), bypassData)))
      for(k <- 0 until srcNum){
        for(i <- 0 until iqSize){ when (valid(i)) {
          // tag hit: mark ready now, the data is captured one cycle later
          when(!srcRdy(k)(i) && srcBpHit(k)(i)) { srcRdy(k)(i) := true.B }
          when(srcBpHitNext(k)(i)) { srcData(k)(i) := srcBpData(k)(i)}
        }}
      }

      // Enqueue Bypass: the uop being enqueued this cycle also listens to the bypass tags
      val enqBypass = WireInit(VecInit(false.B, false.B, false.B))
      val enqPsrc = List(io.enqCtrl.bits.psrc1, io.enqCtrl.bits.psrc2, io.enqCtrl.bits.psrc3)
      val enqBypassHitVec = enqPsrc.map(psrc =>
        List.tabulate(bypassCnt)(j => psrc === bypassPdest(j) && bypassValid(j) && io.enqCtrl.fire()))
      val enqBypassHitVecNext = enqBypassHitVec.map(i => i.map(j => RegNext(j)))
      enqBypass(0) := ParallelOR(enqBypassHitVec(0))
      enqBypass(1) := ParallelOR(enqBypassHitVec(1))
      enqBypass(2) := ParallelOR(enqBypassHitVec(2))
      // These connections come after the enqueue block above, so they override
      // the ready bits and data written there when a bypass hit occurs.
      when(enqBypass(0)) { src1Rdy(enqueueSelect) := true.B }
      when(enqBypass(1)) { src2Rdy(enqueueSelect) := true.B }
      when(enqBypass(2)) { src3Rdy(enqueueSelect) := true.B }
      when(RegNext(enqBypass(0))) { src1Data(enqSelNext) := ParallelMux(enqBypassHitVecNext(0) zip bypassData)}
      when(RegNext(enqBypass(1))) { src2Data(enqSelNext) := ParallelMux(enqBypassHitVecNext(1) zip bypassData)}
      when(RegNext(enqBypass(2))) { src3Data(enqSelNext) := ParallelMux(enqBypassHitVecNext(2) zip bypassData)}
    }

  }


  //---------------------------------------------------------
  // Select Circuit
  //---------------------------------------------------------
  val selVec = List.tabulate(iqSize){ i =>
    Wire(new CmpInputBundle).apply(instRdy(i),roqIdx(i),i.U)
  }
  val selResult = ParallelSel(selVec)
  if(debug) {
    XSDebug("[Sel Result] ResReady:%d || ResultId:%d\n",selResult.instRdy,selResult.iqIdx.asUInt)
  }
  //---------------------------------------------------------
  // Redirect Logic
  //---------------------------------------------------------
  val expRedirect = io.redirect.valid && io.redirect.bits.isException
  val brRedirect = io.redirect.valid && !io.redirect.bits.isException

  // A branch redirect flushes the valid entries whose brMask contains the
  // redirect's brTag; an exception redirect flushes every entry.
  for(i <- 0 until iqSize){
    when(brRedirect && (UIntToOH(io.redirect.bits.brTag) & brMask(i)).orR && validReg(i) ){
      validReg(i) := false.B
      validWillFalse(i) := true.B

    } .elsewhen(expRedirect) {
      validReg(i) := false.B
      validWillFalse(i) := true.B
    }
  }
  //---------------------------------------------------------
  // Dequeue Logic
  //---------------------------------------------------------
  //hold the sel-index to wait for data
  val selInstIdx = RegInit(0.U(iqIdxWidth.W))
  val selInstRdy = RegInit(false.B)

  //issue the select instruction
  val dequeueSelect = Wire(UInt(iqIdxWidth.W))
  dequeueSelect := selInstIdx

  val brRedirectMaskMatch = (UIntToOH(io.redirect.bits.brTag) & brMask(dequeueSelect)).orR
  // the registered selection is not issued if a redirect flushes it this cycle
  val IQreadyGo = selInstRdy && !expRedirect && (!brRedirect || !brRedirectMaskMatch)

  io.deq.valid := IQreadyGo

  io.deq.bits.uop.cf := ctrlFlow(dequeueSelect)
  io.deq.bits.uop.ctrl := ctrlSig(dequeueSelect)
  io.deq.bits.uop.brMask := brMask(dequeueSelect)
  io.deq.bits.uop.brTag := brTag(dequeueSelect)

  io.deq.bits.uop.psrc1 := prfSrc1(dequeueSelect)
  io.deq.bits.uop.psrc2 := prfSrc2(dequeueSelect)
  io.deq.bits.uop.psrc3 := prfSrc3(dequeueSelect)
  io.deq.bits.uop.pdest := prfDest(dequeueSelect)
  io.deq.bits.uop.old_pdest := oldPDest(dequeueSelect)
  io.deq.bits.uop.src1State := SrcState.rdy
  io.deq.bits.uop.src2State := SrcState.rdy
  io.deq.bits.uop.src3State := SrcState.rdy
  io.deq.bits.uop.freelistAllocPtr := freelistAllocPtr(dequeueSelect)
  io.deq.bits.uop.roqIdx := roqIdx(dequeueSelect)

  io.deq.bits.src1 := src1Data(dequeueSelect)
  io.deq.bits.src2 := src2Data(dequeueSelect)
  io.deq.bits.src3 := src3Data(dequeueSelect)

  if(debug) {
    XSDebug("[Reg Info-Sel] selInstRdy:%d || selIdx:%d\n",selInstRdy,selInstIdx.asUInt)
    XSDebug(IQreadyGo,"[IQ dequeue] **dequeue fire:%d** roqIdx:%d dequeueSel:%d | src1Rd:%d src1:%d | src2Rd:%d src2:%d\n", io.deq.fire(), io.deq.bits.uop.roqIdx, dequeueSelect.asUInt,
      (io.deq.bits.uop.src1State === SrcState.rdy), io.deq.bits.uop.psrc1,
      (io.deq.bits.uop.src2State === SrcState.rdy), io.deq.bits.uop.psrc2
      )
  }

  //update the index register of instruction that can be issue, unless function unit not allow in
  //then the issue will be stopped to wait the function unit
  //clear the validBit of dequeued instruction in issuequeue
  when(io.deq.fire()){
    validReg(dequeueSelect) := false.B
    validWillFalse(dequeueSelect) := true.B
  }

  val selRegflush = expRedirect || (brRedirect && brRedirectMaskMatch)

  selInstRdy := Mux(selRegflush,false.B,selResult.instRdy)
  selInstIdx := Mux(selRegflush,0.U,selResult.iqIdx)
  // SelectedUop (bypass / speculative)
  if(useBypass) {
    assert(fixedDelay==1) // only support fixedDelay is 1 now

    /** Returns `a` delayed by `delay` register stages; `delay == 0` returns `a` itself. */
    def DelayPipe[T <: Data](a: T, delay: Int = 0): T = {
      // The previous version wrapped a hardware value in Wire(...) and never
      // drove the stage it returned, so any delay > 0 was unusable.
      (0 until delay).foldLeft(a)((stage, _) => RegNext(stage))
    }
    val sel = io.selectedUop
    val selIQIdx = selResult.iqIdx
    // element 0: ready flag of the selected entry, element 1: its destination tag
    val delayPipe = DelayPipe(VecInit(selResult.instRdy, prfDest(selIQIdx)), fixedDelay-1)
    // NOTE(review): sel.valid is not driven anywhere in this file -- confirm
    // whether it should follow delayPipe(0).
    sel.bits := DontCare
    // Element 1 is the destination tag. The previous `delayPipe(fixedDelay-1)(1)`
    // selected element 0 (the ready flag) and then extracted its bit 1.
    sel.bits.pdest := delayPipe(1)
  }
}