From 17ee3faef9dc353336467ead89adb22c508b6bba Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Tue, 1 Aug 2023 14:54:04 +0800 Subject: [PATCH 1/9] [rtl] refactor lane use stage module. --- v/src/Bundles.scala | 3 +- v/src/Lane.scala | 1166 +++------------------ v/src/laneStage/LaneExecutionBridge.scala | 472 +++++++++ v/src/laneStage/LaneStage.scala | 51 + v/src/laneStage/LaneStage0.scala | 133 +++ v/src/laneStage/LaneStage1.scala | 316 ++++++ v/src/laneStage/LaneStage2.scala | 70 ++ v/src/laneStage/LaneStage3.scala | 170 +++ 8 files changed, 1341 insertions(+), 1040 deletions(-) create mode 100644 v/src/laneStage/LaneExecutionBridge.scala create mode 100644 v/src/laneStage/LaneStage.scala create mode 100644 v/src/laneStage/LaneStage0.scala create mode 100644 v/src/laneStage/LaneStage1.scala create mode 100644 v/src/laneStage/LaneStage2.scala create mode 100644 v/src/laneStage/LaneStage3.scala diff --git a/v/src/Bundles.scala b/v/src/Bundles.scala index da3aed6d5..4488bbeb7 100644 --- a/v/src/Bundles.scala +++ b/v/src/Bundles.scala @@ -607,8 +607,6 @@ class LaneExecuteStage(parameter: LaneParameter)(isLastSlot: Boolean) extends Bu * read result of vs2, for instructions that are not executed, pipe from s1 */ val pipeData: Option[UInt] = Option.when(isLastSlot)(UInt(parameter.datapathWidth.W)) - /** pipe vd for ffo */ - val pipeVD: Option[UInt] = Option.when(isLastSlot)(UInt(parameter.datapathWidth.W)) } // Record of temporary execution units @@ -621,6 +619,7 @@ class ExecutionUnitRecord(parameter: LaneParameter)(isLastSlot: Boolean) extends val crossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 2).W)) /** groupCounter need use to update `Lane.maskFormatResultForGroup` */ val groupCounter: UInt = UInt(parameter.groupNumberBits.W) + val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool()) } class SlotRequestToVFU(parameter: LaneParameter) extends Bundle { diff --git a/v/src/Lane.scala b/v/src/Lane.scala index 
ddaad16d6..552438fb9 100644 --- a/v/src/Lane.scala +++ b/v/src/Lane.scala @@ -488,133 +488,57 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ slotCanShift(index) := pipeClear } - // --- stage 0 start --- - // todo: parameter register width for all stage - - // register for stage0 - val valid0: Bool = RegInit(false.B) - val groupCounterInStage0: UInt = RegInit(0.U(parameter.groupNumberBits.W)) - val maskInStage0: UInt = RegInit(0.U(4.W)) - val sSendResponseInStage0: Option[Bool] = Option.when(isLastSlot) {RegInit(true.B)} - - val s0Valid: Bool = Wire(Bool()) - val s0Ready: Bool = Wire(Bool()) - val s0Fire: Bool = s0Valid && s0Ready - - /** Filter by different sew */ - val filterVec: Seq[(Bool, UInt)] = Seq(0, 1, 2).map { filterSew => - // The lower 'dataGroupIndexSize' bits represent the offsets in the data group - val dataGroupIndexSize: Int = 2 - filterSew - // each group has '2 ** dataGroupIndexSize' elements - val dataGroupSize = 1 << dataGroupIndexSize - // The data group index of last data group - val groupIndex = (maskIndexVec(index) >> dataGroupIndexSize).asUInt - // Filtering data groups - val groupFilter: UInt = scanLeftOr(UIntToOH(groupIndex)) ## false.B - // Whether there are element in the data group that have not been masked - // TODO: use 'record.maskGroupedOrR' & update it - val maskForDataGroup: UInt = - VecInit(maskForMaskGroup.asBools.grouped(dataGroupSize).map(_.reduce(_ || _)).toSeq).asUInt - val groupFilterByMask = maskForDataGroup & groupFilter - // ffo next group - val nextDataGroupOH: UInt = ffo(groupFilterByMask) - // This mask group has the next data group to execute - val hasNextDataGroup = nextDataGroupOH.orR - val nextElementBaseIndex: UInt = (OHToUInt(nextDataGroupOH) << dataGroupIndexSize).asUInt - (hasNextDataGroup, nextElementBaseIndex) - } - - /** is there any data left in this group? 
*/ - val nextOrR: Bool = Mux1H(vSew1H, filterVec.map(_._1)) - - // mask is exhausted - val maskExhausted: Bool = !nextOrR - - /** The index of next element in this mask group.(0-31) */ - val nextIndex: UInt = Mux(decodeResult(Decoder.maskLogic), 0.U, Mux1H(vSew1H, filterVec.map(_._2))) - - /** The mask group will be updated */ - val maskGroupWillUpdate: Bool = decodeResult(Decoder.maskLogic) || maskExhausted - - /** next mask group */ - val nextMaskGroupCount: UInt = maskGroupCountVec(index) + maskGroupWillUpdate - - /** The index of next execute element in whole instruction */ - val elementIndexForInstruction = maskGroupCountVec(index) ## Mux1H( - vSew1H, - Seq( - maskIndexVec(index)(parameter.datapathWidthBits - 1, 2) ## laneIndex ## maskIndexVec(index)(1, 0), - maskIndexVec(index)(parameter.datapathWidthBits - 1, 1) ## laneIndex ## maskIndexVec(index)(0), - maskIndexVec(index) ## laneIndex - ) - ) - - - /** The next element is out of execution range */ - val outOfExecutionRange = Mux( - decodeResult(Decoder.maskLogic), - (maskGroupCountVec(index) > record.lastGroupForInstruction), - elementIndexForInstruction >= record.csr.vl - ) || record.instructionFinished - - // todo: 如果这一部分时序不够,可以放到下一级去, 然后在下一级 kill nr类型的 - /** Encoding of different element lengths: 1, 8, 16, 32 */ - val elementLengthOH = Mux(decodeResult(Decoder.maskLogic), 1.U, vSew1H(2, 0) ## false.B) - - /** Which group of data will be accessed */ - val dataGroupIndex: UInt = Mux1H( - elementLengthOH, - Seq( - maskGroupCountVec(index), - maskGroupCountVec(index) ## maskIndexVec(index)(4, 2), - maskGroupCountVec(index) ## maskIndexVec(index)(4, 1), - maskGroupCountVec(index) ## maskIndexVec(index) - ) - ) - - /** Calculate the mask of the request that is in s0 */ - val maskEnqueueWireInStage0: UInt = (record.mask.bits >> maskIndexVec(index)).asUInt(3, 0) - - val isTheLastGroup = dataGroupIndex === record.lastGroupForInstruction - // update register in s0 - when(s0Fire) { - maskGroupCountVec(index) := 
nextMaskGroupCount + val laneState: LaneState = Wire(new LaneState(parameter)) + val stage0: LaneStage0 = Module(new LaneStage0(parameter, isLastSlot)) + val stage1 = Module(new LaneStage1(parameter, isLastSlot)) + val stage2 = Module(new LaneStage2(parameter, isLastSlot)) + val executionUnit: LaneExecutionBridge = Module(new LaneExecutionBridge(parameter, isLastSlot)) + val stage3 = Module(new LaneStage3(parameter, isLastSlot)) + + // slot state + laneState.vSew1H := vSew1H + laneState.loadStore := record.laneRequest.loadStore + laneState.laneIndex := laneIndex + laneState.decodeResult := record.laneRequest.decodeResult + laneState.lastGroupForInstruction := record.lastGroupForInstruction + laneState.instructionFinished := record.instructionFinished + laneState.csr := record.csr + laneState.maskType := record.laneRequest.mask + laneState.maskNotMaskedElement := !record.laneRequest.mask || + record.laneRequest.decodeResult(Decoder.maskSource) || + record.laneRequest.decodeResult(Decoder.maskLogic) + laneState.mask := record.mask + laneState.vs1 := record.laneRequest.vs1 + laneState.vs2 := record.laneRequest.vs2 + laneState.vd := record.laneRequest.vd + laneState.instructionIndex := record.laneRequest.instructionIndex + laneState.maskForMaskGroup := maskForMaskGroup + laneState.ffoByOtherLanes := record.ffoByOtherLanes + + stage0.enqueue.valid := slotActive(index) && (record.mask.valid || !record.laneRequest.mask) + stage0.enqueue.bits.maskIndex := maskIndexVec(index) + stage0.enqueue.bits.maskForMaskGroup := record.mask.bits + stage0.enqueue.bits.maskGroupCount := maskGroupCountVec(index) + stage0.state := laneState + + // update lane state + when(stage0.enqueue.fire) { + maskGroupCountVec(index) := stage0.updateLaneState.maskGroupCount // todo: handle all elements in first group are masked - maskIndexVec(index) := nextIndex - groupCounterInStage0 := dataGroupIndex - maskInStage0 := maskEnqueueWireInStage0 - sSendResponseInStage0.foreach(state => - state := - 
!(record.laneRequest.loadStore || - decodeResult(Decoder.readOnly) || - (decodeResult(Decoder.red) && isTheLastGroup) || - (decodeResult(Decoder.maskDestination) && (maskGroupWillUpdate || isTheLastGroup)) || - decodeResult(Decoder.ffo)) - ) - } - - // Handshake for s0 - s0Valid := slotActive(index) && !outOfExecutionRange && (record.mask.valid || !record.laneRequest.mask) - - when(!pipeFinishVec(index) && outOfExecutionRange) { - pipeFinishVec(index) := true.B - } - - instructionFinishedVec(index) := 0.U - when(slotOccupied(index) && pipeClear && pipeFinishVec(index)) { - slotOccupied(index) := false.B - instructionFinishedVec(index) := UIntToOH( - record.laneRequest.instructionIndex(parameter.instructionIndexBits - 2, 0) - ) + maskIndexVec(index) := stage0.updateLaneState.maskIndex + when(stage0.updateLaneState.outOfExecutionRange) { + pipeFinishVec(index) := true.B + } } // update mask todo: handle maskRequestFireOH - slotMaskRequestVec(index).valid := maskExhausted && record.laneRequest.mask && (s0Fire || !record.mask.valid) - slotMaskRequestVec(index).bits := nextMaskGroupCount + slotMaskRequestVec(index).valid := + stage0.updateLaneState.maskExhausted && record.laneRequest.mask && (stage0.enqueue.fire || !record.mask.valid) + slotMaskRequestVec(index).bits := stage0.updateLaneState.maskGroupCount // There are new masks val maskUpdateFire: Bool = slotMaskRequestVec(index).valid && maskRequestFireOH(index) // The old mask is used up - val maskFailure: Bool = maskExhausted && s0Fire + val maskFailure: Bool = stage0.updateLaneState.maskExhausted && stage0.enqueue.fire // update mask register when(maskUpdateFire) { record.mask.bits := maskInput @@ -623,942 +547,119 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ record.mask.valid := maskUpdateFire } - // --- stage 0 end & stage 1_0 start --- - - // stage 1_0 reg - val valid1: Bool = RegInit(false.B) - - /** schedule read src1 */ - val sRead0 = RegInit(true.B) - - /** schedule 
read src2 */ - val sRead1 = RegInit(true.B) - - /** schedule read vd */ - val sRead2 = RegInit(true.B) - - // pipe from stage0 - val groupCounterInStage1: UInt = RegInit(0.U(parameter.groupNumberBits.W)) - - // mask for group pipe from stage0 - val maskInStage1: UInt = RegInit(0.U(4.W)) - val maskForFilterInStage1: UInt = FillInterleaved(4, maskNotMaskedElement) | maskInStage1 - - // read result register - val readResult0: UInt = RegInit(0.U(parameter.datapathWidth.W)) - val readResult1: UInt = RegInit(0.U(parameter.datapathWidth.W)) - val readResult2: UInt = RegInit(0.U(parameter.datapathWidth.W)) - - /** schedule cross lane read LSB.(access VRF for cross read) */ - val sCrossReadLSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - /** schedule cross lane read MSB.(access VRF for cross read) */ - val sCrossReadMSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - /** schedule send cross lane read LSB result. */ - val sSendCrossReadResultLSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - /** schedule send cross lane read MSB result. */ - val sSendCrossReadResultMSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - /** wait for cross lane read LSB result. */ - val wCrossReadLSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - /** wait for cross lane read MSB result. 
*/ - val wCrossReadMSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - // next for update cross read register - val sReadNext0: Bool = RegNext(sRead0, false.B) - val sReadNext1: Bool = RegNext(sRead1, false.B) - val sReadNext2: Bool = RegNext(sRead2, false.B) - val sCrossReadLSBNext: Option[Bool] = sCrossReadLSB.map(RegNext(_, false.B)) - val sCrossReadMSBNext: Option[Bool] = sCrossReadMSB.map(RegNext(_, false.B)) - // All read requests sent - val sReadFinish: Bool = sRead0 && sRead1 && sRead2 - // Waiting to read the response - val sReadFinishNext: Bool = sReadNext0 && sReadNext1 && sReadNext2 - // 'sReadFinishNext' may assert at the next cycle of 's1Fire', need sReadFinish - val readFinish: Bool = sReadFinish && sReadFinishNext - val stage1Finish: Bool = (Seq(readFinish) ++ sSendCrossReadResultLSB ++ - sSendCrossReadResultMSB ++ wCrossReadLSB ++ wCrossReadMSB).reduce(_ && _) - - // control wire - val s1Valid = valid0 - val s1Ready = Wire(Bool()) - val s1Fire = s1Valid && s1Ready - val sSendResponseInStage1 = Option.when(isLastSlot)(RegEnable(sSendResponseInStage0.get, true.B, s1Fire)) - - when(s1Fire ^ s0Fire) { valid0 := s0Fire } - s0Ready := s1Ready || !valid0 - - /** mask offset for this group, needs to be aligned with data group */ - val maskOffsetForNextGroup: UInt = maskIndexVec(index)(4, 2) ## Mux1H( - vSew1H(2, 0), - Seq( - 0.U(2.W), - maskIndexVec(index)(1) ## false.B, - maskIndexVec(index)(1, 0) - ) - ) - - /** mask for this group */ - val nextMaskForGroup: UInt = (record.mask.bits >> maskOffsetForNextGroup)(3, 0) - - // --- stage 1_0 end & stage 1_1 start --- - - // read port 0 - vrfReadRequest(index)(0).valid := !sRead0 && valid1 - vrfReadRequest(index)(0).bits.offset := groupCounterInStage1(parameter.vrfOffsetBits - 1, 0) - vrfReadRequest(index)(0).bits.vs := Mux( - // encodings with vm=0 are reserved for mask type logic - record.laneRequest.decodeResult(Decoder.maskLogic) && !record.laneRequest.decodeResult(Decoder.logic), - // read v0 
for (15. Vector Mask Instructions) - 0.U, - record.laneRequest.vs1 + groupCounterInStage1( - parameter.groupNumberBits - 1, - parameter.vrfOffsetBits - ) - ) - // used for hazard detection - vrfReadRequest(index)(0).bits.instructionIndex := record.laneRequest.instructionIndex - - // read port 1 - if (isLastSlot) { - vrfReadRequest(index)(1).valid := !(sRead1 && sCrossReadLSB.get) && valid1 - vrfReadRequest(index)(1).bits.offset := Mux( - sRead1, - // cross lane LSB - groupCounterInStage1(parameter.vrfOffsetBits - 2, 0) ## false.B, - // normal read - groupCounterInStage1(parameter.vrfOffsetBits - 1, 0) - ) - vrfReadRequest(index)(1).bits.vs := Mux( - decodeResult(Decoder.vwmacc) && sRead1, - // cross read vd for vwmacc, since it need dual [[dataPathWidth]], use vs2 port to read LSB part of it. - record.laneRequest.vd, - // read vs2 for other instruction - record.laneRequest.vs2 - ) + Mux( - sRead1, - // cross lane - groupCounterInStage1(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1), - // no cross lane - groupCounterInStage1(parameter.groupNumberBits - 1, parameter.vrfOffsetBits) - ) - } else { - vrfReadRequest(index)(1).valid := !sRead1 && valid1 - vrfReadRequest(index)(1).bits.offset := groupCounterInStage1(parameter.vrfOffsetBits - 1, 0) - vrfReadRequest(index)(1).bits.vs := record.laneRequest.vs2 + - groupCounterInStage1(parameter.groupNumberBits - 1, parameter.vrfOffsetBits) - } - vrfReadRequest(index)(1).bits.instructionIndex := record.laneRequest.instructionIndex - - // read port 2 - if (isLastSlot) { - vrfReadRequest(index)(2).valid := !(sRead2 && sCrossReadMSB.get) && valid1 - vrfReadRequest(index)(2).bits.offset := Mux( - sRead2, - // cross lane MSB - groupCounterInStage1(parameter.vrfOffsetBits - 2, 0) ## true.B, - // normal read - groupCounterInStage1(parameter.vrfOffsetBits - 1, 0) + instructionFinishedVec(index) := 0.U + when(slotOccupied(index) && pipeClear && pipeFinishVec(index)) { + slotOccupied(index) := false.B + 
instructionFinishedVec(index) := UIntToOH( + record.laneRequest.instructionIndex(parameter.instructionIndexBits - 2, 0) ) - vrfReadRequest(index)(2).bits.vs := Mux( - sRead2 && !record.laneRequest.decodeResult(Decoder.vwmacc), - // cross lane access use vs2 - record.laneRequest.vs2, - // normal read vd or cross read vd for vwmacc - record.laneRequest.vd - ) + - Mux( - sRead2, - groupCounterInStage1(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1), - groupCounterInStage1(parameter.groupNumberBits - 1, parameter.vrfOffsetBits) - ) - } else { - vrfReadRequest(index)(2).valid := !sRead2 && valid1 - vrfReadRequest(index)(2).bits.offset := groupCounterInStage1(parameter.vrfOffsetBits - 1, 0) - vrfReadRequest(index)(2).bits.vs := record.laneRequest.vd + - groupCounterInStage1(parameter.groupNumberBits - 1, parameter.vrfOffsetBits) } - vrfReadRequest(index)(2).bits.instructionIndex := record.laneRequest.instructionIndex - - val readPortFire0: Bool = vrfReadRequest(index)(0).fire - val readPortFire1: Bool = vrfReadRequest(index)(1).fire - val readPortFire2: Bool = vrfReadRequest(index)(2).fire - // reg next for update result - val readPortFireNext0: Bool = RegNext(readPortFire0, false.B) - val readPortFireNext1: Bool = RegNext(readPortFire1, false.B) - val readPortFireNext2: Bool = RegNext(readPortFire2, false.B) - - // update read control register in stage 1 - when(s1Fire) { - // init register by decode result - sRead0 := !decodeResult(Decoder.vtype) - // todo: gather only read vs1? 
- sRead1 := false.B - sRead2 := decodeResult(Decoder.sReadVD) - val sCrossRead = !decodeResult(Decoder.crossRead) - ( - sCrossReadLSB ++ sCrossReadMSB ++ - sSendCrossReadResultLSB ++ sSendCrossReadResultMSB ++ - wCrossReadLSB ++ wCrossReadMSB - ).foreach(state => state := sCrossRead) - - // pipe reg from stage 0 - groupCounterInStage1 := groupCounterInStage0 - maskInStage1 := maskInStage0 - }.otherwise { - // change state machine when read source1 - when(readPortFire0) { - sRead0 := true.B - } - // the priority of `sRead1` is higher than `sCrossReadLSB` - when(readPortFire1) { - sRead1 := true.B - sCrossReadLSB.foreach(d => d := sRead1) - } - // the priority of `sRead2` is higher than `sCrossReadMSB` - when(readPortFire2) { - sRead2 := true.B - sCrossReadMSB.foreach(d => d := sRead2) - } - when(readBusDequeue.valid) { - when(readBusDequeue.bits.isTail) { - wCrossReadMSB.foreach(_ := true.B) - }.otherwise { - wCrossReadLSB.foreach(_ := true.B) - } - } + // stage 1: read stage + stage1.enqueue.valid := stage0.dequeue.valid + stage0.dequeue.ready := stage1.enqueue.ready + stage1.enqueue.bits.groupCounter := stage0.dequeue.bits.groupCounter + stage1.enqueue.bits.mask := stage0.dequeue.bits.mask + stage1.enqueue.bits.sSendResponse.zip(stage0.dequeue.bits.sSendResponse).foreach { case (sink, source) => + sink := source } + stage1.dequeue.bits.readBusDequeueGroup.foreach(data => readBusDequeueGroup := data) - // update read result register - when(readPortFireNext0) { - readResult0 := vrfReadResult(index)(0) + stage1.state := laneState + stage1.readFromScalar := record.laneRequest.readFromScalar + vrfReadRequest(index).zip(stage1.vrfReadRequest).foreach{ case (sink, source) => sink <> source } + vrfReadResult(index).zip(stage1.vrfReadResult).foreach{ case (source, sink) => sink := source } + // connect cross read bus + if(isLastSlot) { + crossLaneRead.valid := stage1.readBusRequest.get.valid + crossLaneRead.bits := stage1.readBusRequest.get.bits + 
stage1.readBusRequest.get.ready := crossLaneReadReady + stage1.readBusDequeue.get <> readBusDequeue } - when(readPortFireNext1) { - if (isLastSlot) { - when(sReadNext1) { - crossReadLSBOut := vrfReadResult(index)(1) - }.otherwise { - readResult1 := vrfReadResult(index)(1) - } - } else { - readResult1 := vrfReadResult(index)(1) - } - } + stage2.enqueue.valid := stage1.dequeue.valid && executionUnit.enqueue.ready + stage1.dequeue.ready := stage2.enqueue.ready && executionUnit.enqueue.ready + executionUnit.enqueue.valid := stage1.dequeue.valid && stage2.enqueue.ready - when(readPortFireNext2) { - if (isLastSlot) { - when(sReadNext2) { - crossReadMSBOut := vrfReadResult(index)(2) - }.otherwise { - readResult2 := vrfReadResult(index)(2) - } - } else { - readResult2 := vrfReadResult(index)(2) - } + stage2.state := laneState + stage2.enqueue.bits.groupCounter := stage1.dequeue.bits.groupCounter + stage2.enqueue.bits.mask := stage1.dequeue.bits.mask + stage2.enqueue.bits.maskForFilter := stage1.dequeue.bits.maskForFilter + stage2.enqueue.bits.src := stage1.dequeue.bits.src + stage2.enqueue.bits.sSendResponse.zip(stage1.dequeue.bits.sSendResponse).foreach { case (sink, source) => + sink := source } - if (isLastSlot) { - // cross read - /** for dequeue group counter match */ - readBusDequeueGroup := groupCounterInStage1 - /** The data to be sent is ready - * need sCrossReadLSB since sCrossReadLSBNext may assert after s1fire. 
- */ - val crossReadDataReadyLSB: Bool = (sCrossReadLSBNext ++ sCrossReadLSB).reduce(_ && _) - val crossReadDataReadyMSB: Bool = (sCrossReadMSBNext ++ sCrossReadMSB).reduce(_ && _) - - /** read data from RF, try to send cross lane read LSB data to ring */ - val tryCrossReadSendLSB: Bool = crossReadDataReadyLSB && !sSendCrossReadResultLSB.get && valid1 - - /** read data from RF, try to send cross lane read MSB data to ring */ - val tryCrossReadSendMSB: Bool = crossReadDataReadyMSB && !sSendCrossReadResultMSB.get && valid1 - // TODO: use [[record.state.sSendCrossReadResultLSB]] - crossLaneRead.bits.sinkIndex := (!tryCrossReadSendLSB) ## laneIndex(parameter.laneNumberBits - 1, 1) - crossLaneRead.bits.isTail := laneIndex(0) - crossLaneRead.bits.sourceIndex := laneIndex - crossLaneRead.bits.instructionIndex := record.laneRequest.instructionIndex - crossLaneRead.bits.counter := groupCounterInStage1 - // TODO: use [[record.state.sSendCrossReadResultLSB]] -> MSB may be ready earlier - crossLaneRead.bits.data := Mux(tryCrossReadSendLSB, crossReadLSBOut, crossReadMSBOut) - crossLaneRead.valid := tryCrossReadSendLSB || tryCrossReadSendMSB - - when(crossLaneReadReady && crossLaneRead.valid) { - when(tryCrossReadSendLSB) { - sSendCrossReadResultLSB.foreach(_ := true.B) - }.otherwise { - sSendCrossReadResultMSB.foreach(_ := true.B) - } - } - - // cross read receive. 
todo: move out slot - when(readBusDequeue.valid) { - assert(readBusDequeue.bits.instructionIndex === record.laneRequest.instructionIndex) - when(readBusDequeue.bits.isTail) { - crossReadMSBIn := readBusDequeue.bits.data - }.otherwise { - crossReadLSBIn := readBusDequeue.bits.data - } - } + executionUnit.state := laneState + executionUnit.enqueue.bits.src := stage1.dequeue.bits.src + executionUnit.enqueue.bits.bordersForMaskLogic := + (stage1.dequeue.bits.groupCounter === record.lastGroupForInstruction && record.isLastLaneForMaskLogic) + executionUnit.enqueue.bits.mask := stage1.dequeue.bits.mask + executionUnit.enqueue.bits.groupCounter := stage1.dequeue.bits.groupCounter + executionUnit.enqueue.bits.sSendResponse.zip(stage1.dequeue.bits.sSendResponse).foreach { case (sink, source) => + sink := source } - - // --- stage 1_1 end & stage 2 start --- - val executionQueue: Queue[LaneExecuteStage] = - Module(new Queue(new LaneExecuteStage(parameter)(isLastSlot), parameter.executionQueueSize)) - - val s2Ready = Wire(Bool()) - val s2Valid = valid1 && stage1Finish - val s2Fire: Bool = s2Ready && s2Valid - val valid2 = RegInit(false.B) - // need clear mask format result when mask group change - val updateMaskResult: Option[Bool] = Option.when(isLastSlot)(Wire(Bool())) - // backpressure for stage 1 - s1Ready := !valid1 || (stage1Finish && s2Ready) - // update 'valid1' - when(s1Fire ^ s2Fire) {valid1 := s1Fire} - val s2ExecuteOver = Wire(Bool()) - - // execution result from execute unit - val executionResult = RegInit(0.U(parameter.datapathWidth.W)) - - /** mask format result for current `mask group` */ - val maskFormatResultForGroup: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.maskGroupWidth.W))) - - /** cross write LSB mask to send out to other lanes. */ - val Stage2crossWriteLSB = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) - - /** cross write MSB data to send out to other lanes. 
*/ - val Stage2crossWriteMSB = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) - // pipe from stage 0 - val sSendResponseInStage2 = Option.when(isLastSlot)(RegEnable(sSendResponseInStage1.get, true.B, s2Fire)) - // ffo success in current data group? - val ffoSuccessImStage2: Option[Bool] = Option.when(isLastSlot)(RegInit(false.B)) - - // executionQueue enqueue - executionQueue.io.enq.bits.pipeData.foreach { data => - data := Mux( - // pipe source1 for gather, pipe v0 for ffo - decodeResult(Decoder.gather) || decodeResult(Decoder.ffo), - readResult0, - readResult1 - ) + executionUnit.enqueue.bits.crossReadSource.zip(stage1.dequeue.bits.crossReadSource).foreach { case (sink, source) => + sink := source } - executionQueue.io.enq.bits.pipeVD.foreach(_ := readResult2) - executionQueue.io.enq.bits.groupCounter := groupCounterInStage1 - executionQueue.io.enq.bits.mask := Mux1H( - vSew1H, - Seq( - maskForFilterInStage1, - FillInterleaved(2, maskForFilterInStage1(1, 0)), - // todo: handle first masked - FillInterleaved(4, maskForFilterInStage1(0)) - ) - ) - - - // 先用一个伪装的执行单元 todo: 等执行单元重构需要替换 - if (true) { - val executionRecord: ExecutionUnitRecord = RegInit(0.U.asTypeOf(new ExecutionUnitRecord(parameter)(isLastSlot))) - - val executeIndex1H: UInt = UIntToOH(executionRecord.executeIndex) - - // state register - val sSendExecuteRequest = RegInit(true.B) - val wExecuteResult = RegInit(true.B) - val executeRequestStateValid: Bool = !sSendExecuteRequest - s2ExecuteOver := sSendExecuteRequest && wExecuteResult - - val source1Select: UInt = Mux(decodeResult(Decoder.vtype), readResult0, record.laneRequest.readFromScalar) - // init register when s2Fire - when(s2Fire) { - executionRecord.crossReadVS2 := decodeResult(Decoder.crossRead) && !decodeResult(Decoder.vwmacc) - executionRecord.bordersForMaskLogic := - (groupCounterInStage1 === record.lastGroupForInstruction && record.isLastLaneForMaskLogic) - executionRecord.mask := maskInStage1 - executionRecord.source := 
VecInit(Seq(source1Select, readResult1, readResult2)) - executionRecord.crossReadSource.foreach(_ := crossReadMSBIn ## crossReadLSBIn) - executionRecord.groupCounter := groupCounterInStage1 - sSendExecuteRequest := decodeResult(Decoder.dontNeedExecuteInLane) - wExecuteResult := decodeResult(Decoder.dontNeedExecuteInLane) - ffoSuccessImStage2.foreach(_ := false.B) - } - - /** the byte-level mask of current execution. - * sew match: - * 0: - * executeIndex match: - * 0: 0001 - * 1: 0010 - * 2: 0100 - * 3: 1000 - * 1: - * executeIndex(0) match: - * 0: 0011 - * 1: 1100 - * 2: - * 1111 - */ - val byteMaskForExecution = Mux1H( - vSew1H(2, 0), - Seq( - executeIndex1H, - executionRecord.executeIndex(1) ## executionRecord.executeIndex(1) ## - !executionRecord.executeIndex(1) ## !executionRecord.executeIndex(1), - 15.U(4.W) - ) - ) - - /** the bit-level mask of current execution. */ - val bitMaskForExecution: UInt = FillInterleaved(8, byteMaskForExecution) - - def CollapseOperand(data: UInt, enable: Bool = true.B, sign: Bool = false.B): UInt = { - val dataMasked: UInt = data & bitMaskForExecution - val select: UInt = Mux(enable, vSew1H(2, 0), 4.U(3.W)) - // when sew = 0 - val collapse0 = Seq.tabulate(4)(i => dataMasked(8 * i + 7, 8 * i)).reduce(_ | _) - // when sew = 1 - val collapse1 = Seq.tabulate(2)(i => dataMasked(16 * i + 15, 16 * i)).reduce(_ | _) - Mux1H( - select, - Seq( - Fill(25, sign && collapse0(7)) ## collapse0, - Fill(17, sign && collapse1(15)) ## collapse1, - (sign && data(31)) ## data - ) - ) - } - - // 有2 * sew 的操作数需要折叠 - def CollapseDoubleOperand(sign: Bool = false.B): UInt = { - val doubleBitEnable = FillInterleaved(16, byteMaskForExecution) - val doubleDataMasked: UInt = executionRecord.crossReadSource.get & doubleBitEnable - val select: UInt = vSew1H(1, 0) - // when sew = 0 - val collapse0 = Seq.tabulate(4)(i => doubleDataMasked(16 * i + 15, 16 * i)).reduce(_ | _) - // when sew = 1 - val collapse1 = Seq.tabulate(2)(i => doubleDataMasked(32 * i + 31, 32 * 
i)).reduce(_ | _) - Mux1H( - select, - Seq( - Fill(16, sign && collapse0(15)) ## collapse0, - collapse1 - ) - ) - } - - /** collapse the dual SEW size operand for cross read. - * it can be vd or src2. - */ - val doubleCollapse = Option.when(isLastSlot)(CollapseDoubleOperand(!decodeResult(Decoder.unsigned1))) - - /** src1 for the execution - * src1 has three types: V, I, X. - * only V type need to use [[CollapseOperand]] - */ - val finalSource1 = CollapseOperand( - // A will be updated every time it is executed, so you can only choose here - Mux(decodeResult(Decoder.red) && !decodeResult(Decoder.maskLogic), reduceResult, executionRecord.source.head), - decodeResult(Decoder.vtype) && (!decodeResult(Decoder.red) || decodeResult(Decoder.maskLogic)), - !decodeResult(Decoder.unsigned0) - ) - - /** src2 for the execution, - * need to take care of cross read. - */ - val finalSource2 = if (isLastSlot) { - Mux( - executionRecord.crossReadVS2, - doubleCollapse.get, - CollapseOperand(executionRecord.source(1), true.B, !decodeResult(Decoder.unsigned1)) - ) - } else { - CollapseOperand(executionRecord.source(1), true.B, !decodeResult(Decoder.unsigned1)) - } - - /** source3 有两种:adc & ma, c等处理mask的时候再处理 - * two types of source3: - * - multiplier accumulate - * - the third input of add with carry - * - * this line only handle the first type. - */ - val finalSource3: UInt = if (isLastSlot) { - Mux( - decodeResult(Decoder.vwmacc), - doubleCollapse.get, - CollapseOperand(executionRecord.source(2)) - ) - }else { - CollapseOperand(executionRecord.source(2)) - } - val maskAsInput = Mux1H( - vSew1H(2, 0), - Seq( - (UIntToOH(executionRecord.executeIndex) & executionRecord.mask).orR, - Mux(executionRecord.executeIndex(1), executionRecord.mask(1), executionRecord.mask(0)), - executionRecord.mask(0) - ) - ) - - /** use mask to fix the case that `vl` is not in the multiple of [[parameter.datapathWidth]]. - * it will fill the LSB of mask to `0`, mask it to not execute those elements. 
- */ - val lastGroupMask = scanRightOr(UIntToOH(record.csr.vl(parameter.datapathWidthBits - 1, 0))) >> 1 - - /** if [[executionRecord.bordersForMaskLogic]], - * use [[lastGroupMask]] to mask the result otherwise use [[fullMask]]. */ - val maskCorrect = Mux(executionRecord.bordersForMaskLogic, lastGroupMask, fullMask) - - val requestToVFU: SlotRequestToVFU = Wire(new SlotRequestToVFU(parameter)) - requestToVFU.src := VecInit(Seq(finalSource1, finalSource2, finalSource3, maskCorrect)) - requestToVFU.opcode := decodeResult(Decoder.uop) - requestToVFU.mask := Mux( - decodeResult(Decoder.adder), - maskAsInput && decodeResult(Decoder.maskSource), - maskAsInput || !record.laneRequest.mask - ) - requestToVFU.sign := !decodeResult(Decoder.unsigned1) - requestToVFU.reverse := decodeResult(Decoder.reverse) - requestToVFU.average := decodeResult(Decoder.average) - requestToVFU.saturate := decodeResult(Decoder.saturate) - requestToVFU.vxrm := record.csr.vxrm - requestToVFU.vSew := record.csr.vSew - requestToVFU.shifterSize := Mux1H( - Mux(executionRecord.crossReadVS2, vSew1H(1, 0), vSew1H(2, 1)), - Seq(false.B ## finalSource1(3), finalSource1(4, 3)) - ) ## finalSource1(2, 0) - requestToVFU.rem := decodeResult(Decoder.uop)(0) - requestToVFU.executeIndex := executionRecord.executeIndex - requestToVFU.popInit := reduceResult - requestToVFU.groupIndex := executionRecord.groupCounter - requestToVFU.laneIndex := laneIndex - requestToVFU.complete := record.ffoByOtherLanes || record.selfCompleted - requestToVFU.maskType := record.laneRequest.mask - - requestToVFU.unitSelet.foreach(_ := decodeResult(Decoder.fpExecutionType)) - requestToVFU.floatMul.foreach(_ := decodeResult(Decoder.floatMul)) - // from float csr - requestToVFU.roundingMode.foreach(_ := record.csr.vxrm) - - requestVec(index) := requestToVFU - - executeEnqueueValid(index) := executeRequestStateValid - - /** select from VFU, send to [[executionResult]], [[Stage2crossWriteLSB]], [[Stage2crossWriteMSB]]. 
*/ - val dataDequeue: UInt = responseVec(index).bits.data - - val executeEnqueueFireForSlot: Bool = executeEnqueueFire(index) - - /** fire of [[dataDequeue]] */ - val executeDequeueFireForSlot: Bool = - Mux(decodeResult(Decoder.multiCycle), responseVec(index).valid, executeEnqueueFireForSlot) - - // mask reg for filtering - val maskForFilter = FillInterleaved(4, maskNotMaskedElement) | executionRecord.mask - // current one hot depends on execute index - val currentOHForExecuteGroup: UInt = UIntToOH(executionRecord.executeIndex) - // Remaining to be requested - val remainder: UInt = maskForFilter & (~scanRightOr(currentOHForExecuteGroup)).asUInt - // Finds the first unfiltered execution. - val nextIndex1H: UInt = ffo(remainder) - - // There are no more left. - val isLastRequestForThisGroup: Bool = - Mux1H(vSew1H, Seq(!remainder.orR, !remainder(1, 0).orR, true.B)) - - /** the next index to execute. - * @note Requests into this disguised execution unit are not executed on the spot - * */ - val nextExecuteIndex: UInt = Mux1H( - vSew1H(1, 0), - Seq( - OHToUInt(nextIndex1H), - // Mux(remainder(0), 0.U, 2.U) - !remainder(0) ## false.B - ) - ) - - // next execute index if data group change - val nextExecuteIndexForNextGroup = Mux1H( - vSew1H(1, 0), - Seq( - OHToUInt(ffo(maskForFilterInStage1)), - !maskForFilterInStage1(0) ## false.B, - ) - ) - - // update execute index - when(executeEnqueueFireForSlot || s2Fire) { - executionRecord.executeIndex := Mux(s2Fire, nextExecuteIndexForNextGroup, nextExecuteIndex) - } - - when(executeEnqueueFireForSlot && isLastRequestForThisGroup) { - sSendExecuteRequest := true.B - } + executionUnit.ffoByOtherLanes := record.ffoByOtherLanes + executionUnit.selfCompleted := record.selfCompleted - // execute response finish - val responseFinish: Bool = Mux( - decodeResult(Decoder.multiCycle), - executeDequeueFireForSlot && sSendExecuteRequest, - executeEnqueueFireForSlot && isLastRequestForThisGroup - ) + // executionUnit <> vfu + 
requestVec(index) := executionUnit.vfuRequest.bits + executeEnqueueValid(index) := executionUnit.vfuRequest.valid + executionUnit.vfuRequest.ready := executeEnqueueFire(index) + executionUnit.dataResponse := responseVec(index) - when(responseFinish) { - wExecuteResult := true.B - } - - val divWriteIndexLatch: UInt = RegEnable(responseVec(index).bits.executeIndex, 0.U(2.W), responseVec(index).valid) - val divWriteIndex = Mux(responseVec(index).valid, responseVec(index).bits.executeIndex, divWriteIndexLatch) - /** the index to write to VRF in [[parameter.dataPathByteWidth]]. - * for long latency pipe, the index will follow the pipeline. - */ - val writeIndex = Mux( - record.laneRequest.decodeResult(Decoder.multiCycle), - divWriteIndex, - executionRecord.executeIndex - ) + when(executionUnit.dequeue.valid)(assert(stage2.dequeue.valid)) + stage3.enqueue.valid := executionUnit.dequeue.valid + executionUnit.dequeue.ready := stage3.enqueue.ready + stage2.dequeue.ready := executionUnit.dequeue.fire - val writeIndex1H = UIntToOH(writeIndex) - - /** VRF byte level mask */ - val writeMaskInByte = Mux1H( - vSew1H(2, 0), - Seq( - writeIndex1H, - writeIndex(1) ## writeIndex(1) ## !writeIndex(1) ## !writeIndex(1), - "b1111".U(4.W) - ) - ) - - /** VRF bit level mask */ - val writeMaskInBit: UInt = FillInterleaved(8, writeMaskInByte) - - /** output of execution unit need to align to VRF in bit level(used in dynamic shift) - * TODO: fix me - */ - val dataOffset: UInt = writeIndex ## 0.U(3.W) - - // TODO: this is a dynamic shift logic, but if we switch to parallel execution unit, we don't need it anymore. - val executeResult = (dataDequeue << dataOffset).asUInt(parameter.datapathWidth - 1, 0) - - // execute 1,2,4 times based on SEW, only write VRF when 32 bits is ready. 
- val resultUpdate: UInt = (executeResult & writeMaskInBit) | (executionResult & (~writeMaskInBit).asUInt) - - // update execute result - when(executeDequeueFireForSlot) { - // update the [[executionResult]] - executionResult := resultUpdate - - // the find first one instruction is finished in this lane - ffoSuccessImStage2.foreach(_ := responseVec(index).bits.ffoSuccess) - when(responseVec(index).bits.ffoSuccess && !record.selfCompleted) { - ffoIndexReg := executionRecord.groupCounter ## Mux1H( - vSew1H, - Seq( - executionRecord.executeIndex ## responseVec(index).bits.data(2, 0), - executionRecord.executeIndex(1) ## responseVec(index).bits.data(3, 0), - responseVec(index).bits.data(4, 0) - ) - ) - } - - // update cross-lane write data - /** sew: - * 0: - * executeIndex: - * 0: mask = 0011, head - * 1: mask = 1100, head - * 2: mask = 0011, tail - * 3: mask = 1100, tail - * 1: - * executeIndex: - * 0: mask = 1111, head - * 2: mask = 1111, tail - * - * 2: not valid in SEW = 2 - */ - if (isLastSlot) { - when(executionRecord.executeIndex(1)) { - Stage2crossWriteMSB.foreach { crossWriteData => - // update tail - crossWriteData := - Mux( - record.csr.vSew(0), - dataDequeue(parameter.datapathWidth - 1, parameter.halfDatapathWidth), - Mux( - executionRecord.executeIndex(0), - dataDequeue(parameter.halfDatapathWidth - 1, 0), - crossWriteData(parameter.datapathWidth - 1, parameter.halfDatapathWidth) - ) - ) ## Mux( - !executionRecord.executeIndex(0) || record.csr.vSew(0), - dataDequeue(parameter.halfDatapathWidth - 1, 0), - crossWriteData(parameter.halfDatapathWidth - 1, 0) - ) - } - }.otherwise { - Stage2crossWriteLSB.foreach { crossWriteData => - crossWriteData := - Mux( - record.csr.vSew(0), - dataDequeue(parameter.datapathWidth - 1, parameter.halfDatapathWidth), - Mux( - executionRecord.executeIndex(0), - dataDequeue(parameter.halfDatapathWidth - 1, 0), - crossWriteData(parameter.datapathWidth - 1, parameter.halfDatapathWidth) - ) - ) ## Mux( - 
!executionRecord.executeIndex(0) || record.csr.vSew(0), - dataDequeue(parameter.halfDatapathWidth - 1, 0), - crossWriteData(parameter.halfDatapathWidth - 1, 0) - ) - } - } - } - } - - // update mask result - if (isLastSlot) { - val current1HInGroup = Mux1H( - vSew1H(2, 0), - Seq( - // 32bit, 4 bit per data group, it will had 8 data groups -> executeIndex1H << 4 * groupCounter(2, 0) - executeIndex1H << (executionRecord.groupCounter(2, 0) ## 0.U(2.W)), - // 2 bit per data group, it will had 16 data groups -> executeIndex1H << 2 * groupCounter(3, 0) - (executionRecord.executeIndex(1) ## !executionRecord.executeIndex(1)) << - (executionRecord.groupCounter(3, 0) ## false.B), - // 1 bit per data group, it will had 32 data groups -> executeIndex1H << 1 * groupCounter(4, 0) - 1.U << executionRecord.groupCounter(4, 0) - ) - ).asUInt - - /** update value for [[maskFormatResultUpdate]], - * it comes from ALU. - */ - val elementMaskFormatResult: UInt = Mux(responseVec(index).bits.adderMaskResp , current1HInGroup, 0.U) - - /** update value for [[maskFormatResultForGroup]] */ - val maskFormatResultUpdate: UInt = maskFormatResultForGroup.get | elementMaskFormatResult - - // update `maskFormatResultForGroup` - when(executeDequeueFireForSlot || updateMaskResult.get) { - maskFormatResultForGroup.foreach(_ := Mux(executeDequeueFireForSlot, maskFormatResultUpdate, 0.U)) - } - // masked element don't update 'reduceResult' - val updateReduceResult = (maskNotMaskedElement || maskAsInput) && executeDequeueFireForSlot - // update `reduceResult` - when( updateReduceResult || updateMaskResult.get) { - reduceResult := Mux(updateReduceResult && decodeResult(Decoder.red), dataDequeue, 0.U) - } - } + if (!isLastSlot) { + stage3.enqueue.bits := DontCare } - - // --- stage 2 end & stage 3 start --- - // Since top has only one mask processing unit, - // all instructions that interact with top are placed in a single slot - - val s3Valid = valid2 && s2ExecuteOver - val s3Ready = Wire(Bool()) - val 
s3Fire = s3Valid && s3Ready - // Used to update valid3 without writing vrf - val s3DequeueFire: Option[Bool] = Option.when(isLastSlot)(Wire(Bool())) - val valid3: Option[Bool] = Option.when(isLastSlot)(RegInit(0.U(false.B))) - // use for cross-lane write - val groupCounterInStage3: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(7.W))) - val maskInStage3: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(4.W))) - val executionResultInStage3 = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) - val pipeDataInStage3 = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) - // result for vfirst type instruction - val ffoIndexRegInStage3 = Option.when(isLastSlot)(RegInit(0.U(log2Ceil(parameter.vLen / 8).W))) - // pipe vd for ff0 - val pipeVDInStage3: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) - updateMaskResult.foreach(_ := s3Fire && !sSendResponseInStage2.get) - // cross write result - val Stage3crossWriteLSB = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) - val Stage3crossWriteMSB = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) - - // cross write state - /** schedule cross lane write LSB */ - val sCrossWriteLSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - /** schedule cross lane write MSB */ - val sCrossWriteMSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - // data for response to scheduler - val schedulerResponseData: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) - - // state for response to scheduler - /** schedule send [[LaneResponse]] to scheduler */ - val sSendResponse: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - /** wait scheduler send [[LaneResponseFeedback]] */ - val wResponseFeedback: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) - - val vrfWriteVundle: VRFWriteRequest = new VRFWriteRequest( - parameter.vrfParam.regNumBits, - parameter.vrfOffsetBits, - 
parameter.instructionIndexBits, - parameter.datapathWidth - ) - - val vrfWriteQueue: Queue[VRFWriteRequest] = - Module(new Queue(vrfWriteVundle, entries = 1, pipe = false, flow = false)) - valid3.foreach {data => when(s3DequeueFire.get ^ s3Fire) { data := s3Fire }} - - /** Write queue ready or not need to write. */ - val vrfWriteReady: Bool = vrfWriteQueue.io.enq.ready || decodeResult(Decoder.sWrite) - + stage3.state := laneState + stage3.enqueue.bits.groupCounter := stage2.dequeue.bits.groupCounter + stage3.enqueue.bits.mask := stage2.dequeue.bits.mask if (isLastSlot) { - // VRF cross write - /** execute in ALU, try to send cross lane write LSB data to ring */ - val tryCrossWriteSendLSB = valid3.get && !sCrossWriteLSB.get - - /** execute in ALU, try to send cross lane write MSB data to ring */ - val tryCrossWriteSendMSB = valid3.get && !sCrossWriteMSB.get - crossLaneWrite.bits.sinkIndex := laneIndex(parameter.laneNumberBits - 2, 0) ## (!tryCrossWriteSendLSB) - crossLaneWrite.bits.sourceIndex := laneIndex - crossLaneWrite.bits.isTail := laneIndex(parameter.laneNumberBits - 1) - crossLaneWrite.bits.instructionIndex := record.laneRequest.instructionIndex - crossLaneWrite.bits.counter := groupCounterInStage3.get - crossLaneWrite.bits.data := Mux(tryCrossWriteSendLSB, Stage3crossWriteLSB.get, Stage3crossWriteMSB.get) - crossLaneWrite.bits.mask := Mux(tryCrossWriteSendLSB, maskInStage3.get(1, 0), maskInStage3.get(3, 2)) - crossLaneWrite.valid := tryCrossWriteSendLSB || tryCrossWriteSendMSB - - when(crossLaneWriteReady && crossLaneWrite.valid) { - sCrossWriteLSB.foreach(_ := true.B) - when(sCrossWriteLSB.get) { - sCrossWriteMSB.foreach(_ := true.B) - } - } - // scheduler synchronization - val schedulerFinish: Bool = (sSendResponse ++ wResponseFeedback).reduce(_ && _) - - // mask request - laneResponse.valid := valid3.get && !sSendResponse.get - laneResponse.bits.data := Mux(decodeResult(Decoder.ffo), ffoIndexRegInStage3.get, pipeDataInStage3.get) - 
laneResponse.bits.toLSU := record.laneRequest.loadStore - laneResponse.bits.instructionIndex := record.laneRequest.instructionIndex - laneResponse.bits.ffoSuccess := record.selfCompleted - - sSendResponse.foreach(state => when(laneResponse.valid) { state := true.B}) - wResponseFeedback.foreach(state => when(laneResponseFeedback.valid) { state := true.B}) - + stage3.enqueue.bits.sSendResponse := stage2.dequeue.bits.sSendResponse.get + stage3.enqueue.bits.ffoSuccess := executionUnit.dequeue.bits.ffoSuccess.get + } + stage3.enqueue.bits.data := executionUnit.dequeue.bits.data + stage3.enqueue.bits.pipeData := stage2.dequeue.bits.pipeData.getOrElse(DontCare) + stage3.enqueue.bits.ffoIndex := executionUnit.dequeue.bits.ffoIndex + executionUnit.dequeue.bits.crossWriteData.foreach(data => stage3.enqueue.bits.crossWriteData := data) + stage2.dequeue.bits.sSendResponse.foreach(_ => stage3.enqueue.bits.sSendResponse := _) + executionUnit.dequeue.bits.ffoSuccess.foreach(_ => stage3.enqueue.bits.ffoSuccess := _) + + if (isLastSlot){ when(laneResponseFeedback.valid && slotOccupied(index)) { - when(laneResponseFeedback.bits.complete) { record.ffoByOtherLanes := true.B } - assert(laneResponseFeedback.bits.instructionIndex === record.laneRequest.instructionIndex) - } - - // enqueue write for last slot - vrfWriteQueue.io.enq.valid := valid3.get && schedulerFinish && !decodeResult(Decoder.sWrite) - - // UInt(5.W) + UInt(3.W), use `+` here - vrfWriteQueue.io.enq.bits.vd := record.laneRequest.vd + groupCounterInStage3.get( - parameter.groupNumberBits - 1, - parameter.vrfOffsetBits - ) - - vrfWriteQueue.io.enq.bits.offset := groupCounterInStage3.get - - /** what will write into vrf when ffo type instruction finished by other lanes */ - val completeWrite: UInt = Mux(record.laneRequest.mask, (~pipeDataInStage3.get).asUInt & pipeVDInStage3.get, 0.U) - vrfWriteQueue.io.enq.bits.data := Mux( - decodeResult(Decoder.nr), - pipeDataInStage3.get, - Mux( - record.ffoByOtherLanes, - 
completeWrite, - executionResultInStage3.get - ) - ) - vrfWriteQueue.io.enq.bits.last := DontCare - vrfWriteQueue.io.enq.bits.instructionIndex := record.laneRequest.instructionIndex - vrfWriteQueue.io.enq.bits.mask := maskInStage3.get - - // Handshake - /** Cross-lane writing is over */ - val CrossLaneWriteOver: Bool = (sCrossWriteLSB ++ sCrossWriteMSB).reduce(_ && _) - - s3Ready := !valid3.get || (CrossLaneWriteOver && schedulerFinish && vrfWriteReady) - s3DequeueFire.foreach(_ := valid3.get && CrossLaneWriteOver && schedulerFinish && vrfWriteReady) - - //Update the registers of stage3 - when(s3Fire) { - groupCounterInStage3.foreach(_ := executionQueue.io.deq.bits.groupCounter) - maskInStage3.foreach(_ := executionQueue.io.deq.bits.mask) - executionResultInStage3.foreach(_ := executionResult) - // todo: update maskFormatResult & reduceResult - pipeDataInStage3.foreach(_ := Mux( - decodeResult(Decoder.maskDestination), - maskFormatResultForGroup.get, - Mux( - decodeResult(Decoder.red), - reduceResult, - executionQueue.io.deq.bits.pipeData.get - ) - )) - ffoIndexRegInStage3.foreach(_ := ffoIndexReg) - pipeVDInStage3.foreach(_ := executionQueue.io.deq.bits.pipeVD.get) - // cross write data - Stage3crossWriteLSB.foreach(_ := Stage2crossWriteLSB.get) - Stage3crossWriteMSB.foreach(_ := Stage2crossWriteMSB.get) - // init state - (sCrossWriteLSB ++ sCrossWriteMSB).foreach(_ := !decodeResult(Decoder.crossWrite)) - // todo: save mask destination result if needSendResponse at stage 2? 
- (sSendResponse ++ wResponseFeedback).foreach( - _ := decodeResult(Decoder.scheduler) || sSendResponseInStage2.get - ) - - // save scheduler data, todo: select result when update 'executionResultInStage3' - schedulerResponseData.foreach { data => - data := Mux( - record.laneRequest.decodeResult(Decoder.maskDestination), - maskFormatResultForGroup.get, - executionResultInStage3.get - ) + when(laneResponseFeedback.bits.complete) { + record.ffoByOtherLanes := true.B } - - ffoSuccessImStage2.foreach(record.selfCompleted := _) + } + when(stage3.enqueue.fire) { + executionUnit.dequeue.bits.ffoSuccess.foreach(record.selfCompleted := _) // This group found means the next group ended early record.ffoByOtherLanes := record.ffoByOtherLanes || record.selfCompleted } - } else { - // Normal will be one level less - vrfWriteQueue.io.enq.valid := s3Fire - - // UInt(5.W) + UInt(3.W), use `+` here - vrfWriteQueue.io.enq.bits.vd := record.laneRequest.vd + executionQueue.io.deq.bits.groupCounter( - parameter.groupNumberBits - 1, - parameter.vrfOffsetBits - ) - - vrfWriteQueue.io.enq.bits.offset := executionQueue.io.deq.bits.groupCounter - - vrfWriteQueue.io.enq.bits.data := executionResult - vrfWriteQueue.io.enq.bits.last := DontCare - vrfWriteQueue.io.enq.bits.instructionIndex := record.laneRequest.instructionIndex - vrfWriteQueue.io.enq.bits.mask := executionQueue.io.deq.bits.mask + crossLaneWrite.valid := stage3.crossWritePort.get.valid + crossLaneWrite.bits := stage3.crossWritePort.get.bits + stage3.crossWritePort.get.ready := crossLaneWriteReady - // Handshake - s3Ready := vrfWriteQueue.io.enq.ready + laneResponse <> stage3.laneResponse.get + stage3.laneResponseFeedback.get <> laneResponseFeedback } - s2Ready := !valid2 || (s2ExecuteOver && s3Ready && executionQueue.io.enq.ready) - when(s2Fire ^ s3Fire) {valid2 := s2Fire} - // s2 enqueue valid & s2 all ready except executionQueue - executionQueue.io.enq.valid := s2Valid && ((s2ExecuteOver && s3Ready) || !valid2) - 
executionQueue.io.deq.ready := s3Ready && s2ExecuteOver // --- stage 3 end & stage 4 start --- // vrfWriteQueue try to write vrf - vrfWriteArbiter(index).valid := vrfWriteQueue.io.deq.valid - vrfWriteArbiter(index).bits := vrfWriteQueue.io.deq.bits - vrfWriteQueue.io.deq.ready := vrfWriteFire(index) + vrfWriteArbiter(index).valid := stage3.vrfWriteRequest.valid + vrfWriteArbiter(index).bits := stage3.vrfWriteRequest.bits + stage3.vrfWriteRequest.ready := vrfWriteFire(index) - pipeClear := !(Seq(valid0, valid1, valid2, vrfWriteQueue.io.deq.valid) ++ valid3).reduce(_ || _) + pipeClear := !Seq(stage0.stageValid, stage1.stageValid, stage2.stageValid, stage3.stageValid).reduce(_ || _) } // Read Ring @@ -1641,17 +742,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // VFU // TODO: reuse logic, adder, multiplier datapath { - /** - * /** enqueue valid for execution unit */ - * val executeEnqueueValid: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool())) - * - * /** request from slot to vfu. 
*/ - * val requestVec: Vec[LaneRequestToVFU] = Wire(Vec(parameter.chainingSize, new LaneRequestToVFU(parameter))) - * - * /** enqueue fire signal for execution unit */ - * val executeEnqueueFire: UInt = Wire(UInt(parameter.chainingSize.W)) - * - * */ val decodeResultVec: Seq[DecodeBundle] = slotControl.map(_.laneRequest.decodeResult) vfu.vfuConnect(parameter.vfuInstantiateParameter)( diff --git a/v/src/laneStage/LaneExecutionBridge.scala b/v/src/laneStage/LaneExecutionBridge.scala new file mode 100644 index 000000000..049f93342 --- /dev/null +++ b/v/src/laneStage/LaneExecutionBridge.scala @@ -0,0 +1,472 @@ +package v + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.decode.DecodeBundle + +class LaneExecuteRequest(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { + val src: Vec[UInt] = Vec(3, UInt(parameter.datapathWidth.W)) + val crossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 2).W)) + val bordersForMaskLogic: Bool = Bool() + val mask: UInt = UInt((parameter.datapathWidth / 8).W) + val groupCounter: UInt = UInt(parameter.groupNumberBits.W) + val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool()) +} + +class LaneExecuteResponse(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { + val data: UInt = UInt(parameter.datapathWidth.W) + val ffoIndex: UInt = UInt(log2Ceil(parameter.vLen / 8).W) + val crossWriteData: Option[Vec[UInt]] = Option.when(isLastSlot)(Vec(2, UInt(parameter.datapathWidth.W))) + val ffoSuccess: Option[Bool] = Option.when(isLastSlot)(Bool()) +} + +class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean) extends Module { + // request from lane slot + val enqueue: DecoupledIO[LaneExecuteRequest] = IO(Flipped(Decoupled(new LaneExecuteRequest(parameter, isLastSlot)))) + // request from lane slot + val dequeue: DecoupledIO[LaneExecuteResponse] = IO(Decoupled(new LaneExecuteResponse(parameter, isLastSlot))) + // request to vfu + val 
vfuRequest: DecoupledIO[SlotRequestToVFU] = IO(Decoupled(new SlotRequestToVFU(parameter))) + // response from vfu + val dataResponse: ValidIO[VFUResponseToSlot] = IO(Flipped(Valid(new VFUResponseToSlot(parameter)))) + val state: LaneState = IO(Input(new LaneState(parameter))) + val ffoByOtherLanes: Bool = IO(Input(Bool())) + val selfCompleted: Bool = IO(Input(Bool())) + + val decodeResult: DecodeBundle = state.decodeResult + + val executionRecord: ExecutionUnitRecord = RegInit(0.U.asTypeOf(new ExecutionUnitRecord(parameter)(isLastSlot))) + + val executeIndex1H: UInt = UIntToOH(executionRecord.executeIndex) + + // ffo success in current data group? + val ffoSuccess: Option[Bool] = Option.when(isLastSlot)(RegInit(false.B)) + /** result of reduce instruction. */ + val reduceResult: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) + // execution result from execute unit + val executionResult = RegInit(0.U(parameter.datapathWidth.W)) + // todo: only slot 0? + val ffoIndexReg: UInt = RegInit(0.U(log2Ceil(parameter.vLen / 8).W)) + val crossWriteMSB: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) + val crossWriteLSB: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) + + /** mask format result for current `mask group` */ + val maskFormatResultForGroup: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.maskGroupWidth.W))) + + // state register + val stageValidReg: Bool = RegInit(false.B) + val sSendExecuteRequest: Bool = RegInit(true.B) + val wExecuteResult: Bool = RegInit(true.B) + val executeRequestStateValid: Bool = !sSendExecuteRequest + val executeOver: Bool = sSendExecuteRequest && wExecuteResult + dequeue.valid := executeOver && stageValidReg + when(enqueue.fire ^ dequeue.fire) { + stageValidReg := enqueue.fire + } + enqueue.ready := !stageValidReg || (executeOver && dequeue.ready) + + when(enqueue.fire) { + executionRecord.crossReadVS2 := 
decodeResult(Decoder.crossRead) && !decodeResult(Decoder.vwmacc) + executionRecord.bordersForMaskLogic := enqueue.bits.bordersForMaskLogic + executionRecord.mask := enqueue.bits.mask + executionRecord.source := enqueue.bits.src + executionRecord.crossReadSource.foreach(_ := enqueue.bits.crossReadSource.get) + executionRecord.sSendResponse.foreach(_ := enqueue.bits.sSendResponse.get) + executionRecord.groupCounter := enqueue.bits.groupCounter + sSendExecuteRequest := decodeResult(Decoder.dontNeedExecuteInLane) + wExecuteResult := decodeResult(Decoder.dontNeedExecuteInLane) + ffoSuccess.foreach(_ := false.B) + } + + /** the byte-level mask of current execution. + * sew match: + * 0: + * executeIndex match: + * 0: 0001 + * 1: 0010 + * 2: 0100 + * 3: 1000 + * 1: + * executeIndex(0) match: + * 0: 0011 + * 1: 1100 + * 2: + * 1111 + */ + val byteMaskForExecution = Mux1H( + state.vSew1H(2, 0), + Seq( + executeIndex1H, + executionRecord.executeIndex(1) ## executionRecord.executeIndex(1) ## + !executionRecord.executeIndex(1) ## !executionRecord.executeIndex(1), + 15.U(4.W) + ) + ) + + /** the bit-level mask of current execution. 
*/ + val bitMaskForExecution: UInt = FillInterleaved(8, byteMaskForExecution) + + def CollapseOperand(data: UInt, enable: Bool = true.B, sign: Bool = false.B): UInt = { + val dataMasked: UInt = data & bitMaskForExecution + val select: UInt = Mux(enable, state.vSew1H(2, 0), 4.U(3.W)) + // when sew = 0 + val collapse0 = Seq.tabulate(4)(i => dataMasked(8 * i + 7, 8 * i)).reduce(_ | _) + // when sew = 1 + val collapse1 = Seq.tabulate(2)(i => dataMasked(16 * i + 15, 16 * i)).reduce(_ | _) + Mux1H( + select, + Seq( + Fill(25, sign && collapse0(7)) ## collapse0, + Fill(17, sign && collapse1(15)) ## collapse1, + (sign && data(31)) ## data + ) + ) + } + + // 有2 * sew 的操作数需要折叠 + def CollapseDoubleOperand(sign: Bool = false.B): UInt = { + val doubleBitEnable = FillInterleaved(16, byteMaskForExecution) + val doubleDataMasked: UInt = executionRecord.crossReadSource.get & doubleBitEnable + val select: UInt = state.vSew1H(1, 0) + // when sew = 0 + val collapse0 = Seq.tabulate(4)(i => doubleDataMasked(16 * i + 15, 16 * i)).reduce(_ | _) + // when sew = 1 + val collapse1 = Seq.tabulate(2)(i => doubleDataMasked(32 * i + 31, 32 * i)).reduce(_ | _) + Mux1H( + select, + Seq( + Fill(16, sign && collapse0(15)) ## collapse0, + collapse1 + ) + ) + } + + /** collapse the dual SEW size operand for cross read. + * it can be vd or src2. + */ + val doubleCollapse = Option.when(isLastSlot)(CollapseDoubleOperand(!decodeResult(Decoder.unsigned1))) + + /** src1 for the execution + * src1 has three types: V, I, X. 
+ * only V type need to use [[CollapseOperand]] + */ + val finalSource1 = CollapseOperand( + // A will be updated every time it is executed, so you can only choose here + Mux( + decodeResult(Decoder.red) && !decodeResult(Decoder.maskLogic), + reduceResult.getOrElse(0.U), + executionRecord.source.head + ), + decodeResult(Decoder.vtype) && (!decodeResult(Decoder.red) || decodeResult(Decoder.maskLogic)), + !decodeResult(Decoder.unsigned0) + ) + + /** src2 for the execution, + * need to take care of cross read. + */ + val finalSource2 = if (isLastSlot) { + Mux( + executionRecord.crossReadVS2, + doubleCollapse.get, + CollapseOperand(executionRecord.source(1), true.B, !decodeResult(Decoder.unsigned1)) + ) + } else { + CollapseOperand(executionRecord.source(1), true.B, !decodeResult(Decoder.unsigned1)) + } + + /** source3 有两种:adc & ma, c等处理mask的时候再处理 + * two types of source3: + * - multiplier accumulate + * - the third input of add with carry + * + * this line only handle the first type. + */ + val finalSource3: UInt = if (isLastSlot) { + Mux( + decodeResult(Decoder.vwmacc), + doubleCollapse.get, + CollapseOperand(executionRecord.source(2)) + ) + } else { + CollapseOperand(executionRecord.source(2)) + } + + val maskAsInput = Mux1H( + state.vSew1H(2, 0), + Seq( + (UIntToOH(executionRecord.executeIndex) & executionRecord.mask).orR, + Mux(executionRecord.executeIndex(1), executionRecord.mask(1), executionRecord.mask(0)), + executionRecord.mask(0) + ) + ) + + /** use mask to fix the case that `vl` is not in the multiple of [[parameter.datapathWidth]]. + * it will fill the LSB of mask to `0`, mask it to not execute those elements. + */ + val lastGroupMask = scanRightOr(UIntToOH(state.csr.vl(parameter.datapathWidthBits - 1, 0))) >> 1 + + val fullMask: UInt = (-1.S(parameter.datapathWidth.W)).asUInt + /** if [[executionRecord.bordersForMaskLogic]], + * use [[lastGroupMask]] to mask the result otherwise use [[fullMask]]. 
*/ + val maskCorrect = Mux(executionRecord.bordersForMaskLogic, lastGroupMask, fullMask) + + vfuRequest.bits.src := VecInit(Seq(finalSource1, finalSource2, finalSource3, maskCorrect)) + vfuRequest.bits.opcode := decodeResult(Decoder.uop) + vfuRequest.bits.mask := Mux( + decodeResult(Decoder.adder), + maskAsInput && decodeResult(Decoder.maskSource), + maskAsInput || !state.maskType + ) + vfuRequest.bits.sign := !decodeResult(Decoder.unsigned1) + vfuRequest.bits.reverse := decodeResult(Decoder.reverse) + vfuRequest.bits.average := decodeResult(Decoder.average) + vfuRequest.bits.saturate := decodeResult(Decoder.saturate) + vfuRequest.bits.vxrm := state.csr.vxrm + vfuRequest.bits.vSew := state.csr.vSew + vfuRequest.bits.shifterSize := Mux1H( + Mux(executionRecord.crossReadVS2, state.vSew1H(1, 0), state.vSew1H(2, 1)), + Seq(false.B ## finalSource1(3), finalSource1(4, 3)) + ) ## finalSource1(2, 0) + vfuRequest.bits.rem := decodeResult(Decoder.uop)(0) + vfuRequest.bits.executeIndex := executionRecord.executeIndex + vfuRequest.bits.popInit := reduceResult.getOrElse(0.U) + vfuRequest.bits.groupIndex := executionRecord.groupCounter + vfuRequest.bits.laneIndex := state.laneIndex + vfuRequest.bits.complete := ffoByOtherLanes || selfCompleted + vfuRequest.bits.maskType := state.maskType + vfuRequest.bits.unitSelet.foreach(_ := decodeResult(Decoder.fpExecutionType)) + vfuRequest.bits.floatMul.foreach(_ := decodeResult(Decoder.floatMul)) + + // from float csr + vfuRequest.bits.roundingMode.foreach(_ := state.csr.vxrm) + + vfuRequest.valid := executeRequestStateValid + + /** select from VFU, send to [[executionResult]], [[crossWriteLSB]], [[crossWriteMSB]]. 
*/ + val dataDequeue = dataResponse.bits.data + + val executeRequestFire: Bool = vfuRequest.fire + + val executeResponseFire: Bool = Mux(decodeResult(Decoder.multiCycle), dataResponse.valid, executeRequestFire) + + // mask reg for filtering + val maskForFilter = FillInterleaved(4, state.maskNotMaskedElement) | + Mux(enqueue.fire, enqueue.bits.mask, executionRecord.mask) + // current one hot depends on execute index + val currentOHForExecuteGroup: UInt = UIntToOH(executionRecord.executeIndex) + // Remaining to be requested + val remainder: UInt = maskForFilter & (~scanRightOr(currentOHForExecuteGroup)).asUInt + // Finds the first unfiltered execution. + val nextIndex1H: UInt = ffo(remainder) + + // There are no more left. + val isLastRequestForThisGroup: Bool = + Mux1H(state.vSew1H, Seq(!remainder.orR, !remainder(1, 0).orR, true.B)) + + /** the next index to execute. + * + * @note Requests into this disguised execution unit are not executed on the spot + * */ + val nextExecuteIndex: UInt = Mux1H( + state.vSew1H(1, 0), + Seq( + OHToUInt(nextIndex1H), + // Mux(remainder(0), 0.U, 2.U) + !remainder(0) ## false.B + ) + ) + + // next execute index if data group change + val nextExecuteIndexForNextGroup: UInt = Mux1H( + state.vSew1H(1, 0), + Seq( + OHToUInt(ffo(maskForFilter)), + !maskForFilter(0) ## false.B, + ) + ) + + // update execute index + when(executeRequestFire || enqueue.fire) { + executionRecord.executeIndex := Mux(enqueue.fire, nextExecuteIndexForNextGroup, nextExecuteIndex) + } + + when(executeRequestFire && isLastRequestForThisGroup) { + sSendExecuteRequest := true.B + } + + // execute response finish + val responseFinish: Bool = Mux( + decodeResult(Decoder.multiCycle), + executeResponseFire && sSendExecuteRequest, + executeRequestFire && isLastRequestForThisGroup + ) + + when(responseFinish) { + wExecuteResult := true.B + } + + val multiCycleWriteIndexLatch: UInt = + RegEnable(dataResponse.bits.executeIndex, 0.U(2.W), dataResponse.valid) + val 
multiCycleWriteIndex = Mux(dataResponse.valid, dataResponse.bits.executeIndex, multiCycleWriteIndexLatch) + /** the index to write to VRF in [[parameter.dataPathByteWidth]]. + * for long latency pipe, the index will follow the pipeline. + */ + val writeIndex = Mux( + decodeResult(Decoder.multiCycle), + multiCycleWriteIndex, + executionRecord.executeIndex + ) + + val writeIndex1H = UIntToOH(writeIndex) + + /** VRF byte level mask */ + val writeMaskInByte = Mux1H( + state.vSew1H(2, 0), + Seq( + writeIndex1H, + writeIndex(1) ## writeIndex(1) ## !writeIndex(1) ## !writeIndex(1), + "b1111".U(4.W) + ) + ) + + /** VRF bit level mask */ + val writeMaskInBit: UInt = FillInterleaved(8, writeMaskInByte) + + /** output of execution unit need to align to VRF in bit level(used in dynamic shift) + * TODO: fix me + */ + val dataOffset: UInt = writeIndex ## 0.U(3.W) + + // TODO: this is a dynamic shift logic, but if we switch to parallel execution unit, we don't need it anymore. + val executeResult = (dataDequeue << dataOffset).asUInt(parameter.datapathWidth - 1, 0) + + // execute 1,2,4 times based on SEW, only write VRF when 32 bits is ready. 
+ val resultUpdate: UInt = (executeResult & writeMaskInBit) | (executionResult & (~writeMaskInBit).asUInt) + + // update execute result + when(executeResponseFire) { + // update the [[executionResult]] + executionResult := resultUpdate + + // the find first one instruction is finished in this lane + ffoSuccess.foreach(_ := dataResponse.bits.ffoSuccess) + when(dataResponse.bits.ffoSuccess && !selfCompleted) { + ffoIndexReg := executionRecord.groupCounter ## Mux1H( + state.vSew1H, + Seq( + executionRecord.executeIndex ## dataResponse.bits.data(2, 0), + executionRecord.executeIndex(1) ## dataResponse.bits.data(3, 0), + dataResponse.bits.data(4, 0) + ) + ) + } + + // update cross-lane write data + /** sew: + * 0: + * executeIndex: + * 0: mask = 0011, head + * 1: mask = 1100, head + * 2: mask = 0011, tail + * 3: mask = 1100, tail + * 1: + * executeIndex: + * 0: mask = 1111, head + * 2: mask = 1111, tail + * + * 2: not valid in SEW = 2 + */ + if (isLastSlot) { + when(executionRecord.executeIndex(1)) { + crossWriteMSB.foreach { crossWriteData => + // update tail + crossWriteData := + Mux( + state.csr.vSew(0), + dataDequeue(parameter.datapathWidth - 1, parameter.halfDatapathWidth), + Mux( + executionRecord.executeIndex(0), + dataDequeue(parameter.halfDatapathWidth - 1, 0), + crossWriteData(parameter.datapathWidth - 1, parameter.halfDatapathWidth) + ) + ) ## Mux( + !executionRecord.executeIndex(0) || state.csr.vSew(0), + dataDequeue(parameter.halfDatapathWidth - 1, 0), + crossWriteData(parameter.halfDatapathWidth - 1, 0) + ) + } + }.otherwise { + crossWriteLSB.foreach { crossWriteData => + crossWriteData := + Mux( + state.csr.vSew(0), + dataDequeue(parameter.datapathWidth - 1, parameter.halfDatapathWidth), + Mux( + executionRecord.executeIndex(0), + dataDequeue(parameter.halfDatapathWidth - 1, 0), + crossWriteData(parameter.datapathWidth - 1, parameter.halfDatapathWidth) + ) + ) ## Mux( + !executionRecord.executeIndex(0) || state.csr.vSew(0), + 
dataDequeue(parameter.halfDatapathWidth - 1, 0), + crossWriteData(parameter.halfDatapathWidth - 1, 0) + ) + } + } + } + } + + // update mask result + if (isLastSlot) { + val current1HInGroup = Mux1H( + state.vSew1H(2, 0), + Seq( + // 32bit, 4 bit per data group, it will had 8 data groups -> executeIndex1H << 4 * groupCounter(2, 0) + executeIndex1H << (executionRecord.groupCounter(2, 0) ## 0.U(2.W)), + // 2 bit per data group, it will had 16 data groups -> executeIndex1H << 2 * groupCounter(3, 0) + (executionRecord.executeIndex(1) ## !executionRecord.executeIndex(1)) << + (executionRecord.groupCounter(3, 0) ## false.B), + // 1 bit per data group, it will had 32 data groups -> executeIndex1H << 1 * groupCounter(4, 0) + 1.U << executionRecord.groupCounter(4, 0) + ) + ).asUInt + + /** update value for [[maskFormatResultUpdate]], + * it comes from ALU. + */ + val elementMaskFormatResult: UInt = Mux(dataResponse.bits.adderMaskResp, current1HInGroup, 0.U) + + /** update value for [[maskFormatResultForGroup]] */ + val maskFormatResultUpdate: UInt = maskFormatResultForGroup.get | elementMaskFormatResult + + val updateMaskResult: Option[Bool] = executionRecord.sSendResponse.map(!_ && dequeue.fire) + + // update `maskFormatResultForGroup` + when(executeResponseFire || updateMaskResult.get) { + maskFormatResultForGroup.foreach(_ := Mux(executeResponseFire, maskFormatResultUpdate, 0.U)) + } + // masked element don't update 'reduceResult' + val updateReduceResult = (state.maskNotMaskedElement || maskAsInput) && executeResponseFire + // update `reduceResult` + when(updateReduceResult || updateMaskResult.get) { + reduceResult.get := Mux(updateReduceResult && decodeResult(Decoder.red), dataDequeue, 0.U) + } + } + + if (isLastSlot) { + dequeue.bits.data := Mux( + decodeResult(Decoder.maskDestination), + maskFormatResultForGroup.get, + Mux( + decodeResult(Decoder.red), + reduceResult.get, + executionResult + ) + ) + } else { + dequeue.bits.data := executionResult + } + 
dequeue.bits.ffoIndex := ffoIndexReg + dequeue.bits.crossWriteData.foreach(_ := VecInit((crossWriteLSB ++ crossWriteMSB).toSeq)) + dequeue.bits.ffoSuccess.foreach(_ := ffoSuccess.get) +} diff --git a/v/src/laneStage/LaneStage.scala b/v/src/laneStage/LaneStage.scala new file mode 100644 index 000000000..baf2b1ca2 --- /dev/null +++ b/v/src/laneStage/LaneStage.scala @@ -0,0 +1,51 @@ +package v + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.decode.DecodeBundle + +class LaneState(parameter: LaneParameter) extends Bundle { + val vSew1H: UInt = UInt(3.W) + val loadStore: Bool = Bool() + val laneIndex: UInt = UInt(parameter.laneNumberBits.W) + val decodeResult: DecodeBundle = Decoder.bundle(parameter.fpuEnable) + /** which group is the last group for instruction. */ + val lastGroupForInstruction: UInt = UInt(parameter.groupNumberBits.W) + val instructionFinished: Bool = Bool() + val csr: CSRInterface = new CSRInterface(parameter.vlMaxBits) + // vm = 0 + val maskType: Bool = Bool() + val maskNotMaskedElement: Bool = Bool() + val maskForMaskGroup: UInt = UInt(parameter.datapathWidth.W) + val mask: ValidIO[UInt] = Valid(UInt(parameter.datapathWidth.W)) + val ffoByOtherLanes: Bool = Bool() + + /** vs1 or imm */ + val vs1: UInt = UInt(5.W) + + /** vs2 or rs2 */ + val vs2: UInt = UInt(5.W) + + /** vd or rd */ + val vd: UInt = UInt(5.W) + + val instructionIndex: UInt = UInt(parameter.instructionIndexBits.W) +} + +abstract class LaneStage[A <: Data, B <:Data](pipe: Boolean)(input: A, output: B) extends Module{ + val enqueue: DecoupledIO[A] = IO(Flipped(Decoupled(input))) + val dequeue: DecoupledIO[B] = IO(Decoupled(output)) + val stageValid = IO(Output(Bool())) + val stageFinish: Bool = WireDefault(true.B) + val stageValidReg: Bool = RegInit(false.B) + dontTouch(enqueue) + dontTouch(dequeue) + if(pipe) { + enqueue.ready := !stageValidReg || (dequeue.ready && stageFinish) + } else { + enqueue.ready := !stageValidReg + } + + dequeue.valid := 
stageValidReg && stageFinish + stageValid := stageValidReg +} diff --git a/v/src/laneStage/LaneStage0.scala b/v/src/laneStage/LaneStage0.scala new file mode 100644 index 000000000..b4b555c98 --- /dev/null +++ b/v/src/laneStage/LaneStage0.scala @@ -0,0 +1,133 @@ +package v + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.decode.DecodeBundle + +// stage 0 +class LaneStage0Enqueue(parameter: LaneParameter) extends Bundle { + val maskIndex: UInt = UInt(log2Ceil(parameter.maskGroupWidth).W) + val maskForMaskGroup: UInt = UInt(parameter.datapathWidth.W) + val maskGroupCount: UInt = UInt(parameter.maskGroupSizeBits.W) +} + +class LaneStage0StateUpdate(parameter: LaneParameter) extends Bundle { + val maskGroupCount: UInt = UInt(parameter.maskGroupSizeBits.W) + val maskIndex: UInt = UInt(log2Ceil(parameter.maskGroupWidth).W) + val outOfExecutionRange: Bool = Bool() + val maskExhausted: Bool = Bool() +} + +class LaneStage0Dequeue(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { + val mask: UInt = UInt((parameter.datapathWidth/8).W) + val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool()) + val groupCounter: UInt = UInt(parameter.groupNumberBits.W) +} + +/** This stage computes the corresponding data group counter from the maskIndex and maskGroupCount held in the lane slot. + * It also maintains instruction completion and the mask update. + */ +class LaneStage0(parameter: LaneParameter, isLastSlot: Boolean) extends + LaneStage(true)( + new LaneStage0Enqueue(parameter), + new LaneStage0Dequeue(parameter, isLastSlot) + ) { + val state: LaneState = IO(Input(new LaneState(parameter))) + val updateLaneState: LaneStage0StateUpdate = IO(Output(new LaneStage0StateUpdate(parameter))) + + // A group that is out of the execution range is not pushed into the pipeline + val enqFire: Bool = enqueue.fire && !updateLaneState.outOfExecutionRange + val stageWire: LaneStage0Dequeue = Wire(new LaneStage0Dequeue(parameter, isLastSlot)) + val stageDataReg: Data = RegEnable(stageWire, 0.U.asTypeOf(stageWire), enqFire) + val filterVec: Seq[(Bool, UInt)] = Seq(0, 1, 2).map { filterSew => + // 
The lower 'dataGroupIndexSize' bits represent the offset within the data group + val dataGroupIndexSize: Int = 2 - filterSew + // each group has '2 ** dataGroupIndexSize' elements + val dataGroupSize = 1 << dataGroupIndexSize + // The index of the last data group + val groupIndex = (enqueue.bits.maskIndex >> dataGroupIndexSize).asUInt + // Filter for the data groups + val groupFilter: UInt = scanLeftOr(UIntToOH(groupIndex)) ## false.B + // Whether any element in the data group has not been masked + // TODO: use 'record.maskGroupedOrR' & update it + val maskForDataGroup: UInt = + VecInit(state.maskForMaskGroup.asBools.grouped(dataGroupSize).map(_.reduce(_ || _)).toSeq).asUInt + val groupFilterByMask = maskForDataGroup & groupFilter + // ffo next group + val nextDataGroupOH: UInt = ffo(groupFilterByMask) + // This mask group has a next data group to execute + val hasNextDataGroup = nextDataGroupOH.orR + val nextElementBaseIndex: UInt = (OHToUInt(nextDataGroupOH) << dataGroupIndexSize).asUInt + (hasNextDataGroup, nextElementBaseIndex) + } + + /** is there any data left in this group? 
*/ + val nextOrR: Bool = Mux1H(state.vSew1H, filterVec.map(_._1)) + + // mask is exhausted + updateLaneState.maskExhausted := !nextOrR + + /** The mask group will be updated */ + val maskGroupWillUpdate: Bool = state.decodeResult(Decoder.maskLogic) || updateLaneState.maskExhausted + + /** The index of next execute element in whole instruction */ + val elementIndexForInstruction = enqueue.bits.maskGroupCount ## Mux1H( + state.vSew1H, + Seq( + enqueue.bits.maskIndex(parameter.datapathWidthBits - 1, 2) ## state.laneIndex ## enqueue.bits.maskIndex(1, 0), + enqueue.bits.maskIndex(parameter.datapathWidthBits - 1, 1) ## state.laneIndex ## enqueue.bits.maskIndex(0), + enqueue.bits.maskIndex ## state.laneIndex + ) + ) + + /** The next element is out of execution range */ + updateLaneState.outOfExecutionRange := Mux( + state.decodeResult(Decoder.maskLogic), + (enqueue.bits.maskGroupCount > state.lastGroupForInstruction), + elementIndexForInstruction >= state.csr.vl + ) || state.instructionFinished + + /** Encoding of different element lengths: 1, 8, 16, 32 */ + val elementLengthOH = Mux(state.decodeResult(Decoder.maskLogic), 1.U, state.vSew1H(2, 0) ## false.B) + + /** Which group of data will be accessed */ + val dataGroupIndex: UInt = Mux1H( + elementLengthOH, + Seq( + enqueue.bits.maskGroupCount, + enqueue.bits.maskGroupCount ## enqueue.bits.maskIndex(4, 2), + enqueue.bits.maskGroupCount ## enqueue.bits.maskIndex(4, 1), + enqueue.bits.maskGroupCount ## enqueue.bits.maskIndex + ) + ) + + val isTheLastGroup = dataGroupIndex === state.lastGroupForInstruction + + stageWire.mask := (state.mask.bits >> enqueue.bits.maskIndex).asUInt(3, 0) + + /** The index of next element in this mask group.(0-31) */ + updateLaneState.maskIndex := Mux( + state.decodeResult(Decoder.maskLogic), + 0.U, + Mux1H(state.vSew1H, filterVec.map(_._2)) + ) + + stageWire.groupCounter := dataGroupIndex + + /** next mask group */ + updateLaneState.maskGroupCount := enqueue.bits.maskGroupCount + 
maskGroupWillUpdate + + stageWire.sSendResponse.foreach { data => + data := + !(state.loadStore || + state.decodeResult(Decoder.readOnly) || + (state.decodeResult(Decoder.red) && isTheLastGroup) || + (state.decodeResult(Decoder.maskDestination) && (maskGroupWillUpdate || isTheLastGroup)) || + state.decodeResult(Decoder.ffo)) + } + when(enqFire ^ dequeue.fire) { + stageValidReg := enqFire + } + + dequeue.bits := stageDataReg +} diff --git a/v/src/laneStage/LaneStage1.scala b/v/src/laneStage/LaneStage1.scala new file mode 100644 index 000000000..6583e0aeb --- /dev/null +++ b/v/src/laneStage/LaneStage1.scala @@ -0,0 +1,316 @@ +package v + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.decode.DecodeBundle + +class LaneStage1Enqueue(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { + val groupCounter: UInt = UInt(parameter.groupNumberBits.W) + val mask: UInt = UInt((parameter.datapathWidth / 8).W) + val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool()) +} + +class LaneStage1Dequeue(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { + /** for dequeue group counter match */ + val readBusDequeueGroup: Option[UInt] = Option.when(isLastSlot)(UInt(parameter.groupNumberBits.W)) + val maskForFilter: UInt = UInt((parameter.datapathWidth / 8).W) + val mask: UInt = UInt((parameter.datapathWidth / 8).W) + val groupCounter: UInt = UInt(parameter.groupNumberBits.W) + val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool()) + // read result + val src: Vec[UInt] = Vec(3, UInt(parameter.datapathWidth.W)) + val crossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 2).W)) +} + +/** This stage is split into two pipeline steps: read the VRF, then wait for the VRF result. + * */ +class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends + LaneStage(true)( + new LaneStage1Enqueue(parameter, isLastSlot), + new LaneStage1Dequeue(parameter, isLastSlot) + ) { + val state: LaneState = IO(Input(new LaneState(parameter))) + val 
vrfReadRequest: Vec[DecoupledIO[VRFReadRequest]] = IO( + Vec( + 3, + Decoupled( + new VRFReadRequest(parameter.vrfParam.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits) + ) + ) + ) + + /** VRF read result for each slot, + * 3 is for [[source1]] [[source2]] [[source3]] + */ + val vrfReadResult: Vec[UInt] = IO(Input(Vec(3, UInt(parameter.datapathWidth.W)))) + + val readBusDequeue: Option[ValidIO[ReadBusData]] = Option.when(isLastSlot)(IO( + Flipped(Valid(new ReadBusData(parameter: LaneParameter))) + )) + + val readBusRequest: Option[DecoupledIO[ReadBusData]] = + Option.when(isLastSlot)(IO(Decoupled(new ReadBusData(parameter)))) + val readFromScalar: UInt = IO(Input(UInt(parameter.datapathWidth.W))) + + val pipeEnqueue: LaneStage1Enqueue = RegInit(0.U.asTypeOf(enqueue.bits)) + + val maskedWrite: Bool = WireDefault(false.B) + + // read state + /** schedule read src1 */ + val sRead0: Bool = RegInit(true.B) + + /** schedule read src2 */ + val sRead1: Bool = RegInit(true.B) + + /** schedule read vd */ + val sRead2: Bool = RegInit(true.B) + + // pipe read result + val readResult0: UInt = RegInit(0.U(parameter.datapathWidth.W)) + val readResult1: UInt = RegInit(0.U(parameter.datapathWidth.W)) + val readResult2: UInt = RegInit(0.U(parameter.datapathWidth.W)) + + val crossReadLSBReg: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) + val crossReadMSBReg: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) + + val crossReadLSBIn: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) + val crossReadMSBIn: Option[UInt] = Option.when(isLastSlot)(RegInit(0.U(parameter.datapathWidth.W))) + + // state for cross read + /** schedule cross lane read LSB.(access VRF for cross read) */ + val sCrossReadLSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + /** schedule cross lane read MSB.(access VRF for cross read) */ + val sCrossReadMSB: Option[Bool] = 
Option.when(isLastSlot)(RegInit(true.B)) + + /** schedule send cross lane read LSB result. */ + val sSendCrossReadResultLSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + /** schedule send cross lane read MSB result. */ + val sSendCrossReadResultMSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + /** wait for cross lane read LSB result. */ + val wCrossReadLSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + /** wait for cross lane read MSB result. */ + val wCrossReadMSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + // next for update cross read register + val sReadNext0: Bool = RegNext(sRead0, false.B) + val sReadNext1: Bool = RegNext(sRead1, false.B) + val sReadNext2: Bool = RegNext(sRead2, false.B) + val sCrossReadLSBNext: Option[Bool] = sCrossReadLSB.map(RegNext(_, false.B)) + val sCrossReadMSBNext: Option[Bool] = sCrossReadMSB.map(RegNext(_, false.B)) + + // All read requests sent + val sReadFinish: Bool = sRead0 && sRead1 && sRead2 + // Waiting to read the response + val sReadFinishNext: Bool = sReadNext0 && sReadNext1 && sReadNext2 + // 'sReadFinishNext' may assert at the next cycle of 's1Fire', need sReadFinish + val readFinish: Bool = sReadFinish && sReadFinishNext + stageFinish := (Seq(readFinish) ++ sSendCrossReadResultLSB ++ + sSendCrossReadResultMSB ++ wCrossReadLSB ++ wCrossReadMSB).reduce(_ && _) + + // read vrf + // read port 0 + vrfReadRequest(0).valid := !sRead0 && stageValidReg + vrfReadRequest(0).bits.offset := pipeEnqueue.groupCounter(parameter.vrfOffsetBits - 1, 0) + vrfReadRequest(0).bits.vs := Mux( + // encodings with vm=0 are reserved for mask type logic + state.decodeResult(Decoder.maskLogic) && !state.decodeResult(Decoder.logic), + // read v0 for (15. 
Vector Mask Instructions) + 0.U, + state.vs1 + pipeEnqueue.groupCounter( + parameter.groupNumberBits - 1, + parameter.vrfOffsetBits + ) + ) + // used for hazard detection + vrfReadRequest(0).bits.instructionIndex := state.instructionIndex + + // read port 1 + if (isLastSlot) { + vrfReadRequest(1).valid := !(sRead1 && sCrossReadLSB.get) && stageValidReg + vrfReadRequest(1).bits.offset := Mux( + sRead1, + // cross lane LSB + pipeEnqueue.groupCounter(parameter.vrfOffsetBits - 2, 0) ## false.B, + // normal read + pipeEnqueue.groupCounter(parameter.vrfOffsetBits - 1, 0) + ) + vrfReadRequest(1).bits.vs := Mux( + state.decodeResult(Decoder.vwmacc) && sRead1, + // cross read vd for vwmacc, since it need dual [[dataPathWidth]], use vs2 port to read LSB part of it. + state.vd, + // read vs2 for other instruction + state.vs2 + ) + Mux( + sRead1, + // cross lane + pipeEnqueue.groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1), + // no cross lane + pipeEnqueue.groupCounter(parameter.groupNumberBits - 1, parameter.vrfOffsetBits) + ) + } else { + vrfReadRequest(1).valid := !sRead1 && stageValidReg + vrfReadRequest(1).bits.offset := pipeEnqueue.groupCounter(parameter.vrfOffsetBits - 1, 0) + vrfReadRequest(1).bits.vs := state.vs2 + + pipeEnqueue.groupCounter(parameter.groupNumberBits - 1, parameter.vrfOffsetBits) + } + vrfReadRequest(1).bits.instructionIndex := state.instructionIndex + + // read port 2 + if (isLastSlot) { + vrfReadRequest(2).valid := !(sRead2 && sCrossReadMSB.get) && stageValidReg + vrfReadRequest(2).bits.offset := Mux( + sRead2, + // cross lane MSB + pipeEnqueue.groupCounter(parameter.vrfOffsetBits - 2, 0) ## true.B, + // normal read + pipeEnqueue.groupCounter(parameter.vrfOffsetBits - 1, 0) + ) + vrfReadRequest(2).bits.vs := Mux( + sRead2 && !state.decodeResult(Decoder.vwmacc), + // cross lane access use vs2 + state.vs2, + // normal read vd or cross read vd for vwmacc + state.vd + ) + + Mux( + sRead2, + 
pipeEnqueue.groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1), + pipeEnqueue.groupCounter(parameter.groupNumberBits - 1, parameter.vrfOffsetBits) + ) + } else { + vrfReadRequest(2).valid := !sRead2 && stageValidReg + vrfReadRequest(2).bits.offset := pipeEnqueue.groupCounter(parameter.vrfOffsetBits - 1, 0) + vrfReadRequest(2).bits.vs := state.vd + + pipeEnqueue.groupCounter(parameter.groupNumberBits - 1, parameter.vrfOffsetBits) + } + vrfReadRequest(2).bits.instructionIndex := state.instructionIndex + + val readPortFire0: Bool = vrfReadRequest(0).fire + val readPortFire1: Bool = vrfReadRequest(1).fire + val readPortFire2: Bool = vrfReadRequest(2).fire + // reg next for update result + val readPortFireNext0: Bool = RegNext(readPortFire0, false.B) + val readPortFireNext1: Bool = RegNext(readPortFire1, false.B) + val readPortFireNext2: Bool = RegNext(readPortFire2, false.B) + + // init state + when(enqueue.fire) { + pipeEnqueue := enqueue.bits + sRead0 := !state.decodeResult(Decoder.vtype) + sRead1 := false.B + // todo: mask write need read vd + sRead2 := state.decodeResult(Decoder.sReadVD) + val sCrossRead = !state.decodeResult(Decoder.crossRead) + (sCrossReadLSB ++ sCrossReadMSB ++ sSendCrossReadResultLSB ++ + sSendCrossReadResultMSB ++ wCrossReadLSB ++ wCrossReadMSB).foreach(s => s := sCrossRead) + }.otherwise { + when(readPortFire0) { + sRead0 := true.B + } + // the priority of `sRead1` is higher than `sCrossReadLSB` + when(readPortFire1) { + sRead1 := true.B + sCrossReadLSB.foreach(d => d := sRead1) + } + // the priority of `sRead2` is higher than `sCrossReadMSB` + when(readPortFire2) { + sRead2 := true.B + sCrossReadMSB.foreach(d => d := sRead2) + } + + readBusDequeue.foreach { crossReadDequeue => + when(crossReadDequeue.valid) { + when(crossReadDequeue.bits.isTail) { + wCrossReadMSB.foreach(_ := true.B) + crossReadMSBIn.foreach(_ := crossReadDequeue.bits.data) + }.otherwise { + wCrossReadLSB.foreach(_ := true.B) + crossReadLSBIn.foreach(_ 
:= crossReadDequeue.bits.data) + } + } + } + } + + // update read result register + when(readPortFireNext0) { + readResult0 := vrfReadResult(0) + } + + when(readPortFireNext1) { + if (isLastSlot) { + when(sReadNext1) { + crossReadLSBReg.foreach(d => d := vrfReadResult(1)) + }.otherwise { + readResult1 := vrfReadResult(1) + } + } else { + readResult1 := vrfReadResult(1) + } + } + + when(readPortFireNext2) { + if (isLastSlot) { + when(sReadNext2) { + crossReadMSBReg.foreach(d => d := vrfReadResult(2)) + }.otherwise { + readResult2 := vrfReadResult(2) + } + } else { + readResult2 := vrfReadResult(2) + } + } + + // connect cross read + if (isLastSlot) { + dequeue.bits.readBusDequeueGroup.foreach(d => d := pipeEnqueue.groupCounter) + + val crossLaneRead: DecoupledIO[ReadBusData] = Wire(Decoupled(new ReadBusData(parameter))) + /** The data to be sent is ready + * need sCrossReadLSB since sCrossReadLSBNext may assert after s1fire. + */ + val crossReadDataReadyLSB: Bool = (sCrossReadLSBNext ++ sCrossReadLSB).reduce(_ && _) + val crossReadDataReadyMSB: Bool = (sCrossReadMSBNext ++ sCrossReadMSB).reduce(_ && _) + + /** read data from RF, try to send cross lane read LSB data to ring */ + val tryCrossReadSendLSB: Bool = crossReadDataReadyLSB && !sSendCrossReadResultLSB.get && stageValidReg + + /** read data from RF, try to send cross lane read MSB data to ring */ + val tryCrossReadSendMSB: Bool = crossReadDataReadyMSB && !sSendCrossReadResultMSB.get && stageValidReg + + crossLaneRead.bits.sinkIndex := (!tryCrossReadSendLSB) ## state.laneIndex(parameter.laneNumberBits - 1, 1) + crossLaneRead.bits.isTail := state.laneIndex(0) + crossLaneRead.bits.sourceIndex := state.laneIndex + crossLaneRead.bits.instructionIndex := state.instructionIndex + crossLaneRead.bits.counter := pipeEnqueue.groupCounter + // TODO: use [[record.state.sSendCrossReadResultLSB]] -> MSB may be ready earlier + crossLaneRead.bits.data := Mux(tryCrossReadSendLSB, crossReadLSBReg.get, crossReadMSBReg.get) + 
crossLaneRead.valid := tryCrossReadSendLSB || tryCrossReadSendMSB + readBusRequest.foreach(_ <> crossLaneRead) + + when(crossLaneRead.fire) { + when(tryCrossReadSendLSB) { + sSendCrossReadResultLSB.foreach(_ := true.B) + }.otherwise { + sSendCrossReadResultMSB.foreach(_ := true.B) + } + } + } + + val source1Select: UInt = Mux(state.decodeResult(Decoder.vtype), readResult0, readFromScalar) + dequeue.bits.mask := pipeEnqueue.mask + dequeue.bits.groupCounter := pipeEnqueue.groupCounter + dequeue.bits.src := VecInit(Seq(source1Select, readResult1, readResult2)) + dequeue.bits.crossReadSource.foreach(_ := crossReadMSBIn.get ## crossReadLSBIn.get) + dequeue.bits.sSendResponse.foreach(_ := pipeEnqueue.sSendResponse.get) + + dequeue.bits.maskForFilter := FillInterleaved(4, state.maskNotMaskedElement) | pipeEnqueue.mask + + when(enqueue.fire ^ dequeue.fire) { + stageValidReg := enqueue.fire + } +} diff --git a/v/src/laneStage/LaneStage2.scala b/v/src/laneStage/LaneStage2.scala new file mode 100644 index 000000000..e42120671 --- /dev/null +++ b/v/src/laneStage/LaneStage2.scala @@ -0,0 +1,70 @@ +package v + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.decode.DecodeBundle + +class LaneStage2Enqueue(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { + val src: Vec[UInt] = Vec(3, UInt(parameter.datapathWidth.W)) + val groupCounter: UInt = UInt(parameter.groupNumberBits.W) + val maskForFilter: UInt = UInt((parameter.datapathWidth / 8).W) + val mask: UInt = UInt((parameter.datapathWidth / 8).W) + val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool()) +} + +class LaneStage2Dequeue(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { + val groupCounter: UInt = UInt(parameter.groupNumberBits.W) + val mask: UInt = UInt((parameter.datapathWidth / 8).W) + val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool()) + val pipeData: Option[UInt] = Option.when(isLastSlot)(UInt(parameter.datapathWidth.W)) +} + +// stage 2: execute 
+class LaneStage2(parameter: LaneParameter, isLastSlot: Boolean) extends + LaneStage(true)( + new LaneStage2Enqueue(parameter, isLastSlot), + new LaneStage2Dequeue(parameter, isLastSlot) + ){ + val state: LaneState = IO(Input(new LaneState(parameter))) + + val decodeResult: DecodeBundle = state.decodeResult + + val executionQueue: Queue[LaneExecuteStage] = + Module(new Queue(new LaneExecuteStage(parameter)(isLastSlot), parameter.executionQueueSize)) + + // pipe from stage 0 + val sSendResponseInStage2 = Option.when(isLastSlot)(RegEnable(enqueue.bits.sSendResponse.get, true.B, enqueue.fire)) + // ffo success in current data group? + val ffoSuccess: Option[Bool] = Option.when(isLastSlot)(RegInit(false.B)) + + val ffoCompleteWrite: UInt = Mux(state.maskType, (~enqueue.bits.src(0)).asUInt & enqueue.bits.src(2), 0.U) + // executionQueue enqueue + executionQueue.io.enq.bits.pipeData.foreach { data => + data := Mux( + // pipe source1 for gather, pipe ~v0 & vd for ffo + decodeResult(Decoder.gather) || decodeResult(Decoder.ffo), + Mux(decodeResult(Decoder.gather), enqueue.bits.src(0), ffoCompleteWrite), + enqueue.bits.src(1) + ) + } + executionQueue.io.enq.bits.groupCounter := enqueue.bits.groupCounter + executionQueue.io.enq.bits.mask := Mux1H( + state.vSew1H, + Seq( + enqueue.bits.maskForFilter, + FillInterleaved(2, enqueue.bits.maskForFilter(1, 0)), + // todo: handle first masked + FillInterleaved(4, enqueue.bits.maskForFilter(0)) + ) + ) + executionQueue.io.enq.valid := enqueue.valid + enqueue.ready := executionQueue.io.enq.ready + dequeue.valid := executionQueue.io.deq.valid + executionQueue.io.deq.ready := dequeue.ready + + dequeue.bits.pipeData.foreach(_ := executionQueue.io.deq.bits.pipeData.get) + dequeue.bits.groupCounter := executionQueue.io.deq.bits.groupCounter + dequeue.bits.mask := executionQueue.io.deq.bits.mask + dequeue.bits.sSendResponse.foreach(_ := sSendResponseInStage2.get) + stageValid := executionQueue.io.deq.valid +} diff --git 
a/v/src/laneStage/LaneStage3.scala b/v/src/laneStage/LaneStage3.scala new file mode 100644 index 000000000..7689ca873 --- /dev/null +++ b/v/src/laneStage/LaneStage3.scala @@ -0,0 +1,170 @@ +package v + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.decode.DecodeBundle + +class LaneStage3Enqueue(parameter: LaneParameter) extends Bundle { + val groupCounter: UInt = UInt(parameter.groupNumberBits.W) + val data: UInt = UInt(parameter.datapathWidth.W) + val pipeData: UInt = UInt(parameter.datapathWidth.W) + val mask: UInt = UInt((parameter.datapathWidth/8).W) + val ffoIndex: UInt = UInt(log2Ceil(parameter.vLen / 8).W) + val crossWriteData: Vec[UInt] = Vec(2, UInt(parameter.datapathWidth.W)) + val sSendResponse: Bool = Bool() + val ffoSuccess: Bool = Bool() +} + +class LaneStage3(parameter: LaneParameter, isLastSlot: Boolean) extends Module { + val enqueue: DecoupledIO[LaneStage3Enqueue] = IO(Flipped(Decoupled(new LaneStage3Enqueue(parameter)))) + val vrfWriteBundle: VRFWriteRequest = new VRFWriteRequest( + parameter.vrfParam.regNumBits, + parameter.vrfOffsetBits, + parameter.instructionIndexBits, + parameter.datapathWidth + ) + val vrfWriteRequest: DecoupledIO[VRFWriteRequest] = IO(Decoupled(vrfWriteBundle)) + + val state: LaneState = IO(Input(new LaneState(parameter))) + val pipeEnqueue: Option[LaneStage3Enqueue] = Option.when(isLastSlot)(RegInit(0.U.asTypeOf(enqueue.bits))) + /** response to [[V.lsu]] or mask unit in [[V]] */ + val laneResponse: Option[ValidIO[LaneResponse]] = Option.when(isLastSlot)(IO(Valid(new LaneResponse(parameter)))) + val stageValid: Bool = IO(Output(Bool())) + /** feedback from [[V]] to [[Lane]] for [[laneResponse]] */ + val laneResponseFeedback: Option[ValidIO[LaneResponseFeedback]] = + Option.when(isLastSlot)(IO(Flipped(Valid(new LaneResponseFeedback(parameter))))) + val crossWritePort: Option[DecoupledIO[WriteBusData]] = + Option.when(isLastSlot)(IO(Decoupled(new WriteBusData(parameter)))) + + val stageValidReg: 
Option[Bool] = Option.when(isLastSlot) (RegInit(false.B)) + + /** schedule cross lane write LSB */ + val sCrossWriteLSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + /** schedule cross lane write MSB */ + val sCrossWriteMSB: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + // state for response to scheduler + /** schedule send [[LaneResponse]] to scheduler */ + val sSendResponse: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + /** wait scheduler send [[LaneResponseFeedback]] */ + val wResponseFeedback: Option[Bool] = Option.when(isLastSlot)(RegInit(true.B)) + + // update register + when(enqueue.fire) { + pipeEnqueue.foreach(_ := enqueue.bits) + (sCrossWriteLSB ++ sCrossWriteMSB).foreach(_ := !state.decodeResult(Decoder.crossWrite)) + (sSendResponse ++ wResponseFeedback).foreach( + _ := state.decodeResult(Decoder.scheduler) || enqueue.bits.sSendResponse + ) + } + + val vrfWriteQueue: Queue[VRFWriteRequest] = + Module(new Queue(vrfWriteBundle, entries = 1, pipe = false, flow = false)) + + /** Write queue ready or not need to write. 
*/ + val vrfWriteReady: Bool = vrfWriteQueue.io.enq.ready || state.decodeResult(Decoder.sWrite) + + if (isLastSlot) { + // VRF cross write + /** data for enqueuing [[writeBusPort]] + * [[crossLaneWrite.valid]] indicate there is a slot try to enqueue [[writeBusPort]] + */ + val crossLaneWrite: DecoupledIO[WriteBusData] = Wire(Decoupled(new WriteBusData(parameter))) + crossWritePort.get <> crossLaneWrite + + /** execute in ALU, try to send cross lane write LSB data to ring */ + val tryCrossWriteSendLSB = stageValidReg.get && !sCrossWriteLSB.get + + /** execute in ALU, try to send cross lane write MSB data to ring */ + val tryCrossWriteSendMSB = stageValidReg.get && !sCrossWriteMSB.get + crossLaneWrite.bits.sinkIndex := state.laneIndex(parameter.laneNumberBits - 2, 0) ## (!tryCrossWriteSendLSB) + crossLaneWrite.bits.sourceIndex := state.laneIndex + crossLaneWrite.bits.isTail := state.laneIndex(parameter.laneNumberBits - 1) + crossLaneWrite.bits.instructionIndex := state.instructionIndex + crossLaneWrite.bits.counter := pipeEnqueue.get.groupCounter + crossLaneWrite.bits.data := + Mux(tryCrossWriteSendLSB, pipeEnqueue.get.crossWriteData.head, pipeEnqueue.get.crossWriteData.last) + crossLaneWrite.bits.mask := Mux(tryCrossWriteSendLSB, pipeEnqueue.get.mask(1, 0), pipeEnqueue.get.mask(3, 2)) + crossLaneWrite.valid := tryCrossWriteSendLSB || tryCrossWriteSendMSB + + when(crossLaneWrite.fire) { + sCrossWriteLSB.foreach(_ := true.B) + when(sCrossWriteLSB.get) { + sCrossWriteMSB.foreach(_ := true.B) + } + } + // scheduler synchronization + val schedulerFinish: Bool = (sSendResponse ++ wResponseFeedback).reduce(_ && _) + + val dataSelect: Option[UInt] = Option.when(isLastSlot) { + Mux(state.decodeResult(Decoder.nr) || state.ffoByOtherLanes || state.decodeResult(Decoder.dontNeedExecuteInLane), + pipeEnqueue.get.pipeData, + pipeEnqueue.get.data + ) + } + // mask request + laneResponse.head.valid := stageValidReg.get && !sSendResponse.get + laneResponse.head.bits.data := 
Mux(state.decodeResult(Decoder.ffo), pipeEnqueue.get.ffoIndex, dataSelect.get) + laneResponse.head.bits.toLSU := state.loadStore + laneResponse.head.bits.instructionIndex := state.instructionIndex + laneResponse.head.bits.ffoSuccess := pipeEnqueue.get.ffoSuccess + + sSendResponse.foreach(state => when(laneResponse.head.valid) { + state := true.B + }) + wResponseFeedback.foreach(state => when(laneResponseFeedback.head.valid) { + state := true.B + }) + + // enqueue write for last slot + vrfWriteQueue.io.enq.valid := stageValidReg.get && schedulerFinish && !state.decodeResult(Decoder.sWrite) + + // UInt(5.W) + UInt(3.W), use `+` here + vrfWriteQueue.io.enq.bits.vd := state.vd + pipeEnqueue.get.groupCounter( + parameter.groupNumberBits - 1, + parameter.vrfOffsetBits + ) + + vrfWriteQueue.io.enq.bits.offset := pipeEnqueue.get.groupCounter + vrfWriteQueue.io.enq.bits.data := dataSelect.get + vrfWriteQueue.io.enq.bits.last := DontCare + vrfWriteQueue.io.enq.bits.instructionIndex := state.instructionIndex + vrfWriteQueue.io.enq.bits.mask := pipeEnqueue.get.mask + + // Handshake + /** Cross-lane writing is over */ + val CrossLaneWriteOver: Bool = (sCrossWriteLSB ++ sCrossWriteMSB).reduce(_ && _) + + enqueue.ready := !stageValidReg.get || (CrossLaneWriteOver && schedulerFinish && vrfWriteReady) + val dequeueFire = stageValidReg.get && CrossLaneWriteOver && schedulerFinish && vrfWriteReady + stageValidReg.foreach{data => + when(dequeueFire ^ enqueue.fire) { + data := enqueue.fire + } + } + stageValid := stageValidReg.get || vrfWriteQueue.io.deq.valid + } else { + // Normal will be one level less + vrfWriteQueue.io.enq.valid := enqueue.valid + + // UInt(5.W) + UInt(3.W), use `+` here + vrfWriteQueue.io.enq.bits.vd := state.vd + enqueue.bits.groupCounter( + parameter.groupNumberBits - 1, + parameter.vrfOffsetBits + ) + + vrfWriteQueue.io.enq.bits.offset := enqueue.bits.groupCounter + + vrfWriteQueue.io.enq.bits.data := enqueue.bits.data + vrfWriteQueue.io.enq.bits.last := 
DontCare + vrfWriteQueue.io.enq.bits.instructionIndex := state.instructionIndex + vrfWriteQueue.io.enq.bits.mask := enqueue.bits.mask + + // Handshake + enqueue.ready := vrfWriteQueue.io.enq.ready + stageValid := vrfWriteQueue.io.deq.valid + } + vrfWriteRequest <> vrfWriteQueue.io.deq +} \ No newline at end of file From 6926d3ac61f5e3ba1130a3761639c8fba8a3f02e Mon Sep 17 00:00:00 2001 From: Yanqi Yang Date: Thu, 3 Aug 2023 15:16:49 +0800 Subject: [PATCH 2/9] fix v1024l8b2fp-test --- build.sc | 2 +- configs/v1024l8b2fp-test.json | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/build.sc b/build.sc index 85cb28e71..d45e2ee8f 100644 --- a/build.sc +++ b/build.sc @@ -231,7 +231,7 @@ def emulatorTarget: Seq[String] = os.walk(os.pwd / "configs") .filter(cfg => { var filename = cfg.baseName // TODO: remove fp filter after fp is supported - filename.contains("test") && !filename.contains("fp") + filename.contains("test") }) .map(_.baseName) diff --git a/configs/v1024l8b2fp-test.json b/configs/v1024l8b2fp-test.json index 261ba897b..49436961d 100644 --- a/configs/v1024l8b2fp-test.json +++ b/configs/v1024l8b2fp-test.json @@ -107,6 +107,17 @@ }, [0, 1, 2, 3] ] + ], + "floatModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32 + }, + "generator": "v.LaneFloat" + }, + [0, 1, 2, 3] + ] ] } }, From 7fb334e1e49550cadb9cbd6aac1ac509f4a62dc6 Mon Sep 17 00:00:00 2001 From: Yanqi Yang Date: Thu, 3 Aug 2023 16:13:18 +0800 Subject: [PATCH 3/9] add fp cases to tests/configs --- tests/configs/vfadd.vf-codegen.json | 8 ++++++++ tests/configs/vfadd.vv-codegen.json | 8 ++++++++ tests/configs/vfclass.v-codegen.json | 8 ++++++++ tests/configs/vfcvt.f.x.v-codegen.json | 8 ++++++++ tests/configs/vfcvt.f.xu.v-codegen.json | 8 ++++++++ tests/configs/vfcvt.rtz.x.f.v-codegen.json | 8 ++++++++ tests/configs/vfcvt.rtz.xu.f.v-codegen.json | 8 ++++++++ tests/configs/vfcvt.x.f.v-codegen.json | 8 ++++++++ tests/configs/vfcvt.xu.f.v-codegen.json | 8 ++++++++ 
tests/configs/vfdiv.vf-codegen.json | 8 ++++++++ tests/configs/vfdiv.vv-codegen.json | 8 ++++++++ tests/configs/vfmacc.vf-codegen.json | 8 ++++++++ tests/configs/vfmacc.vv-codegen.json | 8 ++++++++ tests/configs/vfmadd.vf-codegen.json | 8 ++++++++ tests/configs/vfmadd.vv-codegen.json | 8 ++++++++ tests/configs/vfmax.vf-codegen.json | 8 ++++++++ tests/configs/vfmax.vv-codegen.json | 8 ++++++++ tests/configs/vfmerge.vfm-codegen.json | 8 ++++++++ tests/configs/vfmin.vf-codegen.json | 8 ++++++++ tests/configs/vfmin.vv-codegen.json | 8 ++++++++ tests/configs/vfmsac.vf-codegen.json | 8 ++++++++ tests/configs/vfmsac.vv-codegen.json | 8 ++++++++ tests/configs/vfmsub.vf-codegen.json | 8 ++++++++ tests/configs/vfmsub.vv-codegen.json | 8 ++++++++ tests/configs/vfmul.vf-codegen.json | 8 ++++++++ tests/configs/vfmul.vv-codegen.json | 8 ++++++++ tests/configs/vfmv.f.s-codegen.json | 8 ++++++++ tests/configs/vfmv.s.f-codegen.json | 8 ++++++++ tests/configs/vfmv.v.f-codegen.json | 8 ++++++++ tests/configs/vfnmacc.vf-codegen.json | 8 ++++++++ tests/configs/vfnmacc.vv-codegen.json | 8 ++++++++ tests/configs/vfnmadd.vf-codegen.json | 8 ++++++++ tests/configs/vfnmadd.vv-codegen.json | 8 ++++++++ tests/configs/vfnmsac.vf-codegen.json | 8 ++++++++ tests/configs/vfnmsac.vv-codegen.json | 8 ++++++++ tests/configs/vfnmsub.vf-codegen.json | 8 ++++++++ tests/configs/vfnmsub.vv-codegen.json | 8 ++++++++ tests/configs/vfrdiv.vf-codegen.json | 8 ++++++++ tests/configs/vfrec7.v-codegen.json | 8 ++++++++ tests/configs/vfrsqrt7.v-codegen.json | 8 ++++++++ tests/configs/vfrsub.vf-codegen.json | 8 ++++++++ tests/configs/vfsgnj.vf-codegen.json | 8 ++++++++ tests/configs/vfsgnj.vv-codegen.json | 8 ++++++++ tests/configs/vfsgnjn.vf-codegen.json | 8 ++++++++ tests/configs/vfsgnjn.vv-codegen.json | 8 ++++++++ tests/configs/vfsgnjx.vf-codegen.json | 8 ++++++++ tests/configs/vfsgnjx.vv-codegen.json | 8 ++++++++ tests/configs/vfsqrt.v-codegen.json | 8 ++++++++ tests/configs/vfsub.vf-codegen.json | 8 
++++++++ tests/configs/vfsub.vv-codegen.json | 8 ++++++++ tests/configs/vmfeq.vf-codegen.json | 8 ++++++++ tests/configs/vmfeq.vv-codegen.json | 8 ++++++++ tests/configs/vmfge.vf-codegen.json | 8 ++++++++ tests/configs/vmfgt.vf-codegen.json | 8 ++++++++ tests/configs/vmflt.vf-codegen.json | 8 ++++++++ tests/configs/vmflt.vv-codegen.json | 8 ++++++++ tests/configs/vmfne.vf-codegen.json | 8 ++++++++ tests/configs/vmfne.vv-codegen.json | 8 ++++++++ 58 files changed, 464 insertions(+) create mode 100644 tests/configs/vfadd.vf-codegen.json create mode 100644 tests/configs/vfadd.vv-codegen.json create mode 100644 tests/configs/vfclass.v-codegen.json create mode 100644 tests/configs/vfcvt.f.x.v-codegen.json create mode 100644 tests/configs/vfcvt.f.xu.v-codegen.json create mode 100644 tests/configs/vfcvt.rtz.x.f.v-codegen.json create mode 100644 tests/configs/vfcvt.rtz.xu.f.v-codegen.json create mode 100644 tests/configs/vfcvt.x.f.v-codegen.json create mode 100644 tests/configs/vfcvt.xu.f.v-codegen.json create mode 100644 tests/configs/vfdiv.vf-codegen.json create mode 100644 tests/configs/vfdiv.vv-codegen.json create mode 100644 tests/configs/vfmacc.vf-codegen.json create mode 100644 tests/configs/vfmacc.vv-codegen.json create mode 100644 tests/configs/vfmadd.vf-codegen.json create mode 100644 tests/configs/vfmadd.vv-codegen.json create mode 100644 tests/configs/vfmax.vf-codegen.json create mode 100644 tests/configs/vfmax.vv-codegen.json create mode 100644 tests/configs/vfmerge.vfm-codegen.json create mode 100644 tests/configs/vfmin.vf-codegen.json create mode 100644 tests/configs/vfmin.vv-codegen.json create mode 100644 tests/configs/vfmsac.vf-codegen.json create mode 100644 tests/configs/vfmsac.vv-codegen.json create mode 100644 tests/configs/vfmsub.vf-codegen.json create mode 100644 tests/configs/vfmsub.vv-codegen.json create mode 100644 tests/configs/vfmul.vf-codegen.json create mode 100644 tests/configs/vfmul.vv-codegen.json create mode 100644 
tests/configs/vfmv.f.s-codegen.json create mode 100644 tests/configs/vfmv.s.f-codegen.json create mode 100644 tests/configs/vfmv.v.f-codegen.json create mode 100644 tests/configs/vfnmacc.vf-codegen.json create mode 100644 tests/configs/vfnmacc.vv-codegen.json create mode 100644 tests/configs/vfnmadd.vf-codegen.json create mode 100644 tests/configs/vfnmadd.vv-codegen.json create mode 100644 tests/configs/vfnmsac.vf-codegen.json create mode 100644 tests/configs/vfnmsac.vv-codegen.json create mode 100644 tests/configs/vfnmsub.vf-codegen.json create mode 100644 tests/configs/vfnmsub.vv-codegen.json create mode 100644 tests/configs/vfrdiv.vf-codegen.json create mode 100644 tests/configs/vfrec7.v-codegen.json create mode 100644 tests/configs/vfrsqrt7.v-codegen.json create mode 100644 tests/configs/vfrsub.vf-codegen.json create mode 100644 tests/configs/vfsgnj.vf-codegen.json create mode 100644 tests/configs/vfsgnj.vv-codegen.json create mode 100644 tests/configs/vfsgnjn.vf-codegen.json create mode 100644 tests/configs/vfsgnjn.vv-codegen.json create mode 100644 tests/configs/vfsgnjx.vf-codegen.json create mode 100644 tests/configs/vfsgnjx.vv-codegen.json create mode 100644 tests/configs/vfsqrt.v-codegen.json create mode 100644 tests/configs/vfsub.vf-codegen.json create mode 100644 tests/configs/vfsub.vv-codegen.json create mode 100644 tests/configs/vmfeq.vf-codegen.json create mode 100644 tests/configs/vmfeq.vv-codegen.json create mode 100644 tests/configs/vmfge.vf-codegen.json create mode 100644 tests/configs/vmfgt.vf-codegen.json create mode 100644 tests/configs/vmflt.vf-codegen.json create mode 100644 tests/configs/vmflt.vv-codegen.json create mode 100644 tests/configs/vmfne.vf-codegen.json create mode 100644 tests/configs/vmfne.vv-codegen.json diff --git a/tests/configs/vfadd.vf-codegen.json b/tests/configs/vfadd.vf-codegen.json new file mode 100644 index 000000000..38d14fea9 --- /dev/null +++ b/tests/configs/vfadd.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": 
"vfadd.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfadd.vv-codegen.json b/tests/configs/vfadd.vv-codegen.json new file mode 100644 index 000000000..06f540a96 --- /dev/null +++ b/tests/configs/vfadd.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfadd.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfclass.v-codegen.json b/tests/configs/vfclass.v-codegen.json new file mode 100644 index 000000000..147781135 --- /dev/null +++ b/tests/configs/vfclass.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfclass.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfcvt.f.x.v-codegen.json b/tests/configs/vfcvt.f.x.v-codegen.json new file mode 100644 index 000000000..f60c34983 --- /dev/null +++ b/tests/configs/vfcvt.f.x.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfcvt.f.x.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfcvt.f.xu.v-codegen.json b/tests/configs/vfcvt.f.xu.v-codegen.json new file mode 100644 index 000000000..126fccb75 --- /dev/null +++ b/tests/configs/vfcvt.f.xu.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfcvt.f.xu.v", + "type": "codegen", + "vlen": 1024, + "xlen": 
32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfcvt.rtz.x.f.v-codegen.json b/tests/configs/vfcvt.rtz.x.f.v-codegen.json new file mode 100644 index 000000000..3fdbe87e5 --- /dev/null +++ b/tests/configs/vfcvt.rtz.x.f.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfcvt.rtz.x.f.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfcvt.rtz.xu.f.v-codegen.json b/tests/configs/vfcvt.rtz.xu.f.v-codegen.json new file mode 100644 index 000000000..df5ee6afc --- /dev/null +++ b/tests/configs/vfcvt.rtz.xu.f.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfcvt.rtz.xu.f.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfcvt.x.f.v-codegen.json b/tests/configs/vfcvt.x.f.v-codegen.json new file mode 100644 index 000000000..c052c7c9d --- /dev/null +++ b/tests/configs/vfcvt.x.f.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfcvt.x.f.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfcvt.xu.f.v-codegen.json b/tests/configs/vfcvt.xu.f.v-codegen.json new file mode 100644 index 000000000..4d502f2e9 --- /dev/null +++ b/tests/configs/vfcvt.xu.f.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfcvt.xu.f.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, 
+ "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfdiv.vf-codegen.json b/tests/configs/vfdiv.vf-codegen.json new file mode 100644 index 000000000..47a70d618 --- /dev/null +++ b/tests/configs/vfdiv.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfdiv.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfdiv.vv-codegen.json b/tests/configs/vfdiv.vv-codegen.json new file mode 100644 index 000000000..c35fccccb --- /dev/null +++ b/tests/configs/vfdiv.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfdiv.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmacc.vf-codegen.json b/tests/configs/vfmacc.vf-codegen.json new file mode 100644 index 000000000..5da542272 --- /dev/null +++ b/tests/configs/vfmacc.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmacc.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmacc.vv-codegen.json b/tests/configs/vfmacc.vv-codegen.json new file mode 100644 index 000000000..b42884ff2 --- /dev/null +++ b/tests/configs/vfmacc.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmacc.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", 
"-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmadd.vf-codegen.json b/tests/configs/vfmadd.vf-codegen.json new file mode 100644 index 000000000..ad6caa5fc --- /dev/null +++ b/tests/configs/vfmadd.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmadd.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmadd.vv-codegen.json b/tests/configs/vfmadd.vv-codegen.json new file mode 100644 index 000000000..1af160c6e --- /dev/null +++ b/tests/configs/vfmadd.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmadd.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmax.vf-codegen.json b/tests/configs/vfmax.vf-codegen.json new file mode 100644 index 000000000..3c8056d84 --- /dev/null +++ b/tests/configs/vfmax.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmax.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmax.vv-codegen.json b/tests/configs/vfmax.vv-codegen.json new file mode 100644 index 000000000..55ed8bceb --- /dev/null +++ b/tests/configs/vfmax.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmax.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ 
No newline at end of file diff --git a/tests/configs/vfmerge.vfm-codegen.json b/tests/configs/vfmerge.vfm-codegen.json new file mode 100644 index 000000000..77d3b602e --- /dev/null +++ b/tests/configs/vfmerge.vfm-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmerge.vfm", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmin.vf-codegen.json b/tests/configs/vfmin.vf-codegen.json new file mode 100644 index 000000000..86b07f1c6 --- /dev/null +++ b/tests/configs/vfmin.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmin.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmin.vv-codegen.json b/tests/configs/vfmin.vv-codegen.json new file mode 100644 index 000000000..883c87a1c --- /dev/null +++ b/tests/configs/vfmin.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmin.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmsac.vf-codegen.json b/tests/configs/vfmsac.vf-codegen.json new file mode 100644 index 000000000..b7191ab62 --- /dev/null +++ b/tests/configs/vfmsac.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmsac.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmsac.vv-codegen.json 
b/tests/configs/vfmsac.vv-codegen.json new file mode 100644 index 000000000..b6c916083 --- /dev/null +++ b/tests/configs/vfmsac.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmsac.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmsub.vf-codegen.json b/tests/configs/vfmsub.vf-codegen.json new file mode 100644 index 000000000..2a50def2f --- /dev/null +++ b/tests/configs/vfmsub.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmsub.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmsub.vv-codegen.json b/tests/configs/vfmsub.vv-codegen.json new file mode 100644 index 000000000..fb8730040 --- /dev/null +++ b/tests/configs/vfmsub.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmsub.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmul.vf-codegen.json b/tests/configs/vfmul.vf-codegen.json new file mode 100644 index 000000000..1e17ea5e9 --- /dev/null +++ b/tests/configs/vfmul.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmul.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmul.vv-codegen.json b/tests/configs/vfmul.vv-codegen.json new file mode 100644 index 
000000000..49c54c63f --- /dev/null +++ b/tests/configs/vfmul.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmul.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmv.f.s-codegen.json b/tests/configs/vfmv.f.s-codegen.json new file mode 100644 index 000000000..7d9737070 --- /dev/null +++ b/tests/configs/vfmv.f.s-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmv.f.s", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmv.s.f-codegen.json b/tests/configs/vfmv.s.f-codegen.json new file mode 100644 index 000000000..2120dfd70 --- /dev/null +++ b/tests/configs/vfmv.s.f-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmv.s.f", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfmv.v.f-codegen.json b/tests/configs/vfmv.v.f-codegen.json new file mode 100644 index 000000000..453320b88 --- /dev/null +++ b/tests/configs/vfmv.v.f-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfmv.v.f", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfnmacc.vf-codegen.json b/tests/configs/vfnmacc.vf-codegen.json new file mode 100644 index 000000000..594326e28 --- /dev/null +++ b/tests/configs/vfnmacc.vf-codegen.json @@ -0,0 
+1,8 @@ +{ + "name": "vfnmacc.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfnmacc.vv-codegen.json b/tests/configs/vfnmacc.vv-codegen.json new file mode 100644 index 000000000..3fa924c05 --- /dev/null +++ b/tests/configs/vfnmacc.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfnmacc.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfnmadd.vf-codegen.json b/tests/configs/vfnmadd.vf-codegen.json new file mode 100644 index 000000000..b39405fb8 --- /dev/null +++ b/tests/configs/vfnmadd.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfnmadd.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfnmadd.vv-codegen.json b/tests/configs/vfnmadd.vv-codegen.json new file mode 100644 index 000000000..cc2603e6d --- /dev/null +++ b/tests/configs/vfnmadd.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfnmadd.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfnmsac.vf-codegen.json b/tests/configs/vfnmsac.vf-codegen.json new file mode 100644 index 000000000..2cc4da001 --- /dev/null +++ b/tests/configs/vfnmsac.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfnmsac.vf", + "type": "codegen", + 
"vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfnmsac.vv-codegen.json b/tests/configs/vfnmsac.vv-codegen.json new file mode 100644 index 000000000..baf4aaa45 --- /dev/null +++ b/tests/configs/vfnmsac.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfnmsac.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfnmsub.vf-codegen.json b/tests/configs/vfnmsub.vf-codegen.json new file mode 100644 index 000000000..dc03b0c28 --- /dev/null +++ b/tests/configs/vfnmsub.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfnmsub.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfnmsub.vv-codegen.json b/tests/configs/vfnmsub.vv-codegen.json new file mode 100644 index 000000000..3bd91cbe1 --- /dev/null +++ b/tests/configs/vfnmsub.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfnmsub.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfrdiv.vf-codegen.json b/tests/configs/vfrdiv.vf-codegen.json new file mode 100644 index 000000000..64f251970 --- /dev/null +++ b/tests/configs/vfrdiv.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfrdiv.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ 
"-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfrec7.v-codegen.json b/tests/configs/vfrec7.v-codegen.json new file mode 100644 index 000000000..c93ee3325 --- /dev/null +++ b/tests/configs/vfrec7.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfrec7.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfrsqrt7.v-codegen.json b/tests/configs/vfrsqrt7.v-codegen.json new file mode 100644 index 000000000..bd170bb17 --- /dev/null +++ b/tests/configs/vfrsqrt7.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfrsqrt7.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfrsub.vf-codegen.json b/tests/configs/vfrsub.vf-codegen.json new file mode 100644 index 000000000..0ee9860b6 --- /dev/null +++ b/tests/configs/vfrsub.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfrsub.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsgnj.vf-codegen.json b/tests/configs/vfsgnj.vf-codegen.json new file mode 100644 index 000000000..9461742c2 --- /dev/null +++ b/tests/configs/vfsgnj.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsgnj.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", 
"-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsgnj.vv-codegen.json b/tests/configs/vfsgnj.vv-codegen.json new file mode 100644 index 000000000..e7fac0e3c --- /dev/null +++ b/tests/configs/vfsgnj.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsgnj.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsgnjn.vf-codegen.json b/tests/configs/vfsgnjn.vf-codegen.json new file mode 100644 index 000000000..b8c5736b5 --- /dev/null +++ b/tests/configs/vfsgnjn.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsgnjn.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsgnjn.vv-codegen.json b/tests/configs/vfsgnjn.vv-codegen.json new file mode 100644 index 000000000..fa94e6885 --- /dev/null +++ b/tests/configs/vfsgnjn.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsgnjn.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsgnjx.vf-codegen.json b/tests/configs/vfsgnjx.vf-codegen.json new file mode 100644 index 000000000..137e1d13e --- /dev/null +++ b/tests/configs/vfsgnjx.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsgnjx.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", 
"-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsgnjx.vv-codegen.json b/tests/configs/vfsgnjx.vv-codegen.json new file mode 100644 index 000000000..410edc413 --- /dev/null +++ b/tests/configs/vfsgnjx.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsgnjx.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsqrt.v-codegen.json b/tests/configs/vfsqrt.v-codegen.json new file mode 100644 index 000000000..911322086 --- /dev/null +++ b/tests/configs/vfsqrt.v-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsqrt.v", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsub.vf-codegen.json b/tests/configs/vfsub.vf-codegen.json new file mode 100644 index 000000000..ff7f0f8d7 --- /dev/null +++ b/tests/configs/vfsub.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsub.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vfsub.vv-codegen.json b/tests/configs/vfsub.vv-codegen.json new file mode 100644 index 000000000..a3c13a327 --- /dev/null +++ b/tests/configs/vfsub.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vfsub.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git 
a/tests/configs/vmfeq.vf-codegen.json b/tests/configs/vmfeq.vf-codegen.json new file mode 100644 index 000000000..fd69e104e --- /dev/null +++ b/tests/configs/vmfeq.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vmfeq.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vmfeq.vv-codegen.json b/tests/configs/vmfeq.vv-codegen.json new file mode 100644 index 000000000..7aa9eea2c --- /dev/null +++ b/tests/configs/vmfeq.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vmfeq.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vmfge.vf-codegen.json b/tests/configs/vmfge.vf-codegen.json new file mode 100644 index 000000000..2e084d97f --- /dev/null +++ b/tests/configs/vmfge.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vmfge.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vmfgt.vf-codegen.json b/tests/configs/vmfgt.vf-codegen.json new file mode 100644 index 000000000..cb04afae9 --- /dev/null +++ b/tests/configs/vmfgt.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vmfgt.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vmflt.vf-codegen.json b/tests/configs/vmflt.vf-codegen.json new file mode 
100644 index 000000000..5f3edbb2a --- /dev/null +++ b/tests/configs/vmflt.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vmflt.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vmflt.vv-codegen.json b/tests/configs/vmflt.vv-codegen.json new file mode 100644 index 000000000..35438f6db --- /dev/null +++ b/tests/configs/vmflt.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vmflt.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vmfne.vf-codegen.json b/tests/configs/vmfne.vf-codegen.json new file mode 100644 index 000000000..7c23797c8 --- /dev/null +++ b/tests/configs/vmfne.vf-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vmfne.vf", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file diff --git a/tests/configs/vmfne.vv-codegen.json b/tests/configs/vmfne.vv-codegen.json new file mode 100644 index 000000000..2aaea8ca2 --- /dev/null +++ b/tests/configs/vmfne.vv-codegen.json @@ -0,0 +1,8 @@ +{ + "name": "vmfne.vv", + "type": "codegen", + "vlen": 1024, + "xlen": 32, + "fp": true, + "compileOptions": [ "-mabi=ilp32f", "-march=rv32gcv", "-mno-relax", "-static", "-mcmodel=medany", "-fvisibility=hidden", "-nostdlib", "-fno-PIC" ] +} \ No newline at end of file From db26afd172df9e79085cd431192e62450d705b88 Mon Sep 17 00:00:00 2001 From: Yanqi Yang Date: Thu, 3 Aug 2023 16:26:06 +0800 Subject: [PATCH 4/9] add fp cases to CI passed --- 
.../passed/v1023l8b2fp-test/debug/passed.txt | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 .github/passed/v1023l8b2fp-test/debug/passed.txt diff --git a/.github/passed/v1023l8b2fp-test/debug/passed.txt b/.github/passed/v1023l8b2fp-test/debug/passed.txt new file mode 100644 index 000000000..1b276e18e --- /dev/null +++ b/.github/passed/v1023l8b2fp-test/debug/passed.txt @@ -0,0 +1,58 @@ +vfadd.vv +vfadd.vf +vfsub.vv +vfsub.vf +vfrsub.vf +vfmul.vv +vfmul.vf +vfdiv.vv +vfdiv.vf +vfrdiv.vf +vfmacc.vv +vfmacc.vf +vfnmacc.vv +vfnmacc.vf +vfmsac.vv +vfmsac.vf +vfnmsac.vv +vfnmsac.vf +vfmadd.vv +vfmadd.vf +vfnmadd.vv +vfnmadd.vf +vfmsub.vv +vfmsub.vf +vfnmsub.vv +vfnmsub.vf +vfsqrt.v +vfrsqrt7.v +vfrec7.v +vfmin.vv +vfmin.vf +vfmax.vv +vfmax.vf +vfsgnj.vv +vfsgnj.vf +vfsgnjn.vv +vfsgnjn.vf +vfsgnjx.vv +vfsgnjx.vf +vmfeq.vv +vmfeq.vf +vmfne.vv +vmfne.vf +vmflt.vv +vmflt.vf +vmfgt.vf +vmfge.vf +vfclass.v +vfmerge.vfm +vfmv.v.f +vfmv.f.s +vfmv.s.f +vfcvt.xu.f.v +vfcvt.x.f.v +vfcvt.rtz.xu.f.v +vfcvt.rtz.x.f.v +vfcvt.f.xu.v +vfcvt.f.x.v \ No newline at end of file From 2f60932f7644f30c5f1656157ee8cc7a60047c26 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 3 Aug 2023 22:32:46 +0800 Subject: [PATCH 5/9] [ci] fix invalid fp test config Signed-off-by: Avimitin --- .../passed/v1023l8b2fp-test/debug/passed.txt | 58 ------------------- .../passed/v1024l8b2fp-test/debug/passed.txt | 58 +++++++++++++++++++ build.sc | 7 +-- 3 files changed, 60 insertions(+), 63 deletions(-) delete mode 100644 .github/passed/v1023l8b2fp-test/debug/passed.txt create mode 100644 .github/passed/v1024l8b2fp-test/debug/passed.txt diff --git a/.github/passed/v1023l8b2fp-test/debug/passed.txt b/.github/passed/v1023l8b2fp-test/debug/passed.txt deleted file mode 100644 index 1b276e18e..000000000 --- a/.github/passed/v1023l8b2fp-test/debug/passed.txt +++ /dev/null @@ -1,58 +0,0 @@ -vfadd.vv -vfadd.vf -vfsub.vv -vfsub.vf -vfrsub.vf -vfmul.vv -vfmul.vf -vfdiv.vv -vfdiv.vf -vfrdiv.vf 
-vfmacc.vv -vfmacc.vf -vfnmacc.vv -vfnmacc.vf -vfmsac.vv -vfmsac.vf -vfnmsac.vv -vfnmsac.vf -vfmadd.vv -vfmadd.vf -vfnmadd.vv -vfnmadd.vf -vfmsub.vv -vfmsub.vf -vfnmsub.vv -vfnmsub.vf -vfsqrt.v -vfrsqrt7.v -vfrec7.v -vfmin.vv -vfmin.vf -vfmax.vv -vfmax.vf -vfsgnj.vv -vfsgnj.vf -vfsgnjn.vv -vfsgnjn.vf -vfsgnjx.vv -vfsgnjx.vf -vmfeq.vv -vmfeq.vf -vmfne.vv -vmfne.vf -vmflt.vv -vmflt.vf -vmfgt.vf -vmfge.vf -vfclass.v -vfmerge.vfm -vfmv.v.f -vfmv.f.s -vfmv.s.f -vfcvt.xu.f.v -vfcvt.x.f.v -vfcvt.rtz.xu.f.v -vfcvt.rtz.x.f.v -vfcvt.f.xu.v -vfcvt.f.x.v \ No newline at end of file diff --git a/.github/passed/v1024l8b2fp-test/debug/passed.txt b/.github/passed/v1024l8b2fp-test/debug/passed.txt new file mode 100644 index 000000000..7fe35bc08 --- /dev/null +++ b/.github/passed/v1024l8b2fp-test/debug/passed.txt @@ -0,0 +1,58 @@ +vfadd.vv-codegen +vfadd.vf-codegen +vfsub.vv-codegen +vfsub.vf-codegen +vfrsub.vf-codegen +vfmul.vv-codegen +vfmul.vf-codegen +vfdiv.vv-codegen +vfdiv.vf-codegen +vfrdiv.vf-codegen +vfmacc.vv-codegen +vfmacc.vf-codegen +vfnmacc.vv-codegen +vfnmacc.vf-codegen +vfmsac.vv-codegen +vfmsac.vf-codegen +vfnmsac.vv-codegen +vfnmsac.vf-codegen +vfmadd.vv-codegen +vfmadd.vf-codegen +vfnmadd.vv-codegen +vfnmadd.vf-codegen +vfmsub.vv-codegen +vfmsub.vf-codegen +vfnmsub.vv-codegen +vfnmsub.vf-codegen +vfsqrt.v-codegen +vfrsqrt7.v-codegen +vfrec7.v-codegen +vfmin.vv-codegen +vfmin.vf-codegen +vfmax.vv-codegen +vfmax.vf-codegen +vfsgnj.vv-codegen +vfsgnj.vf-codegen +vfsgnjn.vv-codegen +vfsgnjn.vf-codegen +vfsgnjx.vv-codegen +vfsgnjx.vf-codegen +vmfeq.vv-codegen +vmfeq.vf-codegen +vmfne.vv-codegen +vmfne.vf-codegen +vmflt.vv-codegen +vmflt.vf-codegen +vmfgt.vf-codegen +vmfge.vf-codegen +vfclass.v-codegen +vfmerge.vfm-codegen +vfmv.v.f-codegen +vfmv.f.s-codegen +vfmv.s.f-codegen +vfcvt.xu.f.v-codegen +vfcvt.x.f.v-codegen +vfcvt.rtz.xu.f.v-codegen +vfcvt.rtz.x.f.v-codegen +vfcvt.f.xu.v-codegen +vfcvt.f.x.v-codegen diff --git a/build.sc b/build.sc index d45e2ee8f..304227b26 
100644 --- a/build.sc +++ b/build.sc @@ -228,11 +228,8 @@ class Release(config: String) extends Module { } def emulatorTarget: Seq[String] = os.walk(os.pwd / "configs") - .filter(cfg => { - var filename = cfg.baseName - // TODO: remove fp filter after fp is supported - filename.contains("test") - }) + .filter(_.ext == "json") + .filter(_.baseName.contains("test")) .map(_.baseName) object emulator extends mill.Cross[emulator](emulatorTarget: _*) From 0d256fe4d10688c922986718d0f096a4f6c378d5 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 3 Aug 2023 22:34:31 +0800 Subject: [PATCH 6/9] [ci] allow generate test case from multiple passed file Signed-off-by: Avimitin --- .github/passed/default.txt | 2 ++ .github/scripts/ci.sc | 44 ++++++++++++++++++++++++++++++-------- .github/workflows/pr.yml | 2 +- Makefile | 2 +- 4 files changed, 39 insertions(+), 11 deletions(-) create mode 100644 .github/passed/default.txt diff --git a/.github/passed/default.txt b/.github/passed/default.txt new file mode 100644 index 000000000..bb66ef6aa --- /dev/null +++ b/.github/passed/default.txt @@ -0,0 +1,2 @@ +v1024l8b2fp-test/debug/passed.txt +v1024l8b2-test/debug/passed.txt \ No newline at end of file diff --git a/.github/scripts/ci.sc b/.github/scripts/ci.sc index 80c91b404..056736557 100644 --- a/.github/scripts/ci.sc +++ b/.github/scripts/ci.sc @@ -2,11 +2,13 @@ // A valid passedFile path should be like: /path/to/v1024l8b2-test/debug/passed.txt. 
// // @param passedFile Path to the passed.txt file -def passed(passedFile: os.Path): Seq[String] = { - println(s"Generate tests from file: $passedFile") - val Seq(_, runType, verilatorType) = passedFile.segments.toSeq.reverse.slice(0, 3) - os.read.lines(passedFile) - .map(test => s"verilatorEmulator[$verilatorType,$test,$runType].run") +def genRunTask(passedFiles: Seq[os.Path]): Seq[String] = { + passedFiles.flatMap(file => { + println(s"Generate tests from file: $file") + val Seq(_, runType, verilatorType) = file.segments.toSeq.reverse.slice(0, 3) + os.read.lines(file) + .map(test => s"verilatorEmulator[$verilatorType,$test,$runType].run") + }) } // Resolve all the executable verilatorEmulator[$vtype,$ttype,$rtype].run object and execute them all. @@ -29,13 +31,36 @@ def writeJson(buckets: Seq[String], outputFile: os.Path) = os.write.over(outputFile, ujson.Obj("include" -> buckets.map(a => ujson.Obj(s"name" -> ujson.Str(a))))) -// Read the passed.txt file, split the content into list of String and packed them up using the `bucket` function with specified bucket size. +// Read the passed.txt file path from the given defaultPassed file, +// split the content of the passed.txt file into list of String and packed them up using the `bucket` function with specified bucket size. // Write the generated json into given outputFile path. 
@main -def passedJson(bucketSize: Int, passedFile: os.Path, outputFile: os.Path) = writeJson(buckets(passed(passedFile),bucketSize),outputFile) +def passedJson(bucketSize: Int, defaultPassed: os.Path, outputFile: os.Path) = + writeJson( + buckets( + genRunTask( + os.read.lines(defaultPassed).map(defaultPassed / os.up / os.RelPath(_)) + ), + bucketSize + ), + outputFile + ) @main -def unpassedJson(bucketSize: Int, root: os.Path, passedFile: os.Path, outputFile: os.Path) = writeJson(buckets((all(root).toSet -- passed(passedFile).toSet).toSeq,bucketSize),outputFile) +def unpassedJson( + bucketSize: Int, + root: os.Path, + defaultPassed: os.Path, + outputFile: os.Path +) = writeJson( + buckets( + (all(root).toSet -- genRunTask( + os.read.lines(defaultPassed).map(defaultPassed / os.up / os.RelPath(_)) + ).toSet).toSeq, + bucketSize + ), + outputFile +) @main def allJson(bucketSize: Int, root: os.Path, outputFile: os.Path) = writeJson(buckets(all(root),bucketSize),outputFile) @@ -67,13 +92,14 @@ def runTest(root: os.Path, jobs: String, loggingDir: Option[os.Path]) = { os.makeDir.all(logDir) os.makeDir.all(logDir / "fail") val totalJobs = jobs.split(";") + // TODO: Use sliding(n, n) and scala.concurrent to run multiple test in parallel val failed = totalJobs.zipWithIndex .foldLeft(IndexedSeq[String]())( (failed, elem) => { val (job, i) = elem val logPath = logDir / s"$job.log" println(s"[$i/${totalJobs.length}] Running test case $job") - val handle = os.proc("mill", "--no-server", "-j", "0", job).call( + val handle = os.proc("mill", "--no-server", job).call( cwd=root, check=false, stdout=logPath, diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 5bda59f39..66a837827 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -29,7 +29,7 @@ jobs: trusted-public-keys = cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= minio.inner.fi.c-3.moe:gDg5SOIH65O0tTV89dUawME5BTmduWWaA7as/cqvevM= extra-substituters = 
https://${{secrets.CACHE_DOMAIN}}/nix - id: ci-tests - run: nix develop .#testcase -c make ci-passed-tests PASSEDFILE=.github/passed/v1024l8b2-test/debug/passed.txt + run: nix develop .#testcase -c make ci-passed-tests DEFAULT_PASSED=.github/passed/default.txt ci: name: "CI" diff --git a/Makefile b/Makefile index 0d3da44ce..c7ca98e08 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,7 @@ ci-run: ci-passed-tests: echo -n matrix= >> $$GITHUB_OUTPUT - amm .github/scripts/ci.sc passedJson $(RUNNERS) $(PASSEDFILE) ./passed.json + amm .github/scripts/ci.sc passedJson $(RUNNERS) $(DEFAULT_PASSED) ./passed.json cat ./passed.json >> $$GITHUB_OUTPUT ci-unpassed-tests: From e2a08ff8e612cb4505fdfdd105148518c0955d87 Mon Sep 17 00:00:00 2001 From: Yanqi Yang Date: Fri, 4 Aug 2023 10:40:17 +0800 Subject: [PATCH 7/9] [rtl] fix decoder for vfsgnj, fix vfmin --- v/src/Decoder.scala | 2 +- v/src/LaneFloat.scala | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/v/src/Decoder.scala b/v/src/Decoder.scala index 93f69adfe..947246daf 100644 --- a/v/src/Decoder.scala +++ b/v/src/Decoder.scala @@ -226,9 +226,9 @@ object Decoder { "vfcvt.rtz.x.f.v" -> 14, "vfcvt.x.f.v" -> 10, "vfcvt.xu.f.v" -> 9, - "vfsgnj" -> 1, "vfsgnjn" -> 2, "vfsgnjx" -> 3, + "vfsgnj" -> 1, "vfclass" -> 4, "vfrsqrt7" -> 7, "vfrec7" -> 6, diff --git a/v/src/LaneFloat.scala b/v/src/LaneFloat.scala index bc57211d0..15d7eac45 100644 --- a/v/src/LaneFloat.scala +++ b/v/src/LaneFloat.scala @@ -195,8 +195,8 @@ class LaneFloat(val parameter: LaneFloatParam) extends VFUModule(parameter) with val compareflags = Wire(UInt(5.W)) assert(!unitSeleOH(2) || (uop === "b0001".U || uop === "b0000".U || uop === "b0010".U || uop === "b0011".U || uop === "b0100".U || uop === "b0101".U || uop === "b1000".U || uop === "b1100".U)) - compareResult := Mux(uop === "b1000".U , Mux(compareModule.io.lt, request.src(0), request.src(1)), - Mux(uop === "b1100".U, Mux(compareModule.io.gt, request.src(0), request.src(1)), + 
compareResult := Mux(uop === "b1000".U , Mux(compareModule.io.lt, request.src(1), request.src(0)), + Mux(uop === "b1100".U, Mux(compareModule.io.gt, request.src(1), request.src(0)), Mux(uop === "b0011".U, compareModule.io.lt || compareModule.io.eq, Mux(uop === "b0101".U, compareModule.io.gt || compareModule.io.eq, Mux(uop === "b0010".U, compareModule.io.lt, @@ -258,9 +258,9 @@ class LaneFloat(val parameter: LaneFloatParam) extends VFUModule(parameter) with * }}} */ val sgnjresult = Wire(UInt(32.W)) - val sgnjSign = Mux(otherEn && uop === 1.U, request.src(0)(31), - Mux(otherEn && uop === 2.U, !request.src(0)(31), - Mux(otherEn && uop ===3.U, request.src(0)(31) ^ request.src(1)(31), false.B))) + val sgnjSign = Mux(otherEn && uop === "b0001".U, request.src(0)(31), + Mux(otherEn && uop === "b0010".U, !request.src(0)(31), + Mux(otherEn && uop ==="b0011".U, request.src(0)(31) ^ request.src(1)(31), false.B))) sgnjresult := Cat(sgnjSign, request.src(1)(30,0)) val in1classify = classifyRecFN(8, 24, recIn1) From 6ec34eba5a6ac59f201ccafc0d70ff9cd949650d Mon Sep 17 00:00:00 2001 From: Yanqi Yang Date: Fri, 4 Aug 2023 13:31:46 +0800 Subject: [PATCH 8/9] [rtl] fix fp divider logic --- v/src/LaneFloat.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/v/src/LaneFloat.scala b/v/src/LaneFloat.scala index 15d7eac45..0acad6bae 100644 --- a/v/src/LaneFloat.scala +++ b/v/src/LaneFloat.scala @@ -154,11 +154,12 @@ class LaneFloat(val parameter: LaneFloatParam) extends VFUModule(parameter) with * }}} */ val div = divEn && (uop === "b0001".U) + val rdiv = divEn && (uop === "b0010".U) val sqrt = divEn && (uop === "b1000".U) val divSqrt = Module(new DivSqrtRecFN_small(8, 24,0)) - val divIn0 = Mux(div, recIn0, recIn1) - val divIn1 = Mux(div, recIn1, recIn0) + val divIn0 = Mux(rdiv, recIn0, recIn1) + val divIn1 = Mux(rdiv, recIn1, recIn0) divSqrt.io.a := divIn0 divSqrt.io.b := divIn1 From a2b5f81e7b01c291f1d1f9bd8ca00f8160063a84 Mon Sep 17 00:00:00 2001 From: Yanqi 
Yang Date: Fri, 4 Aug 2023 13:37:47 +0800 Subject: [PATCH 9/9] [configs] update fp-trance and fp-release --- configs/v1024l8b2fp-release.json | 72 +++++++++-------------------- configs/v1024l8b2fp-test-trace.json | 23 ++++++--- 2 files changed, 40 insertions(+), 55 deletions(-) diff --git a/configs/v1024l8b2fp-release.json b/configs/v1024l8b2fp-release.json index 05fc70371..8120a47c2 100644 --- a/configs/v1024l8b2fp-release.json +++ b/configs/v1024l8b2fp-release.json @@ -19,12 +19,7 @@ }, "generator": "v.MaskedLogic" }, - [ - 0, - 1, - 2, - 3 - ] + [0, 1, 2, 3] ] ], "aluModuleParameters": [ @@ -35,9 +30,7 @@ }, "generator": "v.LaneAdder" }, - [ - 0 - ] + [0] ], [ { @@ -46,9 +39,7 @@ }, "generator": "v.LaneAdder" }, - [ - 1 - ] + [1] ], [ { @@ -57,9 +48,7 @@ }, "generator": "v.LaneAdder" }, - [ - 2 - ] + [2] ], [ { @@ -68,9 +57,7 @@ }, "generator": "v.LaneAdder" }, - [ - 3 - ] + [3] ] ], "shifterModuleParameters": [ @@ -81,12 +68,7 @@ }, "generator": "v.LaneShifter" }, - [ - 0, - 1, - 2, - 3 - ] + [0, 1, 2, 3] ] ], "mulModuleParameters": [ @@ -97,12 +79,7 @@ }, "generator": "v.LaneMul" }, - [ - 0, - 1, - 2, - 3 - ] + [0, 1, 2, 3] ] ], "divModuleParameters": [ @@ -113,12 +90,7 @@ }, "generator": "v.LaneDiv" }, - [ - 0, - 1, - 2, - 3 - ] + [0, 1, 2, 3] ] ], "otherModuleParameters": [ @@ -133,12 +105,18 @@ }, "generator": "v.OtherUnit" }, - [ - 0, - 1, - 2, - 3 - ] + [0, 1, 2, 3] + ] + ], + "floatModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32 + }, + "generator": "v.LaneFloat" + }, + [0, 1, 2, 3] ] ] } @@ -147,15 +125,11 @@ }, "mfcArgs": [ "-dedup", - "-O=release", - "--disable-all-randomization", + "-O=debug", "--split-verilog", - "--strip-debug-info", - "--preserve-values=none", - "--preserve-aggregate=all", + "--preserve-values=named", "--output-annotation-file=mfc.anno.json", - "--repl-seq-mem", - "--repl-seq-mem-file=repl-seq-mem.txt" + "--lowering-options=verifLabels" ], "testbench": false } diff --git a/configs/v1024l8b2fp-test-trace.json 
b/configs/v1024l8b2fp-test-trace.json index 383ed4ddc..99b8c60fe 100644 --- a/configs/v1024l8b2fp-test-trace.json +++ b/configs/v1024l8b2fp-test-trace.json @@ -107,18 +107,29 @@ }, [0, 1, 2, 3] ] + ], + "floatModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32 + }, + "generator": "v.LaneFloat" + }, + [0, 1, 2, 3] + ] ] } }, "generator": "v.V" }, "mfcArgs": [ - "-dedup", - "-O=debug", - "--split-verilog", - "--preserve-values=named", - "--output-annotation-file=mfc.anno.json", - "--lowering-options=verifLabels" + "-dedup", + "-O=debug", + "--split-verilog", + "--preserve-values=named", + "--output-annotation-file=mfc.anno.json", + "--lowering-options=verifLabels" ], "testbench": true, "trace": true