O3 Cpu Rename

Posted May 29, 2021

By Jaehyuk Lee 50 min read

Rename

It maintains the rename history of all instructions with destination registers, storing the arch register, the new physical register, and the old physical register. The information is required to enable undoing of mappings if squashing happens, or freeing up registers upon commit. Rename stage can be blocked when the ROB, IQ, or LSQ is going to be full. Rename also handles barriers and serializing instructions by stalling them in rename until the back-end drains. It blocks the stage until the ROB is empty, and there are no instructions in flight to the ROB.

Interface of rename stage

cpu/o3/rename_impl.hh

        
      
/**
 * Store the time buffer pointer and create the wires this stage uses on it.
 *
 * @param tb_ptr Time buffer that is stored in timeBuffer and from which the
 *               fromIEW, fromCommit and toDecode wires are obtained.
 */
template <class Impl>
void
DefaultRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
   timeBuffer = tb_ptr;

   // Wire for reading what the IEW stage put into the time buffer; it is
   // offset by the IEW-to-rename delay.
   fromIEW = tb_ptr->getWire(-iewToRenameDelay);

   // Wire for reading what the commit stage put into the time buffer; it is
   // offset by the commit-to-rename delay.
   fromCommit = tb_ptr->getWire(-commitToRenameDelay);

   // Wire at offset 0 for writing information to previous stages.
   toDecode = tb_ptr->getWire(0);
}

/**
 * Store the rename queue pointer and create the wire used to write to it.
 *
 * @param rq_ptr Rename queue that is stored in renameQueue and from which
 *               the toIEW wire is obtained.
 */
template <class Impl>
void
DefaultRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
   renameQueue = rq_ptr;

   // Wire at offset 0 for writing information to future stages.
   toIEW = rq_ptr->getWire(0);
}

/**
 * Store the decode queue pointer and create the wire used to read from it.
 *
 * @param dq_ptr Decode queue that is stored in decodeQueue and from which
 *               the fromDecode wire is obtained.
 */
template <class Impl>
void
DefaultRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
{
   decodeQueue = dq_ptr;

   // Wire for reading what decode produced; it is offset by the
   // decode-to-rename delay.
   fromDecode = dq_ptr->getWire(-decodeToRenameDelay);
}

Three main interfaces connect the rename stage to its neighbors. To deliver the instructions it has processed to the IEW stage, it uses the toIEW wire. To read information from the decode and commit stages, it sets up the fromDecode and fromCommit wires. In addition, setTimeBuffer sets up a fromIEW wire for reading information coming back from IEW and a toDecode wire for writing information to previous stages.

Tick function of the rename stage

        
      
/**
 * Run one cycle of the rename stage.
 *
 * Resets the per-cycle flags, sorts the instructions coming from decode by
 * thread, and then, for every active thread, checks the incoming signals and
 * calls rename(). Afterwards it updates the stage status if anything
 * changed, reports activity to the CPU if the time buffer was written,
 * removes committed entries from the rename history, and subtracts the
 * counts reported by IEW from the in-progress counters.
 */
template <class Impl>
void
DefaultRename<Impl>::tick()
{
   // Per-cycle state starts from scratch.
   wroteToTimeBuffer = false;
   blockThisCycle = false;
   toIEWIndex = 0;

   bool status_change = false;

   sortInsts();

   list<ThreadID>::iterator end = activeThreads->end();

   // Check stall and squash signals, then try to rename for each thread.
   for (list<ThreadID>::iterator it = activeThreads->begin(); it != end; ) {
       // Advance before the body runs, so the iterator has already moved on
       // by the time the per-thread work executes.
       const ThreadID tid = *it++;

       DPRINTF(Rename, "Processing [tid:%i]\n", tid);

       // Call first, combine second: the check must run for every thread
       // even when an earlier thread already reported a change.
       const bool signalled = checkSignalsAndUpdate(tid);
       status_change = signalled || status_change;

       rename(status_change, tid);
   }

   if (status_change) {
       updateStatus();
   }

   if (wroteToTimeBuffer) {
       DPRINTF(Activity, "Activity this cycle.\n");
       cpu->activityThisCycle();
   }

   for (list<ThreadID>::iterator it = activeThreads->begin(); it != end; ) {
       const ThreadID tid = *it++;

       // If we committed this cycle then doneSeqNum will be > 0. Skip the
       // thread when nothing committed, when commit signals a squash, or
       // when rename itself is squashing.
       if (fromCommit->commitInfo[tid].doneSeqNum == 0 ||
           fromCommit->commitInfo[tid].squash ||
           renameStatus[tid] == Squashing) {
           continue;
       }

       removeFromHistory(fromCommit->commitInfo[tid].doneSeqNum, tid);
   }

   // @todo: make into updateProgress function
   for (ThreadID tid = 0; tid < numThreads; tid++) {
       // IEW reports how many instructions, loads and stores it dispatched;
       // those are no longer "in progress" from rename's point of view.
       instsInProgress[tid] -= fromIEW->iewInfo[tid].dispatched;
       loadsInProgress[tid] -= fromIEW->iewInfo[tid].dispatchedToLQ;
       storesInProgress[tid] -= fromIEW->iewInfo[tid].dispatchedToSQ;

       // The counters must never go negative.
       assert(loadsInProgress[tid] >= 0);
       assert(storesInProgress[tid] >= 0);
       assert(instsInProgress[tid] >= 0);
   }
}

sortInsts

        
      
/**
 * Distribute the instructions delivered by decode into the per-thread
 * insts queues, using each instruction's threadNumber as the index.
 */
template <class Impl>
void
DefaultRename<Impl>::sortInsts()
{
   const int num_from_decode = fromDecode->size;

   for (int idx = 0; idx < num_from_decode; ++idx) {
       const DynInstPtr &decoded = fromDecode->insts[idx];

       // Append to the queue of the thread that owns this instruction.
       insts[decoded->threadNumber].push_back(decoded);

#if TRACING_ON
       // Only when the O3PipeView trace flag is on: record the rename time
       // as an offset from the instruction's fetch tick.
       if (DTRACE(O3PipeView)) {
           decoded->renameTick = curTick() - decoded->fetchTick;
       }
#endif
   }
}

Because the instructions arriving from decode are delivered together regardless of which thread they belong to, the rename stage must first sort them by thread. For that purpose, each instruction records the thread that owns it (threadNumber), and sortInsts pushes the instruction onto that thread's insts queue.

checkSignalsAndUpdate

        
      
/**
 * Read the incoming signals for one thread and update its rename status.
 *
 * The checks run in this order:
 *   1. squash from commit -> squash() and report a change;
 *   2. stall condition    -> block() and return its result;
 *   3. otherwise, depending on the current status:
 *        Blocked        -> switch to Unblocking;
 *        Squashing      -> switch to SerializeStall, Unblocking or Running,
 *                          depending on resumeSerialize / resumeUnblocking;
 *        SerializeStall -> switch to Unblocking and put the saved
 *                          serializing instruction back at the front of the
 *                          queue to be processed.
 *
 * @param tid Thread to examine.
 * @return true when the caller should treat this as a status change; false
 *         otherwise (including the Squashing -> Running transition).
 */
template <class Impl>
bool
DefaultRename<Impl>::checkSignalsAndUpdate(ThreadID tid)
{
   // Refresh the free-entry counts and stall signals before deciding.
   readFreeEntries(tid);
   readStallSignals(tid);

   // A squash from commit is handled before anything else.
   if (fromCommit->commitInfo[tid].squash) {
       DPRINTF(Rename, "[tid:%i] Squashing instructions due to squash from "
               "commit.\n", tid);

       squash(fromCommit->commitInfo[tid].doneSeqNum, tid);

       return true;
   }

   // Any stall condition blocks the stage; block() says whether the status
   // changed.
   if (checkStall(tid)) {
       return block(tid);
   }

   // No squash and no stall: leave whatever waiting state we were in.
   switch (renameStatus[tid]) {
     case Blocked:
       DPRINTF(Rename, "[tid:%i] Done blocking, switching to unblocking.\n",
               tid);

       renameStatus[tid] = Unblocking;

       unblock(tid);

       return true;

     case Squashing:
       // Switch status to running if rename isn't being told to block or
       // squash this cycle.
       if (resumeSerialize) {
           DPRINTF(Rename,
                   "[tid:%i] Done squashing, switching to serialize.\n", tid);

           renameStatus[tid] = SerializeStall;
           return true;
       }

       if (resumeUnblocking) {
           DPRINTF(Rename,
                   "[tid:%i] Done squashing, switching to unblocking.\n",
                   tid);
           renameStatus[tid] = Unblocking;
           return true;
       }

       DPRINTF(Rename, "[tid:%i] Done squashing, switching to running.\n",
               tid);
       renameStatus[tid] = Running;
       return false;

     case SerializeStall:
       {
           // Stall ends once the ROB is free.
           DPRINTF(Rename, "[tid:%i] Done with serialize stall, switching to "
                   "unblocking.\n", tid);

           DynInstPtr serial_inst = serializeInst[tid];

           renameStatus[tid] = Unblocking;

           unblock(tid);

           DPRINTF(Rename, "[tid:%i] Processing instruction [%lli] with "
                   "PC %s.\n", tid, serial_inst->seqNum,
                   serial_inst->pcState());

           // The instruction no longer has to wait; clear its flag and put
           // it at the head of whichever queue will be drained next: the
           // skid buffer if it holds anything, otherwise the insts queue.
           serial_inst->clearSerializeBefore();

           InstQueue &target = skidBuffer[tid].empty() ?
               insts[tid] : skidBuffer[tid];
           target.push_front(serial_inst);

           DPRINTF(Rename, "[tid:%i] Instruction must be processed by rename."
                   " Adding to front of list.\n", tid);

           serializeInst[tid] = NULL;

           return true;
       }

     default:
       break;
   }

   // If we've reached this point, we have not gotten any signals that
   // cause rename to change its status.  Rename remains the same as before.
   return false;
}

Note that the overall sequence of checkSignalsAndUpdate is very similar to that of the decode stage's checkSignalsAndUpdate. It checks the stall and squash signals and executes the associated code: for a stall it calls the block function, and for a squash it invokes the squash function. In detail, however, there are two noticeable differences: the readFreeEntries and checkStall functions.

        
      
/**
 * Refresh this thread's view of the free back-end resources.
 *
 * Each group of counters is only overwritten when the producing stage flags
 * it (usedIQ, usedLSQ, usedROB); otherwise the previously stored values are
 * kept. The resulting numbers are then traced.
 *
 * @param tid Thread whose freeEntries / emptyROB values are updated.
 */
template <class Impl>
void
DefaultRename<Impl>::readFreeEntries(ThreadID tid)
{
   const auto &iew_info = fromIEW->iewInfo[tid];
   const auto &commit_info = fromCommit->commitInfo[tid];
   auto &free_now = freeEntries[tid];

   // Instruction queue occupancy, as reported by IEW.
   if (iew_info.usedIQ)
       free_now.iqEntries = iew_info.freeIQEntries;

   // Load and store queue occupancy, as reported by IEW.
   if (iew_info.usedLSQ) {
       free_now.lqEntries = iew_info.freeLQEntries;
       free_now.sqEntries = iew_info.freeSQEntries;
   }

   // ROB occupancy and emptiness, as reported by commit.
   if (commit_info.usedROB) {
       free_now.robEntries = commit_info.freeROBEntries;
       emptyROB[tid] = commit_info.emptyROB;
   }

   DPRINTF(Rename, "[tid:%i] Free IQ: %i, Free ROB: %i, "
                   "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i)\n",
           tid,
           free_now.iqEntries,
           free_now.robEntries,
           free_now.lqEntries,
           free_now.sqEntries,
           renameMap[tid]->numFreeEntries(),
           renameMap[tid]->numFreeIntEntries(),
           renameMap[tid]->numFreeFloatEntries(),
           renameMap[tid]->numFreeVecEntries(),
           renameMap[tid]->numFreePredEntries(),
           renameMap[tid]->numFreeCCEntries());

   DPRINTF(Rename, "[tid:%i] %i instructions not yet in ROB\n",
           tid, instsInProgress[tid]);
}

After the IEW and commit stages have processed instructions, they report back to the rename stage how many IQ, LQ, SQ, and ROB entries are free, so that the rename stage knows which resources can be reassigned to other instructions for renaming. We will see how the freeEntries are returned to the resource pool and reassigned to other instructions.

        
      
/**
 * Decide whether rename must stall for this thread.
 *
 * The conditions are tested in a fixed order and the first one that holds
 * is traced: a stall signal from IEW, no free ROB entries, no free IQ
 * entries, neither LQ nor SQ entries free, no free rename-map entries, or a
 * serialize stall while the ROB is not empty or instructions are still in
 * progress.
 *
 * @param tid Thread to check.
 * @return true if any stall condition holds, false otherwise.
 */
template <class Impl>
bool
DefaultRename<Impl>::checkStall(ThreadID tid)
{
   if (stalls[tid].iew) {
       DPRINTF(Rename,"[tid:%i] Stall from IEW stage detected.\n", tid);
       return true;
   }

   if (calcFreeROBEntries(tid) <= 0) {
       DPRINTF(Rename,"[tid:%i] Stall: ROB has 0 free entries.\n", tid);
       return true;
   }

   if (calcFreeIQEntries(tid) <= 0) {
       DPRINTF(Rename,"[tid:%i] Stall: IQ has 0 free entries.\n", tid);
       return true;
   }

   // Only stall here when both queues are exhausted.
   if (calcFreeLQEntries(tid) <= 0 && calcFreeSQEntries(tid) <= 0) {
       DPRINTF(Rename,"[tid:%i] Stall: LSQ has 0 free entries.\n", tid);
       return true;
   }

   if (renameMap[tid]->numFreeEntries() <= 0) {
       DPRINTF(Rename,"[tid:%i] Stall: RenameMap has 0 free entries.\n", tid);
       return true;
   }

   // A serializing instruction keeps the stage stalled until the ROB is
   // empty and no instructions are still in progress.
   if (renameStatus[tid] == SerializeStall &&
       (!emptyROB[tid] || instsInProgress[tid])) {
       DPRINTF(Rename,"[tid:%i] Stall: Serialize stall and ROB is not "
               "empty.\n",
               tid);
       return true;
   }

   return false;
}

A stall can have various causes. It may be caused by a stall signal received from another stage, or by a condition internal to the rename stage, such as a lack of resources.

rename

        
      
/**
 * Do this cycle's rename work for one thread, based on its status.
 *
 * First the status-specific cycle counters are updated and, if a serialize
 * stall or an unblock is being resumed, the stage is blocked again and the
 * unblock signal to decode is withdrawn. Then, if the (possibly updated)
 * status is Running or Idle, instructions are renamed; if it is Unblocking,
 * instructions are renamed, any valid new input is moved to the skid buffer,
 * and unblock() is attempted.
 *
 * @param status_change In/out flag; set when unblocking or blocking this
 *                      cycle may have changed the overall status.
 * @param tid           Thread to process.
 */
template<class Impl>
void
DefaultRename<Impl>::rename(bool &status_change, ThreadID tid)
{
   // Stage 1: statistics and resume handling for the current status.
   switch (renameStatus[tid]) {
     case Blocked:
       ++renameBlockCycles;
       break;

     case Squashing:
       ++renameSquashCycles;
       break;

     case SerializeStall:
       ++renameSerializeStallCycles;
       // If we are currently in SerializeStall and resumeSerialize
       // was set, then that means that we are resuming serializing
       // this cycle.  Tell the previous stages to block.
       if (resumeSerialize) {
           resumeSerialize = false;
           block(tid);
           toDecode->renameUnblock[tid] = false;
       }
       break;

     case Unblocking:
       if (resumeUnblocking) {
           block(tid);
           resumeUnblocking = false;
           toDecode->renameUnblock[tid] = false;
       }
       break;

     default:
       break;
   }

   // Stage 2: the status is read again here because block() above may have
   // changed it.
   const bool can_run = renameStatus[tid] == Running ||
                        renameStatus[tid] == Idle;

   if (can_run) {
       DPRINTF(Rename,
               "[tid:%i] "
               "Not blocked, so attempting to run stage.\n",
               tid);

       renameInsts(tid);
       return;
   }

   if (renameStatus[tid] != Unblocking) {
       return;
   }

   renameInsts(tid);

   if (validInsts()) {
       // Add the current inputs to the skid buffer so they can be
       // reprocessed when this stage unblocks.
       skidInsert(tid);
   }

   // If we switched over to blocking, then there's a potential for
   // an overall status change.
   const bool unblocked = unblock(tid);
   status_change = unblocked || status_change || blockThisCycle;
}

When there is no stall or blocking, the stage can rename instructions. When the current renameStatus is Running or Idle, it invokes the renameInsts function to rename the instructions. Also, when the renameStatus is Unblocking, which means that the rename stage is recovering from the Blocked status, it invokes the renameInsts function as well.

renameInsts: the main rename function

Most of the rename stage's work is done by the renameInsts function.

        
      
template <class Impl>
void
DefaultRename<Impl>::renameInsts(ThreadID tid)
{
   // Instructions can be either in the skid buffer or the queue of
   // instructions coming from decode, depending on the status.
   int insts_available = renameStatus[tid] == Unblocking ?
       skidBuffer[tid].size() : insts[tid].size();

   // Check the decode queue to see if instructions are available.
   // If there are no available instructions to rename, then do nothing.
   if (insts_available == 0) {
       DPRINTF(Rename, "[tid:%i] Nothing to do, breaking out early.\n",
               tid);
       // Should I change status to idle?
       ++renameIdleCycles;
       return;
   } else if (renameStatus[tid] == Unblocking) {
       ++renameUnblockCycles;
   } else if (renameStatus[tid] == Running) {
       ++renameRunCycles;
   }

First, it checks the current status of the rename stage. If the current status is Unblocking, it should fetch instructions from the skidBuffer instead of the insts buffer. Also, even in the Running or Idle status, there might be no instructions available because of a stall or squash. Therefore, it first checks whether any instructions are ready to be renamed.

Checking ROB and IQ space

        
      
   // Will have to do a different calculation for the number of free
   // entries.
   int free_rob_entries = calcFreeROBEntries(tid);
   int free_iq_entries  = calcFreeIQEntries(tid);
   int min_free_entries = free_rob_entries;

   FullSource source = ROB;

   if (free_iq_entries < min_free_entries) {
       min_free_entries = free_iq_entries;
       source = IQ;
   }

   // Check if there's any space left.
   if (min_free_entries <= 0) {
       DPRINTF(Rename,
               "[tid:%i] Blocking due to no free ROB/IQ/ entries.\n"
               "ROB has %i free entries.\n"
               "IQ has %i free entries.\n",
               tid, free_rob_entries, free_iq_entries);

       blockThisCycle = true;

       block(tid);

       incrFullStat(source);

       return;
   } else if (min_free_entries < insts_available) {
       DPRINTF(Rename,
               "[tid:%i] "
               "Will have to block this cycle. "
               "%i insts available, "
               "but only %i insts can be renamed due to ROB/IQ/LSQ limits.\n",
               tid, insts_available, min_free_entries);

       insts_available = min_free_entries;

       blockThisCycle = true;

       incrFullStat(source);
   }

Before renaming, it needs to check that ROB and instruction queue entries are available. When there is no space at all, it blocks right away. However, if only some entries are available, it renames only as many instructions as there are free entries and postpones the stall until later (blockThisCycle = true).

Checking serialization

        
      
   InstQueue &insts_to_rename = renameStatus[tid] == Unblocking ?
       skidBuffer[tid] : insts[tid];

   DPRINTF(Rename,
           "[tid:%i] "
           "%i available instructions to send iew.\n",
           tid, insts_available);

   DPRINTF(Rename,
           "[tid:%i] "
           "%i insts pipelining from Rename | "
           "%i insts dispatched to IQ last cycle.\n",
           tid, instsInProgress[tid], fromIEW->iewInfo[tid].dispatched);

   // Handle serializing the next instruction if necessary.
   if (serializeOnNextInst[tid]) {
       if (emptyROB[tid] && instsInProgress[tid] == 0) {
           // ROB already empty; no need to serialize.
           serializeOnNextInst[tid] = false;
       } else if (!insts_to_rename.empty()) {
           insts_to_rename.front()->setSerializeBefore();
       }
   }

It also manages serializing instructions and generates stalls to enforce serialization. To this end, it provides associated functions and fields. I will not cover the details here because they are used later, when each instruction is processed by the rename stage’s main loop.

Checking availability of the LQ and SQ

        
      
   int renamed_insts = 0;

   // Main rename loop: keep going while instructions remain available and
   // toIEWIndex is still below renameWidth.
   while (insts_available > 0 &&  toIEWIndex < renameWidth) {
       DPRINTF(Rename, "[tid:%i] Sending instructions to IEW.\n", tid);

       assert(!insts_to_rename.empty());

       // Look at the head instruction without removing it yet; it is only
       // popped once the LQ/SQ checks below have passed.
       DynInstPtr inst = insts_to_rename.front();

       // For all kinds of instructions, the ROB and IQ were checked first.
       // For a load, check the LQ size, taking in-flight loads into account.
       // For a store or atomic, check the SQ size, taking in-flight stores
       // into account.

       if (inst->isLoad()) {
           if (calcFreeLQEntries(tid) <= 0) {
               // tid must be passed: the format string consumes it via %i.
               DPRINTF(Rename, "[tid:%i] Cannot rename due to no free LQ\n",
                       tid);
               source = LQ;
               incrFullStat(source);
               break;
           }
       }

       if (inst->isStore() || inst->isAtomic()) {
           if (calcFreeSQEntries(tid) <= 0) {
               // tid must be passed: the format string consumes it via %i.
               DPRINTF(Rename, "[tid:%i] Cannot rename due to no free SQ\n",
                       tid);
               source = SQ;
               incrFullStat(source);
               break;
           }
       }

Now we will take a look at the main loop of the rename stage. It traverses the instructions stored in insts_to_rename. Note that instructions are retrieved from either insts or the skidBuffer, depending on the current status of the rename stage. Although we already checked the availability of the IQ and ROB, the rename stage further checks the availability of the load queue (LQ) and store queue (SQ) if the instruction is a memory operation. Note that issuing a memory operation consumes one entry from the corresponding queue. If the LQ or SQ is full, it sets source to LQ or SQ, records the event with incrFullStat (for statistics), and breaks out of the loop, so the instruction is not sent to the next stage and the rename stage stalls.

Consume one instruction and check register availability

        
      
       insts_to_rename.pop_front();

       if (renameStatus[tid] == Unblocking) {
           DPRINTF(Rename,
                   "[tid:%i] "
                   "Removing [sn:%llu] PC:%s from rename skidBuffer\n",
                   tid, inst->seqNum, inst->pcState());
       }

       if (inst->isSquashed()) {
           DPRINTF(Rename,
                   "[tid:%i] "
                   "instruction %i with PC %s is squashed, skipping.\n",
                   tid, inst->seqNum, inst->pcState());

           ++renameSquashedInsts;

           // Decrement how many instructions are available.
           --insts_available;

           continue;
       }

       DPRINTF(Rename,
               "[tid:%i] "
               "Processing instruction [sn:%llu] with PC %s.\n",
               tid, inst->seqNum, inst->pcState());

       // Check here to make sure there are enough destination registers
       // to rename to.  Otherwise block.
       if (!renameMap[tid]->canRename(inst->numIntDestRegs(),
                                      inst->numFPDestRegs(),
                                      inst->numVecDestRegs(),
                                      inst->numVecElemDestRegs(),
                                      inst->numVecPredDestRegs(),
                                      inst->numCCDestRegs())) {
           DPRINTF(Rename,
                   "Blocking due to "
                   " lack of free physical registers to rename to.\n");
           blockThisCycle = true;
           insts_to_rename.push_front(inst);
           ++renameFullRegistersEvents;

           break;
       }

After it is guaranteed that resources such as the ROB, IQ, LQ, and SQ suffice to rename a new instruction, it consumes one instruction from the buffer (Line 668). However, if there are not enough physical registers to rename the instruction’s destination registers, the instruction is pushed back to the front of the buffer, so it is not consumed.

renameMap

gem5/src/cpu/o3/cpu.hh

        
   /** The rename map. */
   typename CPUPolicy::RenameMap renameMap[Impl::MaxThreads];

gem5/src/cpu/o3/cpu_policy.hh

        
      
template<class Impl>
struct SimpleCPUPolicy
{
   /** Typedef for the freelist of registers. */
   typedef UnifiedFreeList FreeList;
   /** Typedef for the rename map. */
   typedef UnifiedRenameMap RenameMap;

The renameMap contains all the hardware registers accessible by the processor. For example, even though the ISA exposes only a handful of registers to users, there are many internal registers used to execute instructions. The O3 CPU utilizes the UnifiedRenameMap. Let’s take a look at the details.

UnifiedRenameMap has different types of SimpleRenameMaps

        
      
/**
* Unified register rename map for all classes of registers.  Wraps a
* set of class-specific rename maps.  Methods that do not specify a
* register class (e.g., rename()) take register ids,
* while methods that do specify a register class (e.g., renameInt())
* take register indices.
*/
class UnifiedRenameMap
{
 private:
   static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg;
   using VecReg = TheISA::VecReg;
   using VecPredReg = TheISA::VecPredReg;

   /** The integer register rename map */
   SimpleRenameMap intMap;

   /** The floating-point register rename map */
   SimpleRenameMap floatMap;

   /** The condition-code register rename map */
   SimpleRenameMap ccMap;

   /** The vector register rename map */
   SimpleRenameMap vecMap;

   /** The vector element register rename map */
   SimpleRenameMap vecElemMap;

   /** The predicate register rename map */
   SimpleRenameMap predMap;

   using VecMode = Enums::VecRegRenameMode;
   VecMode vecMode;

The renameMap used by the O3 CPU is just a wrapper around the rename maps of each type of register. As shown in the above class definition, it contains rename maps for integer, float, vector, and other types of registers.

        
      
       renameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
                           &freeList, vecMode);
 ......
   // Initialize rename map to assign physical registers to the
   // architectural registers for active threads only.
   for (ThreadID tid = 0; tid < active_threads; tid++) {
       for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) {
           // Note that we can't use the rename() method because we don't
           // want special treatment for the zero register at this point
           PhysRegIdPtr phys_reg = freeList.getIntReg();
           renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg);
           commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg);
       }

       for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) {
           PhysRegIdPtr phys_reg = freeList.getFloatReg();
           renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg);
           commitRenameMap[tid].setEntry(
                   RegId(FloatRegClass, ridx), phys_reg);
       }

       /* Here we need two 'interfaces' the 'whole register' and the
        * 'register element'. At any point only one of them will be
        * active. */
       if (vecMode == Enums::Full) {
           /* Initialize the full-vector interface */
           for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
               RegId rid = RegId(VecRegClass, ridx);
               PhysRegIdPtr phys_reg = freeList.getVecReg();
               renameMap[tid].setEntry(rid, phys_reg);
               commitRenameMap[tid].setEntry(rid, phys_reg);
           }
       } else {
           /* Initialize the vector-element interface */
           for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
               for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg;
                       ++ldx) {
                   RegId lrid = RegId(VecElemClass, ridx, ldx);
                   PhysRegIdPtr phys_elem = freeList.getVecElem();
                   renameMap[tid].setEntry(lrid, phys_elem);
                   commitRenameMap[tid].setEntry(lrid, phys_elem);
               }
           }
       }

       for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) {
           PhysRegIdPtr phys_reg = freeList.getVecPredReg();
           renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg);
           commitRenameMap[tid].setEntry(
                   RegId(VecPredRegClass, ridx), phys_reg);
       }

       for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) {
           PhysRegIdPtr phys_reg = freeList.getCCReg();
           renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
           commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
       }
   }

   rename.setRenameMap(renameMap);

The above code initializes all entries of the renameMap. When setEntry is invoked through the UnifiedRenameMap, it invokes the setEntry function of the SimpleRenameMap that corresponds to the register's class, so all register types of the O3 CPU are covered.

        
      
   /**
    * Install a specific architectural-to-physical mapping.  Generally
    * used to roll back to old mappings on a squash.  Takes a flattened
    * architectural register id and forwards the request to the rename
    * map of the matching register class.
    * @param arch_reg The architectural register to remap.
    * @param phys_reg The physical register to remap it to.
    */
   void setEntry(const RegId& arch_reg, PhysRegIdPtr phys_reg)
   {
       const auto reg_class = arch_reg.classValue();

       // Each case asserts that the physical register belongs to the same
       // class before touching the class-specific map.
       switch (reg_class) {
         case IntRegClass:
           assert(phys_reg->isIntPhysReg());
           intMap.setEntry(arch_reg, phys_reg);
           return;

         case FloatRegClass:
           assert(phys_reg->isFloatPhysReg());
           floatMap.setEntry(arch_reg, phys_reg);
           return;

         case VecRegClass:
           // Whole-vector mappings are only expected in Full mode.
           assert(phys_reg->isVectorPhysReg());
           assert(vecMode == Enums::Full);
           vecMap.setEntry(arch_reg, phys_reg);
           return;

         case VecElemClass:
           // Per-element mappings are only expected in Elem mode.
           assert(phys_reg->isVectorPhysElem());
           assert(vecMode == Enums::Elem);
           vecElemMap.setEntry(arch_reg, phys_reg);
           return;

         case VecPredRegClass:
           assert(phys_reg->isVecPredPhysReg());
           predMap.setEntry(arch_reg, phys_reg);
           return;

         case CCRegClass:
           assert(phys_reg->isCCPhysReg());
           ccMap.setEntry(arch_reg, phys_reg);
           return;

         case MiscRegClass:
           // Misc registers do not actually rename, so don't change
           // their mappings.  We end up here when a commit or squash
           // tries to update or undo a hardwired misc reg mapping,
           // which should always be setting it to what it already is.
           assert(phys_reg == lookup(arch_reg));
           return;

         default:
           panic("rename setEntry(): unknown reg class %s\n",
                 arch_reg.className());
       }
   }

The setEntry function inserts a new entry into the renameMap. However, because UnifiedRenameMap is just a wrapper class consisting of multiple SimpleRenameMaps for different types of registers, it inserts the entry into the SimpleRenameMap object associated with the register's type.

canRename checks availability of the register resource.

        
      
       // Check here to make sure there are enough destination registers
       // to rename to.  Otherwise block.
       if (!renameMap[tid]->canRename(inst->numIntDestRegs(),
                                      inst->numFPDestRegs(),
                                      inst->numVecDestRegs(),
                                      inst->numVecElemDestRegs(),
                                      inst->numVecPredDestRegs(),
                                      inst->numCCDestRegs())) {
           DPRINTF(Rename,
                   "Blocking due to "
                   " lack of free physical registers to rename to.\n");
           blockThisCycle = true;
           insts_to_rename.push_front(inst);
           ++renameFullRegistersEvents;
           break;
       }

Before renaming an instruction's registers, it first checks whether enough physical registers are currently available.

        
      
    /**
     * Return whether there are enough registers to serve the request.
     *
     * Each argument is the number of registers requested from one register
     * class; it is compared against the free-entry count of the matching
     * class-specific map.
     * @return true only if every request fits, false as soon as one
     *         class does not have enough free entries.
     */
    bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs,
                   uint32_t vecElemRegs, uint32_t vecPredRegs,
                   uint32_t ccRegs) const
    {
        if (intRegs > intMap.numFreeEntries())
            return false;

        if (floatRegs > floatMap.numFreeEntries())
            return false;

        if (vectorRegs > vecMap.numFreeEntries())
            return false;

        if (vecElemRegs > vecElemMap.numFreeEntries())
            return false;

        if (vecPredRegs > predMap.numFreeEntries())
            return false;

        return ccRegs <= ccMap.numFreeEntries();
    }

Handle serialization instruction

If there are enough resources to rename the instruction, it then checks whether the current instruction must be serialized or is protected by a memory barrier.

        
      
       // Handle serializeAfter/serializeBefore instructions.
       // serializeAfter marks the next instruction as serializeBefore.
       // serializeBefore makes the instruction wait in rename until the ROB
       // is empty.

       // In this model, IPR accesses are serialize before
       // instructions, and store conditionals are serialize after
       // instructions.  This is mainly due to lack of support for
       // out-of-order operations of either of those classes of
       // instructions.
       if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
           !inst->isSerializeHandled()) {
           DPRINTF(Rename, "Serialize before instruction encountered.\n");

           if (!inst->isTempSerializeBefore()) {
               renamedSerializing++;
               inst->setSerializeHandled();
           } else {
               renamedTempSerializing++;
           }

           // Change status over to SerializeStall so that other stages know
           // what this is blocked on.
           renameStatus[tid] = SerializeStall;

           serializeInst[tid] = inst;

           blockThisCycle = true;

           break;
       } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) &&
                  !inst->isSerializeHandled()) {
           DPRINTF(Rename, "Serialize after instruction encountered.\n");

           renamedSerializing++;

           inst->setSerializeHandled();

           serializeAfter(insts_to_rename, tid);
       }

The StaticInst class has a flags member field that represents properties of an instruction, such as serializing, memory barrier, load operation, etc. It also has corresponding getter methods to retrieve those flags from StaticInst objects. Remember that all the instructions we generated at the fetch stage were StaticInst objects, and that their flags are set based on the implementation of the micro-ops of each architecture. Therefore, the rename stage determines whether it should block the stage or move on to the next instruction by checking isSerializeAfter and isSerializeBefore on the current instruction. Note that serializeBefore means that the current instruction itself should be blocked. Therefore, it records the current instruction as serializeInst (Line 739), sets the status of the rename stage to SerializeStall, and breaks out of the loop.

        
      
/**
 * Arrange for the instruction that follows a serialize-after instruction
 * to be treated as serialize-before.
 *
 * @param inst_list Instructions still waiting to be renamed for this thread.
 * @param tid Thread the serialization applies to.
 */
template<class Impl>
void
DefaultRename<Impl>::serializeAfter(InstQueue &inst_list, ThreadID tid)
{
    // A younger instruction is already queued: tag it directly.
    if (!inst_list.empty()) {
        inst_list.front()->setSerializeBefore();
        return;
    }

    // Nothing is queued yet, so remember that the next instruction handled
    // for this thread has to be serialized.
    serializeOnNextInst[tid] = true;
}

However, when the instruction has the serializeAfter flag, the instruction that follows the current one should be blocked, not the current one. Here the rename stage has to consider two cases. When there is no instruction left to be renamed after the current one, it cannot mark the next instruction as serializing yet. Therefore, it just records that the next instruction should be serialized (Line 1437). If there is another instruction in the queue, it directly sets the flag of that instruction by invoking the setSerializeBefore function (Line 1442).

        
      
   // Handle serializing the next instruction if necessary.
   if (serializeOnNextInst[tid]) {
       if (emptyROB[tid] && instsInProgress[tid] == 0) {
           // ROB already empty; no need to serialize.
           serializeOnNextInst[tid] = false;
       } else if (!insts_to_rename.empty()) {
           insts_to_rename.front()->setSerializeBefore();
       }
   }

As shown in the above code (Line 632-633), at the next clock cycle, when the renameInsts function is executed, it checks whether serializeOnNextInst has been set, which means that the last instruction renamed in the previous clock cycle was a serializeAfter instruction. In that case, it marks the first instruction to be renamed as serializeBefore to enforce the serialization. If the ROB is already empty and no instructions are in flight, there is nothing to wait for, so the flag is simply cleared.

Wait until all issued instructions are resolved

Remember that the tick function of the rename stage always checks the signals before invoking the rename function. The checkSignalsAndUpdate function checks whether the rename stage can continue.

        
      
DefaultRename<Impl>::checkSignalsAndUpdate(ThreadID tid)
{
   // Check if there's a squash signal, squash if there is
   // Check stall signals, block if necessary.
   // If status was blocked
   //     check if stall conditions have passed
   //         if so then go to unblocking
   // If status was Squashing
   //     check if squashing is not high.  Switch to running this cycle.
   // If status was serialize stall
   //     check if ROB is empty and no insts are in flight to the ROB

   readFreeEntries(tid);
   readStallSignals(tid);

   if (fromCommit->commitInfo[tid].squash) {
       DPRINTF(Rename, "[tid:%i] Squashing instructions due to squash from "
               "commit.\n", tid);

       squash(fromCommit->commitInfo[tid].doneSeqNum, tid);

       return true;
   }

   if (checkStall(tid)) {
       return block(tid);
   }

One of the important functions invoked by checkSignalsAndUpdate is checkStall, which checks whether the rename stage must remain stalled. If the rename stage should stay stalled, checkStall returns true and the rename stage keeps blocking in the current clock cycle (Line 1358). However, if it turns out that nothing stalls the rename stage any longer (checkStall returns false), it tries to recover from the stall based on the previous stall reason.

        
      
// Reports whether rename for thread `tid` has to stall this cycle.
// The conditions are tested in a fixed order and the first one that holds
// is logged; true is returned as soon as any of them holds.
DefaultRename<Impl>::checkStall(ThreadID tid)
{
    if (stalls[tid].iew) {
        DPRINTF(Rename,"[tid:%i] Stall from IEW stage detected.\n", tid);
        return true;
    }

    if (calcFreeROBEntries(tid) <= 0) {
        DPRINTF(Rename,"[tid:%i] Stall: ROB has 0 free entries.\n", tid);
        return true;
    }

    if (calcFreeIQEntries(tid) <= 0) {
        DPRINTF(Rename,"[tid:%i] Stall: IQ has 0 free entries.\n", tid);
        return true;
    }

    // Only stalls here when both the load queue and the store queue are out
    // of entries.
    if (calcFreeLQEntries(tid) <= 0 && calcFreeSQEntries(tid) <= 0) {
        DPRINTF(Rename,"[tid:%i] Stall: LSQ has 0 free entries.\n", tid);
        return true;
    }

    if (renameMap[tid]->numFreeEntries() <= 0) {
        DPRINTF(Rename,"[tid:%i] Stall: RenameMap has 0 free entries.\n", tid);
        return true;
    }

    // A serialize stall lasts until the ROB is empty and nothing renamed
    // earlier is still in progress.
    if (renameStatus[tid] == SerializeStall &&
        (!emptyROB[tid] || instsInProgress[tid])) {
        DPRINTF(Rename,"[tid:%i] Stall: Serialize stall and ROB is not "
                "empty.\n",
                tid);
        return true;
    }

    return false;
}

For example, when the current renameStatus is SerializeStall, the rename stage must not resume until the ROB is empty and no instructions are still in flight to the ROB. Therefore, it checks emptyROB[tid] and instsInProgress[tid] to see whether the current hardware thread still has instructions remaining in the back-end. When all previous instructions have drained, this condition no longer holds and, unless another resource causes a stall, checkStall returns false. If checkStall returns false and no longer blocks the rename stage, it will try to recover from the stall based on the previous stall reason.

        
      
   if (renameStatus[tid] == SerializeStall) {
       // Stall ends once the ROB is free.
       DPRINTF(Rename, "[tid:%i] Done with serialize stall, switching to "
               "unblocking.\n", tid);
       
       DynInstPtr serial_inst = serializeInst[tid];
       
       renameStatus[tid] = Unblocking;
       
       unblock(tid);
       
       DPRINTF(Rename, "[tid:%i] Processing instruction [%lli] with "
               "PC %s.\n", tid, serial_inst->seqNum, serial_inst->pcState());
       
       // Put instruction into queue here.
       serial_inst->clearSerializeBefore();
       
       if (!skidBuffer[tid].empty()) {
           skidBuffer[tid].push_front(serial_inst);
       } else {
           insts[tid].push_front(serial_inst);
       }
       
       DPRINTF(Rename, "[tid:%i] Instruction must be processed by rename."
               " Adding to front of list.\n", tid);
       
       serializeInst[tid] = NULL;
       
       return true;
   }

For example, if the previous stall reason was SerializeStall, Line 1395-1424 will be executed. Note that the serializing instruction could not be renamed until the ROB was empty and no earlier instructions were in flight. Therefore, the serializing instruction should be reinserted at the front of the instruction buffer of the rename stage (Line 1412-1416). Also, it clears the serializeInst field of the rename stage (Line 1421). The next time the rename stage runs after unblocking, it will process the serializing instruction that had stalled the rename stage.

X86 in GEM5 provides macro setting serialization

        
      
       # Handler registered for the "serialize_before" directive: records that
       # this macroop was declared serialize-before.
       def serializeBefore(self):
           self.serialize_before = True
       # Handler registered for the "serialize_after" directive: records that
       # this macroop was declared serialize-after.
       def serializeAfter(self):
           self.serialize_after = True

       # Handler registered for the "function_call" directive.
       # NOTE(review): the attribute written here has the same name as this
       # method, so after the first call the instance attribute (a bool)
       # shadows the method on this object. The directives table stores the
       # bound method itself, so dispatch through that table is unaffected.
       def function_call(self):
           self.function_call = True
       # Handler registered for the "function_return" directive; same
       # name-shadowing caveat as function_call.
       def function_return(self):
           self.function_return = True

       # Builds the macroop and the table that maps each directive keyword to
       # the bound method handling it.
       def __init__(self, name):
           super(X86Macroop, self).__init__(name)
           # Keys are the directive names as written in a macroop definition
           # (presumably without the leading dot -- confirm against the
           # parser); values are bound methods of this instance.
           self.directives = {
               "adjust_env" : self.setAdjustEnv,
               "adjust_imm" : self.adjustImm,
               "adjust_disp" : self.adjustDisp,
               "serialize_before" : self.serializeBefore,
               "serialize_after" : self.serializeAfter,
               "function_call" : self.function_call,
               "function_return" : self.function_return
           }

For a macroop definition, when the .serialize_before or .serialize_after keyword is found in its definition, the GEM5 parser invokes the self.serializeBefore or self.serializeAfter function, respectively, to set the serialize_before or serialize_after member field to true.

        
      
       def getDefinition(self, env):
           #FIXME This first parameter should be the mnemonic. I need to
           #write some code which pulls that out
           numMicroops = len(self.microops)
           allocMicroops = ''
           micropc = 0
           for op in self.microops:
               flags = ["IsMicroop"]
               if micropc == 0:
                   flags.append("IsFirstMicroop")

                   if self.serialize_before:
                       flags.append("IsSerializing")
                       flags.append("IsSerializeBefore")

               if micropc == numMicroops - 1:
                   flags.append("IsLastMicroop")

                   if self.serialize_after:
                       flags.append("IsSerializing")
                       flags.append("IsSerializeAfter")

                   if self.function_call:
                       flags.append("IsCall")
                       flags.append("IsUncondControl")
                   if self.function_return:
                       flags.append("IsReturn")
                       flags.append("IsUncondControl")

When the macroop definition is automatically generated, it checks those two flags and sets IsSerializeBefore on the first microop and IsSerializeAfter on the last microop of the macroop.

Rename registers and pass the renamed instruction to the next stage

After handling serialization instruction, it should rename registers of the instruction.

        
      
       renameSrcRegs(inst, inst->threadNumber);

       renameDestRegs(inst, inst->threadNumber);

       if (inst->isAtomic() || inst->isStore()) {
           storesInProgress[tid]++;
       } else if (inst->isLoad()) {
           loadsInProgress[tid]++;
       }

       ++renamed_insts;
       // Notify potential listeners that source and destination registers for
       // this instruction have been renamed.
       ppRename->notify(inst);

       // Put instruction in rename queue.
       toIEW->insts[toIEWIndex] = inst;
       ++(toIEW->size);

       // Increment which instruction we're on.
       ++toIEWIndex;

       // Decrement how many instructions are available.
       --insts_available;
   }

renameSrcRegs

        
      
/**
 * Redirect every source operand of an instruction to the physical register
 * its architectural register is currently mapped to, and mark the operand
 * ready when the scoreboard already reports that register as ready.
 *
 * @param inst Instruction whose source registers are renamed.
 * @param tid Thread whose rename map is consulted.
 */
template <class Impl>
inline void
DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
{
    ThreadContext *thread_ctx = inst->tcBase();
    RenameMap *rename_map = renameMap[tid];
    const unsigned src_count = inst->numSrcRegs();

    for (int i = 0; i < src_count; i++) {
        const RegId& arch_src = inst->srcRegIdx(i);

        // The map is indexed by the flattened architectural register.
        PhysRegIdPtr phys_src =
            rename_map->lookup(thread_ctx->flattenRegId(arch_src));

        // Per-class lookup statistics; CC and misc registers are accepted
        // but have no dedicated counter.
        switch (arch_src.classValue()) {
          case CCRegClass:
          case MiscRegClass:
            break;
          case IntRegClass:
            intRenameLookups++;
            break;
          case FloatRegClass:
            fpRenameLookups++;
            break;
          case VecRegClass:
          case VecElemClass:
            vecRenameLookups++;
            break;
          case VecPredRegClass:
            vecPredRenameLookups++;
            break;

          default:
            panic("Invalid register class: %d.", arch_src.classValue());
        }

        DPRINTF(Rename,
                "[tid:%i] "
                "Looking up %s arch reg %i, got phys reg %i (%s)\n",
                tid, arch_src.className(),
                arch_src.index(), phys_src->index(),
                phys_src->className());

        inst->renameSrcReg(i, phys_src);

        // Ask the scoreboard whether the physical register is ready.
        const bool src_ready = scoreboard->getReg(phys_src);
        if (!src_ready) {
            DPRINTF(Rename,
                    "[tid:%i] "
                    "Register %d (flat: %d) (%s) is not ready.\n",
                    tid, phys_src->index(), phys_src->flatIndex(),
                    phys_src->className());
        } else {
            DPRINTF(Rename,
                    "[tid:%i] "
                    "Register %d (flat: %d) (%s) is ready.\n",
                    tid, phys_src->index(), phys_src->flatIndex(),
                    phys_src->className());

            inst->markSrcRegReady(i);
        }

        ++renameRenameLookups;
    }
}

The main operation of renameSrcRegs is to look up the register map and find the physical registers to which the architectural registers used as the current instruction's sources are currently mapped. From this point on, the rest of the pipeline should use those physical registers instead of the architectural registers. This mapping is stored in the instruction through the renameSrcReg function (Line 1108).

lookup renameMap to find physical register if it has been renamed

        
      
class SimpleRenameMap
{
......
   /**
    * Look up the physical register mapped to an architectural register.
    * @param arch_reg The architectural register to look up; its flat index
    *                 must be a valid position in the map.
    * @return The physical register it is currently mapped to.
    */
   PhysRegIdPtr lookup(const RegId& arch_reg) const
   {
       // The flat index subscripts map directly, so it has to be strictly
       // below size(); an index equal to size() is already out of range.
       assert(arch_reg.flatIndex() < map.size());
       return map[arch_reg.flatIndex()];
   }
......
class UnifiedRenameMap
{
......
   /**
    * Look up the physical register mapped to an architectural register.
    * This version takes a flattened architectural register id
    * and calls the appropriate class-specific rename table.
    * Vector and vector-element lookups assert that the map is in the
    * matching vector mode; an unknown register class panics.
    * @param arch_reg The architectural register to look up.
    * @return The physical register it is currently mapped to.
    */
   PhysRegIdPtr lookup(const RegId& arch_reg) const
   {
       // Dispatch on the register class to the per-class rename table.
       switch (arch_reg.classValue()) {
         case IntRegClass:
           return intMap.lookup(arch_reg);

         case FloatRegClass:
           return  floatMap.lookup(arch_reg);

         case VecRegClass:
           // Whole-vector registers are only looked up in Full mode.
           assert(vecMode == Enums::Full);
           return  vecMap.lookup(arch_reg);

         case VecElemClass:
           // Vector elements are only looked up in Elem mode.
           assert(vecMode == Enums::Elem);
           return  vecElemMap.lookup(arch_reg);

         case VecPredRegClass:
           return predMap.lookup(arch_reg);

         case CCRegClass:
           return ccMap.lookup(arch_reg);

         case MiscRegClass:
           // misc regs aren't really renamed, they keep the same
           // mapping throughout the execution.
           return regFile->getMiscRegId(arch_reg.flatIndex());

         default:
           panic("rename lookup(): unknown reg class %s\n",
                 arch_reg.className());
       }
   }

The lookup function returns the physical register to which the architectural register is currently mapped. The map used in the rename stage is a UnifiedRenameMap, which contains multiple SimpleRenameMap instances, one per register type. Therefore, it first invokes the lookup function of the UnifiedRenameMap, which in turn invokes the lookup function of the SimpleRenameMap matching the type of the register being looked up.

check scoreboard

After the lookup, it checks the scoreboard to see whether the source registers are available to be read. Note that the renamed (physical) register is what gets passed to the scoreboard's getReg. O3 is an out-of-order processor and renames registers to eliminate false register dependencies such as write after read. The scoreboard lets the processor know when a register is ready to be accessed. In particular, the getReg interface of the scoreboard checks whether a specific physical register is currently available. If it returns true, the requested register is ready to be used, and the rename stage invokes the markSrcRegReady function (Line 1118) to mark that operand of the instruction as ready. An instruction can be issued once all of its operands are ready. However, if getReg returns false, that source register is not available in this cycle, so the operand is not marked and the instruction has to wait until the register becomes ready. The scoreboard and its interfaces are described in the section below.

renameDestRegs

        
      
/**
 * Rename every destination register of an instruction: obtain a mapping
 * from the rename map, mark the newly mapped physical register as not ready
 * in the scoreboard, and log the rename in the per-thread history buffer.
 *
 * @param inst Instruction whose destination registers are renamed.
 * @param tid Thread whose rename map and history buffer are used.
 */
template <class Impl>
inline void
DefaultRename<Impl>::renameDestRegs(const DynInstPtr &inst, ThreadID tid)
{
    ThreadContext *thread_ctx = inst->tcBase();
    RenameMap *rename_map = renameMap[tid];
    const unsigned dest_count = inst->numDestRegs();

    for (int i = 0; i < dest_count; i++) {
        const RegId& arch_dest = inst->destRegIdx(i);

        // Flatten the register id and carry over its pinned-write count.
        RegId flat_dest = thread_ctx->flattenRegId(arch_dest);
        flat_dest.setNumPinnedWrites(arch_dest.getNumPinnedWrites());

        // first: newly mapped physical register, second: previous mapping.
        const typename RenameMap::RenameInfo mapping =
            rename_map->rename(flat_dest);
        PhysRegIdPtr new_phys = mapping.first;
        PhysRegIdPtr old_phys = mapping.second;

        inst->flattenDestReg(i, flat_dest);

        // The new register holds no result yet.
        scoreboard->unsetReg(new_phys);

        DPRINTF(Rename,
                "[tid:%i] "
                "Renaming arch reg %i (%s) to physical reg %i (%i).\n",
                tid, arch_dest.index(), arch_dest.className(),
                new_phys->index(),
                new_phys->flatIndex());

        // Keep a record of this rename at the front of the history.
        historyBuffer[tid].push_front(
            RenameHistory(inst->seqNum, flat_dest, new_phys, old_phys));

        DPRINTF(Rename, "[tid:%i] [sn:%llu] "
                "Adding instruction to history buffer (size=%i).\n",
                tid, historyBuffer[tid].front().instSeqNum,
                historyBuffer[tid].size());

        // Let the instruction know both the register it now writes and the
        // physical register the same logical register was mapped to before.
        inst->renameDestReg(i, new_phys, old_phys);

        ++renameRenamedOperands;
    }
}

Basically, the renameDestRegs function is similar to renameSrcRegs. However, renaming a destination register can change both the register map and the scoreboard state of the destination register. For the source registers, it has to check the scoreboard because they might depend on previous instructions. However, because the destination register does not depend on previous instructions, it can map the current destination register to any physical register as long as resources are available. Therefore, instead of invoking lookup as it did for the source registers, it invokes the rename function.

rename: renames specific register to the other

        
      
   /**
    * Tell rename map to get a new free physical register to remap
    * the specified architectural register. This version takes a
    * RegId and reads the  appropriate class-specific rename table.
    * Misc registers are not remapped: the pair returned for them holds the
    * current mapping in both positions. An unknown register class panics.
    * @param arch_reg The architectural register id to remap.
    * @return A RenameInfo pair indicating both the new and previous
    * physical registers.
    */
   RenameInfo rename(const RegId& arch_reg)
   {
       // Dispatch on the register class to the per-class rename table.
       switch (arch_reg.classValue()) {
         case IntRegClass:
           return intMap.rename(arch_reg);
         case FloatRegClass:
           return floatMap.rename(arch_reg);
         case VecRegClass:
           // Whole-vector registers are only renamed in Full mode.
           assert(vecMode == Enums::Full);
           return vecMap.rename(arch_reg);
         case VecElemClass:
           // Vector elements are only renamed in Elem mode.
           assert(vecMode == Enums::Elem);
           return vecElemMap.rename(arch_reg);
         case VecPredRegClass:
           return predMap.rename(arch_reg);
         case CCRegClass:
           return ccMap.rename(arch_reg);
         case MiscRegClass:
           {
           // misc regs aren't really renamed, just remapped
           PhysRegIdPtr phys_reg = lookup(arch_reg);
           // Set the new register to the previous one to keep the same
           // mapping throughout the execution.
           return RenameInfo(phys_reg, phys_reg);
           }

         default:
           panic("rename rename(): unknown reg class %s\n",
                 arch_reg.className());
       }
   }

Because the rename stage has access to various kinds of physical registers, it should ask the proper physical register map to rename the architectural register to a physical register. Note that the return value, RenameInfo, is a pair indicating both the new and the previous physical registers.

        
      
    /**
     * Pair of a physical register and a physical register.  Used to
     * return the physical register that a logical register has been
     * renamed to, and the previous physical register that the same
     * logical register was previously mapped to.
     */
    typedef std::pair<PhysRegIdPtr, PhysRegIdPtr> RenameInfo;

        
      
/**
 * Rename one architectural register within this register class.
 *
 * Three cases are handled, in this order:
 *  - arch_reg is the zero register: the existing mapping is kept;
 *  - the currently mapped physical register still has pinned writes left:
 *    that register is reused and its pinned-write count is decremented;
 *  - otherwise: a register is taken from the free list, installed in the
 *    map, and its pinned-write counters are initialized from arch_reg.
 *
 * @param arch_reg The (flattened) architectural register to rename.
 * @return RenameInfo(new physical register, previous physical register);
 *         both are the same register in the first two cases.
 */
SimpleRenameMap::RenameInfo
SimpleRenameMap::rename(const RegId& arch_reg)
{
    PhysRegIdPtr renamed_reg;
    // Record the current physical register that is renamed to the
    // requested architected register.
    PhysRegIdPtr prev_reg = map[arch_reg.flatIndex()];

    if (arch_reg == zeroReg) {
        assert(prev_reg->isZeroReg());
        renamed_reg = prev_reg;
    } else if (prev_reg->getNumPinnedWrites() > 0) {
        // Do not rename if the register is pinned
        assert(arch_reg.getNumPinnedWrites() == 0);  // Prevent pinning the
                                                     // same register twice
        DPRINTF(Rename, "Renaming pinned reg, numPinnedWrites %d\n",
                prev_reg->getNumPinnedWrites());
        renamed_reg = prev_reg;
        renamed_reg->decrNumPinnedWrites();
    } else {
        renamed_reg = freeList->getReg();
        map[arch_reg.flatIndex()] = renamed_reg;
        renamed_reg->setNumPinnedWrites(arch_reg.getNumPinnedWrites());
        renamed_reg->setNumPinnedWritesToComplete(
            arch_reg.getNumPinnedWrites() + 1);
    }

    // Each "%d (%d)" pair prints the register's index followed by its flat
    // index, matching the trace emitted by DefaultRename::renameDestRegs.
    DPRINTF(Rename, "Renamed reg %d to physical reg %d (%d) old mapping was"
            " %d (%d)\n",
            arch_reg, renamed_reg->index(), renamed_reg->flatIndex(),
            prev_reg->index(), prev_reg->flatIndex());

    return RenameInfo(renamed_reg, prev_reg);
}

The prev_reg indicates the previous physical register mapped to the current architecture register. Also, it maps any available physical register to the current architecture register (Line 93) and update its mapping (Line 94).

Make the renamed register as not ready

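It invokes unsetReg on rename_result.first, that is, the physical register newly mapped to the destination architectural register (not the previously mapped one), so that the scoreboard marks it as not ready. The purpose is that the value of this register has not been produced yet: any later instruction whose source operand is renamed to this physical register will see getReg return false in renameSrcRegs, and therefore has to wait until the register is marked ready again through setReg.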

Populating history buffer entry per destination register rename

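After the renaming is done (line 1161-1163), it generates a history buffer entry that records the architectural register, the new physical register, and the previous physical register. As described at the beginning of this post, this history is what allows the rename stage to undo the mappings when a squash happens, and to free the old physical registers when the instruction commits.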

        
      
   /** One destination-register rename, as recorded in the history buffer. */
   struct RenameHistory {
       /** Sequence number of the instruction that performed the rename. */
       InstSeqNum instSeqNum;
       /** Architectural register that was renamed. */
       RegId archReg;
       /** Physical register the architectural register is now mapped to. */
       PhysRegIdPtr newPhysReg;
       /** Physical register the architectural register was mapped to
        *  before this rename. */
       PhysRegIdPtr prevPhysReg;

       RenameHistory(InstSeqNum _instSeqNum, const RegId& _archReg,
                     PhysRegIdPtr _newPhysReg,
                     PhysRegIdPtr _prevPhysReg)
           : instSeqNum(_instSeqNum),
             archReg(_archReg),
             newPhysReg(_newPhysReg),
             prevPhysReg(_prevPhysReg)
       {}
   };

   /** A per-thread list of all destination register renames, used to either
    * undo rename mappings or free old physical registers.
    */
   std::list<RenameHistory> historyBuffer[Impl::MaxThreads];

End of the main loop

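After renaming the source and destination registers, each renamed instruction has been pushed into the toIEW wire inside the loop. Once the loop ends, the rename stage updates its counters, records whether it wrote to the time buffer, and blocks itself if some instructions could not be renamed in this cycle.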

        
      
   instsInProgress[tid] += renamed_insts;
   renameRenamedInsts += renamed_insts;

   // If we wrote to the time buffer, record this.
   if (toIEWIndex) {
       wroteToTimeBuffer = true;
   }

   // Check if there's any instructions left that haven't yet been renamed.
   // If so then block.
   if (insts_available) {
       blockThisCycle = true;
   }

   if (blockThisCycle) {
       block(tid);
       toDecode->renameUnblock[tid] = false;
   }
}

Now we are good to go to IEW stage!

scoreboard

scoreboard interface

gem5/src/cpu/o3/scoreboard.hh

        
      
/**
 * Simple scoreboard that records, for each physical register, whether the
 * register is ready. A single table covers the unified physical register
 * space, since the register classes do not need to be told apart here.
 * Registers with a fixed mapping are always reported as ready.
 */
class Scoreboard
{
  private:
    /** Name returned by name(), needed by DPRINTF; kept explicitly
     *  because Scoreboard is not a SimObject. */
    const std::string _name;

    /** One ready flag per physical register, indexed by the register's
     *  flat index. */
    std::vector<bool> regScoreBoard;

    /** The number of actual physical registers */
    unsigned M5_CLASS_VAR_USED numPhysRegs;

  public:
    /** Constructs a scoreboard.
     *  @param _my_name Name reported by name().
     *  @param _numPhysicalRegs Number of physical registers.
     */
    Scoreboard(const std::string &_my_name,
               unsigned _numPhysicalRegs);

    /** Destructor. */
    ~Scoreboard() {}

    /** Returns the name of the scoreboard. */
    std::string name() const { return _name; }

    /** Reports whether a physical register is ready.
     *  @param phys_reg Register to query.
     *  @return true if the register is ready or has a fixed mapping.
     */
    bool getReg(PhysRegIdPtr phys_reg) const
    {
        assert(phys_reg->flatIndex() < numPhysRegs);

        // Fixed mapping regs are always ready
        if (phys_reg->isFixedMapping())
            return true;

        const bool is_ready = regScoreBoard[phys_reg->flatIndex()];

        // The zero register is expected to be ready at all times.
        if (phys_reg->isZeroReg()) {
            assert(is_ready);
        }

        return is_ready;
    }

    /** Marks a physical register as ready; ignored for fixed-mapping
     *  registers.
     *  @param phys_reg Register to mark.
     */
    void setReg(PhysRegIdPtr phys_reg)
    {
        assert(phys_reg->flatIndex() < numPhysRegs);

        // Fixed mapping regs are always ready, so there is nothing to
        // record for them.
        if (!phys_reg->isFixedMapping()) {
            DPRINTF(Scoreboard, "Setting reg %i (%s) as ready\n",
                    phys_reg->index(), phys_reg->className());

            regScoreBoard[phys_reg->flatIndex()] = true;
        }
    }

    /** Marks a physical register as not ready; ignored for fixed-mapping
     *  registers and for the zero register.
     *  @param phys_reg Register to mark.
     */
    void unsetReg(PhysRegIdPtr phys_reg)
    {
        assert(phys_reg->flatIndex() < numPhysRegs);

        // Fixed mapping regs are always ready, and the zero reg should
        // never be marked unready.
        if (phys_reg->isFixedMapping() || phys_reg->isZeroReg())
            return;

        regScoreBoard[phys_reg->flatIndex()] = false;
    }

};

The scoreboard is implemented as a simple vector (regScoreBoard) that indicates whether a specific register is ready to be used or not. It provides three interfaces (getReg, setReg, and unsetReg) to get or set the status of a specific register maintained by the scoreboard.

scoreboard used by the O3 CPU

gem5/src/cpu/o3/cpu.hh

        
   /** Integer Register Scoreboard */
   Scoreboard scoreboard;

gem5/src/cpu/o3/cpu.cc

        
   rename.setScoreboard(&scoreboard);
   iew.setScoreboard(&scoreboard);

When it comes to a real hardware implementation, the scoreboard should be accessible by multiple stages at the same time. However, because GEM5 is a software simulation, it doesn't provide port-wise emulation to service different modules at the same time. Note that GEM5 executes in a single thread and cannot be executed with multiple threads. In any case, the scoreboard is accessed by two different stages in the O3CPU: rename and iew.

GEM5, Pipeline, O3

This post is licensed under CC BY 4.0 by the author.