Memory read and write of the O3 CPU
Memory read
| 621 LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
622 {
623 LQEntry& load_req = loadQueue[load_idx];
624 const DynInstPtr& load_inst = load_req.instruction();
625
626 load_req.setRequest(req);
627 assert(load_inst);
628
629 assert(!load_inst->isExecuted());
630
631 // Make sure this isn't a strictly ordered load
632 // A bit of a hackish way to get strictly ordered accesses to work
633 // only if they're at the head of the LSQ and are ready to commit
634 // (at the head of the ROB too).
635
636 if (req->mainRequest()->isStrictlyOrdered() &&
637 (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
638 // Tell IQ/mem dep unit that this instruction will need to be
639 // rescheduled eventually
640 iewStage->rescheduleMemInst(load_inst);
641 load_inst->clearIssued();
642 load_inst->effAddrValid(false);
643 ++lsqRescheduledLoads;
644 DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
645 load_inst->seqNum, load_inst->pcState());
646
647 // Must delete request now that it wasn't handed off to
648 // memory. This is quite ugly. @todo: Figure out the proper
649 // place to really handle request deletes.
650 load_req.setRequest(nullptr);
651 req->discard();
652 return std::make_shared<GenericISA::M5PanicFault>(
653 "Strictly ordered load [sn:%llx] PC %s\n",
654 load_inst->seqNum, load_inst->pcState());
655 }
656
657 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
658 "storeHead: %i addr: %#x%s\n",
659 load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
660 req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
661
662 if (req->mainRequest()->isLLSC()) {
663 // Disable recording the result temporarily. Writing to misc
664 // regs normally updates the result, but this is not the
665 // desired behavior when handling store conditionals.
666 load_inst->recordResult(false);
667 TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
668 load_inst->recordResult(true);
669 }
670
671 if (req->mainRequest()->isMmappedIpr()) {
672 assert(!load_inst->memData);
673 load_inst->memData = new uint8_t[MaxDataBytes];
674
675 ThreadContext *thread = cpu->tcBase(lsqID);
676 PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
677
678 main_pkt->dataStatic(load_inst->memData);
679
680 Cycles delay = req->handleIprRead(thread, main_pkt);
681
682 WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
683 cpu->schedule(wb, cpu->clockEdge(delay));
684 return NoFault;
685 }
686
687 // Check the SQ for any previous stores that might lead to forwarding
......
840 // If there's no forwarding case, then go access memory
841 DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
842 load_inst->seqNum, load_inst->pcState());
843
844 // Allocate memory if this is the first time a load is issued.
845 if (!load_inst->memData) {
846 load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
847 }
848
849 // For now, load throughput is constrained by the number of
850 // load FUs only, and loads do not consume a cache port (only
851 // stores do).
852 // @todo We should account for cache port contention
853 // and arbitrate between loads and stores.
854
855 // if we the cache is not blocked, do cache access
856 if (req->senderState() == nullptr) {
857 LQSenderState *state = new LQSenderState(
858 loadQueue.getIterator(load_idx));
859 state->isLoad = true;
860 state->inst = load_inst;
861 state->isSplit = req->isSplit();
862 req->senderState(state);
863 }
864 req->buildPackets();
865 req->sendPacketToCache();
866 if (!req->isSent())
867 iewStage->blockMemInst(load_inst);
868
869 return NoFault;
870 }
|
If the current instruction has not initiated a memory load before,
read allocates a buffer and makes the instruction's memData field point to it,
so that the actual data read from the cache or memory can be stored there.
After that, it creates a senderState object if one does not exist yet.
The state object contains information such as
whether this request is a load or a store,
the instruction that initiated the memory operation, and
whether the request is a split or a single access.
Once created, the senderState is stored in the request object.
Note that the req here is an LSQRequest object,
the same object that was used earlier for TLB resolution.
Because this object carries all the information required to resolve one memory operation
(TLB, cache ports, etc.),
the CPU can handle read and write operations by invoking the proper functions on it.
Build packet
| 1032 template<class Impl>
1033 void
1034 LSQ<Impl>::SingleDataRequest::buildPackets()
1035 {
1036 assert(_senderState);
1037 /* Retries do not create new packets. */
1038 if (_packets.size() == 0) {
1039 _packets.push_back(
1040 isLoad()
1041 ? Packet::createRead(request())
1042 : Packet::createWrite(request()));
1043 _packets.back()->dataStatic(_inst->memData);
1044 _packets.back()->senderState = _senderState;
1045 }
1046 assert(_packets.size() == 1);
1047 }
|
| 276 /**
277 * A Packet is used to encapsulate a transfer between two objects in
278 * the memory system (e.g., the L1 and L2 cache). (In contrast, a
279 * single Request travels all the way from the requestor to the
280 * ultimate destination and back, possibly being conveyed by several
281 * different Packets along the way.)
282 */
283 class Packet : public Printable
284 {
285 public:
286 typedef uint32_t FlagsType;
287 typedef gem5::Flags<FlagsType> Flags;
......
368 private:
369 /**
370 * A pointer to the data being transferred. It can be different
371 * sizes at each level of the hierarchy so it belongs to the
372 * packet, not request. This may or may not be populated when a
373 * responder receives the packet. If not populated memory should
374 * be allocated.
375 */
376 PacketDataPtr data;
......
846 /**
847 * Constructor. Note that a Request object must be constructed
848 * first, but the Requests's physical address and size fields need
849 * not be valid. The command must be supplied.
850 */
851 Packet(const RequestPtr &_req, MemCmd _cmd)
852 : cmd(_cmd), id((PacketId)_req.get()), req(_req),
853 data(nullptr), addr(0), _isSecure(false), size(0),
854 _qosValue(0),
855 htmReturnReason(HtmCacheFailure::NO_FAIL),
856 htmTransactionUid(0),
857 headerDelay(0), snoopDelay(0),
858 payloadDelay(0), senderState(NULL)
859 {
860 flags.clear();
861 if (req->hasPaddr()) {
862 addr = req->getPaddr();
863 flags.set(VALID_ADDR);
864 _isSecure = req->isSecure();
865 }
866
867 /**
868 * hardware transactional memory
869 *
870 * This is a bit of a hack!
871 * Technically the address of a HTM command is set to zero
872 * but is not valid. The reason that we pretend it's valid is
873 * to void the getAddr() function from failing. It would be
874 * cumbersome to add control flow in many places to check if the
875 * packet represents a HTM command before calling getAddr().
876 */
877 if (req->isHTMCmd()) {
878 flags.set(VALID_ADDR);
879 assert(addr == 0x0);
880 }
881 if (req->hasSize()) {
882 size = req->getSize();
883 flags.set(VALID_SIZE);
884 }
885 }
......
1002 /**
1003 * Constructor-like methods that return Packets based on Request objects.
1004 * Fine-tune the MemCmd type if it's not a vanilla read or write.
1005 */
1006 static PacketPtr
1007 createRead(const RequestPtr &req)
1008 {
1009 return new Packet(req, makeReadCmd(req));
1010 }
1011
1012 static PacketPtr
1013 createWrite(const RequestPtr &req)
1014 {
1015 return new Packet(req, makeWriteCmd(req));
1016 }
|
The buildPackets function generates the new packet that will be sent to the cache.
The generated packet is kept in an internal vector called _packets.
It also sets the buffer allocated for the data, _inst->memData,
as the packet's static data pointer, and stores the senderState in the packet.
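Note that packet creation is idempotent: a retry re-enters buildPackets, and the emptiness check guarantees that no duplicate packet is created. A minimal standalone sketch of this pattern (all types below are illustrative stand-ins, not the gem5 classes):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

struct MiniSenderState { };

struct MiniPacket {
    uint8_t *data = nullptr;            // borrowed, instruction-owned buffer
    MiniSenderState *senderState = nullptr;
};

struct MiniLSQRequest {
    MiniSenderState *state = nullptr;
    uint8_t *memData = nullptr;         // stands in for _inst->memData
    std::vector<MiniPacket *> packets;

    void buildPackets()
    {
        assert(state);
        if (packets.empty()) {          // retries must not create new packets
            auto *pkt = new MiniPacket;
            pkt->data = memData;        // point at the pre-allocated buffer
            pkt->senderState = state;   // bookkeeping rides with the packet
            packets.push_back(pkt);
        }
        assert(packets.size() == 1);    // single (non-split) access
    }
};
```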
| 386 /**
387 * A virtual base opaque structure used to hold state associated
388 * with the packet (e.g., an MSHR), specific to a SimObject that
389 * sees the packet. A pointer to this state is returned in the
390 * packet's response so that the SimObject in question can quickly
391 * look up the state needed to process it. A specific subclass
392 * would be derived from this to carry state specific to a
393 * particular sending device.
394 *
395 * As multiple SimObjects may add their SenderState throughout the
396 * memory system, the SenderStates create a stack, where a
397 * SimObject can add a new Senderstate, as long as the
398 * predecessing SenderState is restored when the response comes
399 * back. For this reason, the predecessor should always be
400 * populated with the current SenderState of a packet before
401 * modifying the senderState field in the request packet.
402 */
403 struct SenderState
404 {
405 SenderState* predecessor;
406 SenderState() : predecessor(NULL) {}
407 virtual ~SenderState() {}
408 };
|
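The predecessor chain described in this comment effectively turns senderState into a stack. A standalone sketch of the push/pop discipline (all names are illustrative):

```cpp
struct MiniSenderState {
    MiniSenderState *predecessor = nullptr;
    virtual ~MiniSenderState() = default;
};

struct MiniPacket { MiniSenderState *senderState = nullptr; };

struct MyState : MiniSenderState { /* per-object bookkeeping */ };

// Called when a SimObject forwards the packet and wants its state back
// with the response.
void pushState(MiniPacket &pkt)
{
    auto *s = new MyState;
    s->predecessor = pkt.senderState;   // remember whoever came before us
    pkt.senderState = s;                // our state rides with the packet
}

// Called when the response comes back through the same SimObject.
void popState(MiniPacket &pkt)
{
    auto *s = static_cast<MyState *>(pkt.senderState);
    pkt.senderState = s->predecessor;   // restore the previous owner's state
    delete s;
}
```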
attributes of the packet
mem/packet.hh
| 209 bool
210 testCmdAttrib(MemCmd::Attribute attrib) const
211 {
212 return commandInfo[cmd].attributes[attrib] != 0;
213 }
214
215 public:
216
217 bool isRead() const { return testCmdAttrib(IsRead); }
218 bool isWrite() const { return testCmdAttrib(IsWrite); }
219 bool isUpgrade() const { return testCmdAttrib(IsUpgrade); }
220 bool isRequest() const { return testCmdAttrib(IsRequest); }
221 bool isResponse() const { return testCmdAttrib(IsResponse); }
222 bool needsWritable() const { return testCmdAttrib(NeedsWritable); }
223 bool needsResponse() const { return testCmdAttrib(NeedsResponse); }
224 bool isInvalidate() const { return testCmdAttrib(IsInvalidate); }
225 bool isEviction() const { return testCmdAttrib(IsEviction); }
226 bool isClean() const { return testCmdAttrib(IsClean); }
227 bool fromCache() const { return testCmdAttrib(FromCache); }
|
mem/packet.cc
| 64 const MemCmd::CommandInfo
65 MemCmd::commandInfo[] =
66 {
67 /* InvalidCmd */
68 { {}, InvalidCmd, "InvalidCmd" },
69 /* ReadReq - Read issued by a non-caching agent such as a CPU or
70 * device, with no restrictions on alignment. */
71 { {IsRead, IsRequest, NeedsResponse}, ReadResp, "ReadReq" },
72 /* ReadResp */
73 { {IsRead, IsResponse, HasData}, InvalidCmd, "ReadResp" },
74 /* ReadRespWithInvalidate */
75 { {IsRead, IsResponse, HasData, IsInvalidate},
76 InvalidCmd, "ReadRespWithInvalidate" },
77 /* WriteReq */
78 { {IsWrite, NeedsWritable, IsRequest, NeedsResponse, HasData},
79 WriteResp, "WriteReq" },
80 /* WriteResp */
81 { {IsWrite, IsResponse}, InvalidCmd, "WriteResp" },
82 /* WriteCompleteResp - The WriteCompleteResp command is needed
83 * because in the GPU memory model we use a WriteResp to indicate
84 * that a write has reached the cache controller so we can free
85 * resources at the coalescer. Later, when the write succesfully
86 * completes we send a WriteCompleteResp to the CU so its wait
87 * counters can be updated. Wait counters in the CU is how memory
88 * dependences are handled in the GPU ISA. */
89 { {IsWrite, IsResponse}, InvalidCmd, "WriteCompleteResp" },
|
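Each commandInfo row pairs a set of attribute bits with a response command and a name, and testCmdAttrib simply indexes this table. The mechanism in miniature (an illustrative sketch, not the real table):

```cpp
#include <bitset>
#include <cstddef>

enum Attribute { IsRead, IsWrite, IsRequest, IsResponse,
                 NeedsResponse, HasData, NUM_ATTRS };

struct MiniCommandInfo { std::bitset<NUM_ATTRS> attributes; };

constexpr unsigned long bit(Attribute a) { return 1ul << a; }

// One row per command, indexed by a command id.
const MiniCommandInfo miniCommandInfo[] = {
    /* ReadReq  */ { bit(IsRead) | bit(IsRequest) | bit(NeedsResponse) },
    /* ReadResp */ { bit(IsRead) | bit(IsResponse) | bit(HasData) },
};

bool isReadSketch(std::size_t cmd)
{
    return miniCommandInfo[cmd].attributes[IsRead];
}
```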
send packet to the cache
| 1083 template<class Impl>
1084 void
1085 LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1086 {
1087 assert(_numOutstandingPackets == 0);
1088 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1089 _numOutstandingPackets = 1;
1090 }
|
| 1083 template <class Impl>
1084 bool
1085 LSQUnit<Impl>::trySendPacket(bool isLoad, PacketPtr data_pkt)
1086 {
1087 bool ret = true;
1088 bool cache_got_blocked = false;
1089
1090 auto state = dynamic_cast<LSQSenderState*>(data_pkt->senderState);
1091
1092 if (!lsq->cacheBlocked() &&
1093 lsq->cachePortAvailable(isLoad)) {
1094 if (!dcachePort->sendTimingReq(data_pkt)) {
1095 ret = false;
1096 cache_got_blocked = true;
1097 }
1098 } else {
1099 ret = false;
1100 }
1101
1102 if (ret) {
1103 if (!isLoad) {
1104 isStoreBlocked = false;
1105 }
1106 lsq->cachePortBusy(isLoad);
1107 state->outstanding++;
1108 state->request()->packetSent();
1109 } else {
1110 if (cache_got_blocked) {
1111 lsq->cacheBlocked(true);
1112 ++lsqCacheBlocked;
1113 }
1114 if (!isLoad) {
1115 assert(state->request() == storeWBIt->request());
1116 isStoreBlocked = true;
1117 }
1118 state->request()->packetNotSent();
1119 }
1120 return ret;
1121 }
|
This packet is sent to the cache through the cache port
connected to the LSQ.
trySendPacket first checks whether the cache is currently blocked.
If it is not blocked and a cache port is available for this type of access,
it sends the request packet through the dcachePort.
The memory access is initiated by sending the request packet
with the sendTimingReq method.
Because the CPU goes through the data cache
before touching physical memory,
sendTimingReq is invoked on the DcachePort.
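The gating logic of trySendPacket can be condensed to a few lines. A standalone sketch assuming fixed per-cycle load/store port counts (the types are stand-ins, not gem5 classes):

```cpp
struct MiniLSQ {
    bool cacheBlocked = false;
    int freeLoadPorts = 2, freeStorePorts = 1;

    bool portAvailable(bool isLoad) const
    { return isLoad ? freeLoadPorts > 0 : freeStorePorts > 0; }

    void portBusy(bool isLoad)
    { (isLoad ? freeLoadPorts : freeStorePorts)--; }
};

// peerAccepts stands in for dcachePort->sendTimingReq(pkt).
bool trySendSketch(MiniLSQ &lsq, bool isLoad, bool peerAccepts)
{
    if (lsq.cacheBlocked || !lsq.portAvailable(isLoad))
        return false;                // stall locally; retry next cycle
    if (!peerAccepts) {
        lsq.cacheBlocked = true;     // block until the cache sends a retry
        return false;
    }
    lsq.portBusy(isLoad);            // consume one cache port this cycle
    return true;
}
```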
gem5/src/mem/port.hh
| 444 inline bool
445 MasterPort::sendTimingReq(PacketPtr pkt)
446 {
447 return TimingRequestProtocol::sendReq(_slavePort, pkt);
448 }
|
mem/protocol/timing.cc
| 47 /* The request protocol. */
48
49 bool
50 TimingRequestProtocol::sendReq(TimingResponseProtocol *peer, PacketPtr pkt)
51 {
52 assert(pkt->isRequest());
53 return peer->recvTimingReq(pkt);
54 }
|
The sendTimingReq function is very simple:
it just invokes the recvTimingReq function of the peer connected to the dcachePort
as a slave.
Because the cache unit is connected to the dcachePort on the other side of the CPU,
we will take a look at the recvTimingReq implementation of the cache unit.
Cache, Cache, Cache!
recvTimingReq of the BaseCache: how to process the cache access?
| 2448 bool
2449 BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt)
2450 {
2451 assert(pkt->isRequest());
2452
2453 if (cache->system->bypassCaches()) {
2454 // Just forward the packet if caches are disabled.
2455 // @todo This should really enqueue the packet rather
2456 GEM5_VAR_USED bool success = cache->memSidePort.sendTimingReq(pkt);
2457 assert(success);
2458 return true;
2459 } else if (tryTiming(pkt)) {
2460 cache->recvTimingReq(pkt);
2461 return true;
2462 }
2463 return false;
2464 }
|
First of all, the cache port connected to the CPU side
is in charge of handling timing requests generated by the CPU side.
Because the BaseCache contains a dedicated port for communicating with the CPU side,
called CpuSidePort, its recvTimingReq function is invoked.
However, the main cache operations are done by the BaseCache's recvTimingReq.
| 349 void
350 BaseCache::recvTimingReq(PacketPtr pkt)
351 {
352 // anything that is merely forwarded pays for the forward latency and
353 // the delay provided by the crossbar
354 Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
355
356 Cycles lat;
357 CacheBlk *blk = nullptr;
358 bool satisfied = false;
359 {
360 PacketList writebacks;
361 // Note that lat is passed by reference here. The function
362 // access() will set the lat value.
363 satisfied = access(pkt, blk, lat, writebacks);
364
365 // After the evicted blocks are selected, they must be forwarded
366 // to the write buffer to ensure they logically precede anything
367 // happening below
368 doWritebacks(writebacks, clockEdge(lat + forwardLatency));
369 }
370
|
Because recvTimingReq is pretty complex and long,
I will explain the important parts one by one.
First of all, it invokes the access function
to access the cache entry, checking whether the data mapped to the
request address exists in the cache.
After that, it invokes the doWritebacks function to
write back evicted entries, if any exist.
By the way, why does access generate victim entries that require a write-back?
I will show you the answer soon.
access function, another long journey in the midst of recvTimingReq
Unfortunately, the access function is even more complex
than recvTimingReq because it emulates
the actual cache access in the GEM5 cache model.
Let's take a look at its implementation one piece at a time.
access1: check if the cache block exists in the current cache
| 1152 bool
1153 BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
1154 PacketList &writebacks)
1155 {
1156 // sanity check
1157 assert(pkt->isRequest());
1158
1159 chatty_assert(!(isReadOnly && pkt->isWrite()),
1160 "Should never see a write in a read-only cache %s\n",
1161 name());
1162
1163 // Access block in the tags
1164 Cycles tag_latency(0);
1165 blk = tags->accessBlock(pkt, tag_latency);
1166
1167 DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(),
1168 blk ? "hit " + blk->print() : "miss");
1169
|
The first job done by the access function is retrieving the CacheBlk
associated with the current request's address.
Because the tags member field manages all CacheBlks of the cache,
it invokes the accessBlock function of tags.
| 117 /**
118 * Access block and update replacement data. May not succeed, in which case
119 * nullptr is returned. This has all the implications of a cache access and
120 * should only be used as such. Returns the tag lookup latency as a side
121 * effect.
122 *
123 * @param pkt The packet holding the address to find.
124 * @param lat The latency of the tag lookup.
125 * @return Pointer to the cache block if found.
126 */
127 CacheBlk* accessBlock(const PacketPtr pkt, Cycles &lat) override
128 {
129 CacheBlk *blk = findBlock(pkt->getAddr(), pkt->isSecure());
130
131 // Access all tags in parallel, hence one in each way. The data side
132 // either accesses all blocks in parallel, or one block sequentially on
133 // a hit. Sequential access with a miss doesn't access data.
134 stats.tagAccesses += allocAssoc;
135 if (sequentialAccess) {
136 if (blk != nullptr) {
137 stats.dataAccesses += 1;
138 }
139 } else {
140 stats.dataAccesses += allocAssoc;
141 }
142
143 // If a cache hit
144 if (blk != nullptr) {
145 // Update number of references to accessed block
146 blk->increaseRefCount();
147
148 // Update replacement data of accessed block
149 replacementPolicy->touch(blk->replacementData, pkt);
150 }
151
152 // The tag lookup latency is the same for a hit or a miss
153 lat = lookupLatency;
154
155 return blk;
156 }
|
| 79 CacheBlk*
80 BaseTags::findBlock(Addr addr, bool is_secure) const
81 {
82 // Extract block tag
83 Addr tag = extractTag(addr);
84
85 // Find possible entries that may contain the given address
86 const std::vector<ReplaceableEntry*> entries =
87 indexingPolicy->getPossibleEntries(addr);
88
89 // Search for block
90 for (const auto& location : entries) {
91 CacheBlk* blk = static_cast<CacheBlk*>(location);
92 if (blk->matchTag(tag, is_secure)) {
93 return blk;
94 }
95 }
96
97 // Did not find block
98 return nullptr;
99 }
|
Because each CacheBlk is associated with one address
through its tag value, findBlock can determine whether the cache
already contains the block mapped to the current request's address
by checking the tags of the way entries in the set that the address maps to.
Also, note that it returns nullptr
when there is no matching block.
Therefore, by checking the CacheBlk
returned by the findBlock function,
accessBlock can distinguish a cache hit from a miss.
When a cache hit happens,
it invokes the touch function of the replacementPolicy
to update the replacement data
associated with the accessed CacheBlk.
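The set-and-tag lookup is easy to see in a standalone miniature. A sketch assuming 64-byte blocks and a simple modulo indexing policy (not gem5's actual indexing-policy classes):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

struct MiniBlk { uint64_t tag = 0; bool valid = false; };

struct MiniTags {
    static constexpr unsigned blkBits = 6;   // 64-byte blocks
    std::size_t numSets, assoc;
    std::vector<MiniBlk> blks;               // numSets * assoc entries

    MiniTags(std::size_t sets, std::size_t ways)
        : numSets(sets), assoc(ways), blks(sets * ways) {}

    MiniBlk *findBlock(uint64_t addr)
    {
        uint64_t set = (addr >> blkBits) % numSets;
        uint64_t tag = addr >> blkBits;      // keep the full tag for simplicity
        for (std::size_t way = 0; way < assoc; way++) {
            MiniBlk &b = blks[set * assoc + way];
            if (b.valid && b.tag == tag)
                return &b;                   // hit
        }
        return nullptr;                      // miss
    }
};
```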
access2: handling cache maintenance packets
Let's go back to the access function.
After the accessBlock function returns, it checks
the type of the packet.
| 1170 if (pkt->req->isCacheMaintenance()) {
1171 // A cache maintenance operation is always forwarded to the
1172 // memory below even if the block is found in dirty state.
1173
1174 // We defer any changes to the state of the block until we
1175 // create and mark as in service the mshr for the downstream
1176 // packet.
1177
1178 // Calculate access latency on top of when the packet arrives. This
1179 // takes into account the bus delay.
1180 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1181
1182 return false;
1183 }
|
The cache maintenance flags are defined in the Request class (mem/request.hh):
| 1001 /**
1002 * Accessor functions to determine whether this request is part of
1003 * a cache maintenance operation. At the moment three operations
1004 * are supported:
1005
1006 * 1) A cache clean operation updates all copies of a memory
1007 * location to the point of reference,
1008 * 2) A cache invalidate operation invalidates all copies of the
1009 * specified block in the memory above the point of reference,
1010 * 3) A clean and invalidate operation is a combination of the two
1011 * operations.
1012 * @{ */
1013 bool isCacheClean() const { return _flags.isSet(CLEAN); }
1014 bool isCacheInvalidate() const { return _flags.isSet(INVALIDATE); }
1015 bool isCacheMaintenance() const { return _flags.isSet(CLEAN|INVALIDATE); }
1016 /** @} */
|
Currently, GEM5 provides three different requests for cache maintenance:
cache clean, cache invalidate, and clean and invalidate.
Here is a good general definition of the invalidate and clean events:
Invalidate simply marks a cache line as "invalid", meaning you won't hit on it.
Clean causes the contents of the cache line to be written back to memory (or the next level of cache),
but only if the cache line is "dirty".
That is, the cache line holds the latest copy of that memory.
Clean & Invalidate, as the name suggests, does both.
Dirty lines normally get back to memory through evictions.
When a line is selected to be evicted,
there is a check to see if it is dirty.
If yes, it gets written back to memory.
Cleaning is a way to force this to happen at a particular time,
for example because something else is going to read the buffer.
In theory, if you invalidated a dirty line you could lose data,
as an invalid line won't get written back to memory automatically through eviction.
In practice many cores will treat Invalidate as Clean & Invalidate,
but you shouldn't rely on that.
If the line is potentially dirty, and you care about the data,
you should use Clean & Invalidate rather than Invalidate.
Because cache maintenance requests are related to cache flushing
and coherence, they must be handled specially by the cache unit.
When a maintenance packet reaches the cache,
access returns false immediately, so the
satisfied variable is set to false, which means the request is treated like a miss.
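The three operations are easy to express as a tiny state machine over a single line. An illustrative sketch (not gem5 code):

```cpp
#include <functional>

struct MiniLine { bool valid = true, dirty = true; };

void clean(MiniLine &l, const std::function<void(MiniLine&)> &writeback)
{
    if (l.valid && l.dirty) {
        writeback(l);      // force the dirty copy back to memory now
        l.dirty = false;
    }
}

void invalidate(MiniLine &l)
{
    l.valid = false;       // a dirty line invalidated this way loses its data
}

void cleanAndInvalidate(MiniLine &l,
                        const std::function<void(MiniLine&)> &writeback)
{
    clean(l, writeback);   // write back first...
    invalidate(l);         // ...then drop the line
}
```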
access3: handling eviction request packet
| 1185 if (pkt->isEviction()) {
1186 // We check for presence of block in above caches before issuing
1187 // Writeback or CleanEvict to write buffer. Therefore the only
1188 // possible cases can be of a CleanEvict packet coming from above
1189 // encountering a Writeback generated in this cache peer cache and
1190 // waiting in the write buffer. Cases of upper level peer caches
1191 // generating CleanEvict and Writeback or simply CleanEvict and
1192 // CleanEvict almost simultaneously will be caught by snoops sent out
1193 // by crossbar.
1194 WriteQueueEntry *wb_entry = writeBuffer.findMatch(pkt->getAddr(),
1195 pkt->isSecure());
1196 if (wb_entry) {
1197 assert(wb_entry->getNumTargets() == 1);
1198 PacketPtr wbPkt = wb_entry->getTarget()->pkt;
1199 assert(wbPkt->isWriteback());
1200
1201 if (pkt->isCleanEviction()) {
1202 // The CleanEvict and WritebackClean snoops into other
1203 // peer caches of the same level while traversing the
1204 // crossbar. If a copy of the block is found, the
1205 // packet is deleted in the crossbar. Hence, none of
1206 // the other upper level caches connected to this
1207 // cache have the block, so we can clear the
1208 // BLOCK_CACHED flag in the Writeback if set and
1209 // discard the CleanEvict by returning true.
1210 wbPkt->clearBlockCached();
1211
1212 // A clean evict does not need to access the data array
1213 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1214
1215 return true;
1216 } else {
1217 assert(pkt->cmd == MemCmd::WritebackDirty);
1218 // Dirty writeback from above trumps our clean
1219 // writeback... discard here
1220 // Note: markInService will remove entry from writeback buffer.
1221 markInService(wb_entry);
1222 delete wbPkt;
1223 }
1224 }
1225 }
|
| 91 { {IsWrite, IsRequest, IsEviction, HasData, FromCache},
92 InvalidCmd, "WritebackDirty" },
93 /* WritebackClean - This allows the upstream cache to writeback a
94 * line to the downstream cache without it being considered
95 * dirty. */
96 { {IsWrite, IsRequest, IsEviction, HasData, FromCache},
97 InvalidCmd, "WritebackClean" },
101 /* CleanEvict */
102 { {IsRequest, IsEviction, FromCache}, InvalidCmd, "CleanEvict" },
|
access4: handle writeback packets
| 1227 // The critical latency part of a write depends only on the tag access
1228 if (pkt->isWrite()) {
1229 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1230 }
1231
1232 // Writeback handling is special case. We can write the block into
1233 // the cache without having a writeable copy (or any copy at all).
1234 if (pkt->isWriteback()) {
1235 assert(blkSize == pkt->getSize());
1236
1237 // we could get a clean writeback while we are having
1238 // outstanding accesses to a block, do the simple thing for
1239 // now and drop the clean writeback so that we do not upset
1240 // any ordering/decisions about ownership already taken
1241 if (pkt->cmd == MemCmd::WritebackClean &&
1242 mshrQueue.findMatch(pkt->getAddr(), pkt->isSecure())) {
1243 DPRINTF(Cache, "Clean writeback %#llx to block with MSHR, "
1244 "dropping\n", pkt->getAddr());
1245
1246 // A writeback searches for the block, then writes the data.
1247 // As the writeback is being dropped, the data is not touched,
1248 // and we just had to wait for the time to find a match in the
1249 // MSHR. As of now assume a mshr queue search takes as long as
1250 // a tag lookup for simplicity.
1251 return true;
1252 }
1253
1254 const bool has_old_data = blk && blk->isValid();
1255 if (!blk) {
1256 // need to do a replacement
1257 blk = allocateBlock(pkt, writebacks);
1258 if (!blk) {
1259 // no replaceable block available: give up, fwd to next level.
1260 incMissCount(pkt);
1261 return false;
1262 }
1263
1264 blk->setCoherenceBits(CacheBlk::ReadableBit);
1265 } else if (compressor) {
1266 // This is an overwrite to an existing block, therefore we need
1267 // to check for data expansion (i.e., block was compressed with
1268 // a smaller size, and now it doesn't fit the entry anymore).
1269 // If that is the case we might need to evict blocks.
1270 if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
1271 writebacks)) {
1272 invalidateBlock(blk);
1273 return false;
1274 }
1275 }
1276
1277 // only mark the block dirty if we got a writeback command,
1278 // and leave it as is for a clean writeback
1279 if (pkt->cmd == MemCmd::WritebackDirty) {
1280 // TODO: the coherent cache can assert that the dirty bit is set
1281 blk->setCoherenceBits(CacheBlk::DirtyBit);
1282 }
1283 // if the packet does not have sharers, it is passing
1284 // writable, and we got the writeback in Modified or Exclusive
1285 // state, if not we are in the Owned or Shared state
1286 if (!pkt->hasSharers()) {
1287 blk->setCoherenceBits(CacheBlk::WritableBit);
1288 }
1289 // nothing else to do; writeback doesn't expect response
1290 assert(!pkt->needsResponse());
1291
1292 updateBlockData(blk, pkt, has_old_data);
1293 DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
1294 incHitCount(pkt);
1295
1296 // When the packet metadata arrives, the tag lookup will be done while
1297 // the payload is arriving. Then the block will be ready to access as
1298 // soon as the fill is done
1299 blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1300 std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
1301
1302 return true;
1303 } else if (pkt->cmd == MemCmd::CleanEvict) {
|
GEM5 defines the condition for a writeback as below.
| 229 /**
230 * A writeback is an eviction that carries data.
231 */
232 bool isWriteback() const { return testCmdAttrib(IsEviction) &&
233 testCmdAttrib(HasData); }
|
When a request packet has both IsEviction and HasData set,
the packet that invoked the access function
is a writeback request packet.
The code below specifies the commands that satisfy this condition.
| 91 { {IsWrite, IsRequest, IsEviction, HasData, FromCache},
92 InvalidCmd, "WritebackDirty" },
93 /* WritebackClean - This allows the upstream cache to writeback a
94 * line to the downstream cache without it being considered
95 * dirty. */
96 { {IsWrite, IsRequest, IsEviction, HasData, FromCache},
97 InvalidCmd, "WritebackClean" },
|
When those conditions are met, the access function handles the writeback packet.
Summarizing the code above: a clean writeback is simply dropped if an MSHR for the
block is already outstanding; if the block is not present, a victim is allocated
(possibly pushing evictions into writebacks), and an allocation failure is forwarded
to the next level; the block is marked dirty only for WritebackDirty, marked writable
if the packet has no sharers, and finally the packet data is copied into the block.
access5: handle CleanEvict and WriteClean packets
| 1303 } else if (pkt->cmd == MemCmd::CleanEvict) {
1304 // A CleanEvict does not need to access the data array
1305 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1306
1307 if (blk) {
1308 // Found the block in the tags, need to stop CleanEvict from
1309 // propagating further down the hierarchy. Returning true will
1310 // treat the CleanEvict like a satisfied write request and delete
1311 // it.
1312 return true;
1313 }
1314 // We didn't find the block here, propagate the CleanEvict further
1315 // down the memory hierarchy. Returning false will treat the CleanEvict
1316 // like a Writeback which could not find a replaceable block so has to
1317 // go to next level.
1318 return false;
1319 } else if (pkt->cmd == MemCmd::WriteClean) {
1320 // WriteClean handling is a special case. We can allocate a
1321 // block directly if it doesn't exist and we can update the
1322 // block immediately. The WriteClean transfers the ownership
1323 // of the block as well.
1324 assert(blkSize == pkt->getSize());
1325
1326 const bool has_old_data = blk && blk->isValid();
1327 if (!blk) {
1328 if (pkt->writeThrough()) {
1329 // if this is a write through packet, we don't try to
1330 // allocate if the block is not present
1331 return false;
1332 } else {
1333 // a writeback that misses needs to allocate a new block
1334 blk = allocateBlock(pkt, writebacks);
1335 if (!blk) {
1336 // no replaceable block available: give up, fwd to
1337 // next level.
1338 incMissCount(pkt);
1339 return false;
1340 }
1341
1342 blk->setCoherenceBits(CacheBlk::ReadableBit);
1343 }
1344 } else if (compressor) {
1345 // This is an overwrite to an existing block, therefore we need
1346 // to check for data expansion (i.e., block was compressed with
1347 // a smaller size, and now it doesn't fit the entry anymore).
1348 // If that is the case we might need to evict blocks.
1349 if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
1350 writebacks)) {
1351 invalidateBlock(blk);
1352 return false;
1353 }
1354 }
1355
1356 // at this point either this is a writeback or a write-through
1357 // write clean operation and the block is already in this
1358 // cache, we need to update the data and the block flags
1359 assert(blk);
1360 // TODO: the coherent cache can assert that the dirty bit is set
1361 if (!pkt->writeThrough()) {
1362 blk->setCoherenceBits(CacheBlk::DirtyBit);
1363 }
1364 // nothing else to do; writeback doesn't expect response
1365 assert(!pkt->needsResponse());
1366
1367 updateBlockData(blk, pkt, has_old_data);
1368 DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
1369
1370 incHitCount(pkt);
1371
1372 // When the packet metadata arrives, the tag lookup will be done while
1373 // the payload is arriving. Then the block will be ready to access as
1374 // soon as the fill is done
1375 blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1376 std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
1377
1378 // If this a write-through packet it will be sent to cache below
1379 return !pkt->writeThrough();
|
access6: handle normal read or write request to existing block with adequate properties
| 1380 } else if (blk && (pkt->needsWritable() ?
1381 blk->isSet(CacheBlk::WritableBit) :
1382 blk->isSet(CacheBlk::ReadableBit))) {
1383 // OK to satisfy access
1384 incHitCount(pkt);
1385
1386 // Calculate access latency based on the need to access the data array
1387 if (pkt->isRead()) {
1388 lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1389
1390 // When a block is compressed, it must first be decompressed
1391 // before being read. This adds to the access latency.
1392 if (compressor) {
1393 lat += compressor->getDecompressionLatency(blk);
1394 }
1395 } else {
1396 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1397 }
1398
1399 satisfyRequest(pkt, blk);
1400 maintainClusivity(pkt->fromCache(), blk);
1401
1402 return true;
1403 }
1404
|
To handle a read or write access to an existing cache block,
access first checks the properties of the
block, such as the writable and readable bits.
If the cached block meets the requirement of the current request's type
(a writable copy for requests that need one, a readable copy otherwise),
the request is handled in this branch.
Note that it returns true at the end because
the request can be satisfied by the cached block, which means a cache hit.
It also invokes the satisfyRequest function, which performs the actual data
transfer between the packet and the block: a read is filled from the block's
data, and a write is applied to it.
satisfyRequest is a virtual function of BaseCache
and is also overridden by its child class, Cache.
There are two main places where satisfyRequest is invoked:
the access function and serviceMSHRTargets.
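What "satisfying" a hit amounts to can be sketched in a few lines. This is an illustrative stand-in for satisfyRequest, which in reality also handles LLSC, atomic swaps, and coherence responses:

```cpp
#include <cstdint>
#include <cstring>

struct MiniDataBlk { uint8_t data[64]; bool dirty = false; };

// Reads copy block data into the response; writes copy packet data
// into the block and mark it dirty.
void satisfySketch(bool isRead, uint8_t *pktData, std::size_t size,
                   std::size_t offset, MiniDataBlk &blk)
{
    if (isRead) {
        std::memcpy(pktData, blk.data + offset, size);   // fill the response
    } else {
        std::memcpy(blk.data + offset, pktData, size);   // apply the write
        blk.dirty = true;                                // needs writeback later
    }
}
```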
access7: other cases, mainly cache misses
| 1405 // Can't satisfy access normally... either no block (blk == nullptr)
1406 // or have block but need writable
1407
1408 incMissCount(pkt);
1409
1410 lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1411
1412 if (!blk && pkt->isLLSC() && pkt->isWrite()) {
1413 // complete miss on store conditional... just give up now
1414 pkt->req->setExtraData(0);
1415 return true;
1416 }
1417
1418 return false;
1419 }
|
These cases include a cache miss, a write request to a non-writable block,
a read request to a non-readable block, and so on.
When none of the previous conditions matched the current request,
it must be handled by the
rest of the recvTimingReq function, in particular the cache miss handling.
Note that access returns false here.
Revisiting recvTimingReq of the BaseCache to handle cache hit and miss
| 349 void
350 BaseCache::recvTimingReq(PacketPtr pkt)
......
371 // Here we charge the headerDelay that takes into account the latencies
372 // of the bus, if the packet comes from it.
373 // The latency charged is just the value set by the access() function.
374 // In case of a hit we are neglecting response latency.
375 // In case of a miss we are neglecting forward latency.
376 Tick request_time = clockEdge(lat);
377 // Here we reset the timing of the packet.
378 pkt->headerDelay = pkt->payloadDelay = 0;
379
380 if (satisfied) {
381 // notify before anything else as later handleTimingReqHit might turn
382 // the packet in a response
383 ppHit->notify(pkt);
384
385 if (prefetcher && blk && blk->wasPrefetched()) {
386 DPRINTF(Cache, "Hit on prefetch for addr %#x (%s)\n",
387 pkt->getAddr(), pkt->isSecure() ? "s" : "ns");
388 blk->clearPrefetched();
389 }
390
391 handleTimingReqHit(pkt, blk, request_time);
392 } else {
393 handleTimingReqMiss(pkt, blk, forward_time, request_time);
394
395 ppMiss->notify(pkt);
396 }
397
398 if (prefetcher) {
399 // track time of availability of next prefetch, if any
400 Tick next_pf_time = prefetcher->nextPrefetchReadyTime();
401 if (next_pf_time != MaxTick) {
402 schedMemSideSendEvent(next_pf_time);
403 }
404 }
405 }
|
The access function asks the cache
whether the requested data exists there,
and its return value indicates whether it did.
The satisfied variable holds this return value.
Therefore, based on the satisfied condition,
recvTimingReq handles the cache hit and miss events differently.
When the cache hit happens
| 223 void
224 BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
225 {
226 if (pkt->needsResponse()) {
227 // These delays should have been consumed by now
228 assert(pkt->headerDelay == 0);
229 assert(pkt->payloadDelay == 0);
230
231 pkt->makeTimingResponse();
232
233 // In this case we are considering request_time that takes
234 // into account the delay of the xbar, if any, and just
235 // lat, neglecting responseLatency, modelling hit latency
236 // just as the value of lat overriden by access(), which calls
237 // the calculateAccessLatency() function.
238 cpuSidePort.schedTimingResp(pkt, request_time);
239 } else {
240 DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__,
241 pkt->print());
242
243 // queue the packet for deletion, as the sending cache is
244 // still relying on it; if the block is found in access(),
245 // CleanEvict and Writeback messages will be deleted
246 // here as well
247 pendingDelete.reset(pkt);
248 }
249 }
|
Depending on the request type of the memory operation, it may or may not require a response.
Therefore, handleTimingReqHit first checks whether the packet requires a response
with the needsResponse method.
When a response is required, it invokes schedTimingResp of the cpuSidePort.
| 93 void schedTimingResp(PacketPtr pkt, Tick when)
94 { respQueue.schedSendTiming(pkt, when); }
|
The schedTimingResp function is defined in the QueuedResponsePort class,
which is one of the ancestor classes of CpuSidePort.
schedSendTiming, in turn, is a member function of the RespPacketQueue,
which is the type of respQueue.
The PacketQueue class defines the schedSendTiming method, and
RespPacketQueue inherits from PacketQueue.
| 106 void
107 PacketQueue::schedSendTiming(PacketPtr pkt, Tick when)
108 {
109 DPRINTF(PacketQueue, "%s for %s address %x size %d when %lu ord: %i\n",
110 __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize(), when,
111 forceOrder);
112
113 // we can still send a packet before the end of this tick
114 assert(when >= curTick());
115
116 // express snoops should never be queued
117 assert(!pkt->isExpressSnoop());
118
119 // add a very basic sanity check on the port to ensure the
120 // invisible buffer is not growing beyond reasonable limits
121 if (!_disableSanityCheck && transmitList.size() > 128) {
122 panic("Packet queue %s has grown beyond 128 packets\n",
123 name());
124 }
125
126 // we should either have an outstanding retry, or a send event
127 // scheduled, but there is an unfortunate corner case where the
128 // x86 page-table walker and timing CPU send out a new request as
129 // part of the receiving of a response (called by
130 // PacketQueue::sendDeferredPacket), in which we end up calling
131 // ourselves again before we had a chance to update waitingOnRetry
132 // assert(waitingOnRetry || sendEvent.scheduled());
133
134 // this belongs in the middle somewhere, so search from the end to
135 // order by tick; however, if forceOrder is set, also make sure
136 // not to re-order in front of some existing packet with the same
137 // address
138 auto it = transmitList.end();
139 while (it != transmitList.begin()) {
140 --it;
141 if ((forceOrder && it->pkt->matchAddr(pkt)) || it->tick <= when) {
142 // emplace inserts the element before the position pointed to by
143 // the iterator, so advance it one step
144 transmitList.emplace(++it, when, pkt);
145 return;
146 }
147 }
148 // either the packet list is empty or this has to be inserted
149 // before every other packet
150 transmitList.emplace_front(when, pkt);
151 schedSendEvent(when);
152 }
|
transmitList maintains all the packets that need to be sent to the other end of the port
| 68 /** A deferred packet, buffered to transmit later. */
69 class DeferredPacket
70 {
71 public:
72 Tick tick; ///< The tick when the packet is ready to transmit
73 PacketPtr pkt; ///< Pointer to the packet to transmit
74 DeferredPacket(Tick t, PacketPtr p)
75 : tick(t), pkt(p)
76 {}
77 };
78
79 typedef std::list<DeferredPacket> DeferredPacketList;
80
81 /** A list of outgoing packets. */
82 DeferredPacketList transmitList;
83
|
The transmitList contains all the DeferredPackets that are waiting to be sent.
Each entry therefore holds the packet itself and the tick at which it should be sent.
The explicit tick is required because GEM5 is an event-driven simulator, not real hardware.
The queued packets are sent when the sendEvent fires,
and that event is scheduled to fire at the stored tick
through the schedSendEvent function.
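The ordered insertion performed by schedSendTiming is worth seeing in isolation. A standalone sketch that keeps the list sorted by tick, scanning from the back just as the real code does (types are stand-ins):

```cpp
#include <cstdint>
#include <list>

struct MiniDeferred { uint64_t tick; int pkt; };

std::list<MiniDeferred> transmitList;

void schedSendTimingSketch(int pkt, uint64_t when)
{
    // Search from the end so the common case (append) stays cheap.
    auto it = transmitList.end();
    while (it != transmitList.begin()) {
        --it;
        if (it->tick <= when) {
            // Insert after the first entry that is not later than us.
            transmitList.insert(++it, MiniDeferred{when, pkt});
            return;
        }
    }
    // The list is empty, or every entry is later: go to the front.
    transmitList.push_front(MiniDeferred{when, pkt});
}
```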
schedSendEvent function schedules an event to handle the deferred packet
| 154 void
155 PacketQueue::schedSendEvent(Tick when)
156 {
157 // if we are waiting on a retry just hold off
158 if (waitingOnRetry) {
159 DPRINTF(PacketQueue, "Not scheduling send as waiting for retry\n");
160 assert(!sendEvent.scheduled());
161 return;
162 }
163
164 if (when != MaxTick) {
165 // we cannot go back in time, and to be consistent we stick to
166 // one tick in the future
167 when = std::max(when, curTick() + 1);
168 // @todo Revisit the +1
169
170 if (!sendEvent.scheduled()) {
171 em.schedule(&sendEvent, when);
172 } else if (when < sendEvent.when()) {
173 // if the new time is earlier than when the event
174 // currently is scheduled, move it forward
175 em.reschedule(&sendEvent, when);
176 }
177 } else {
178 // we get a MaxTick when there is no more to send, so if we're
179 // draining, we may be done at this point
180 if (drainState() == DrainState::Draining &&
181 transmitList.empty() && !sendEvent.scheduled()) {
182
183 DPRINTF(Drain, "PacketQueue done draining,"
184 "processing drain event\n");
185 signalDrainDone();
186 }
187 }
188 }
|
The most important thing done by schedSendEvent is scheduling the event
so that it fires at the exact time specified by the GEM5 emulator.
As shown in Line 170-176,
it first checks whether the sendEvent is already scheduled.
If no event is scheduled, it schedules one with the schedule function.
Note that the em member field points to the BaseCache.
Also, if an event is already scheduled for the sendEvent and
the new event should be raised before the pre-scheduled one,
it reschedules the event.
By the way, if there are packets that should be handled later than the newly scheduled event,
how do those get processed!?
To understand how the deferred packets are processed
and to resolve this question, let's take a look at the function invoked
when the scheduled event fires.
processSendEvent: event to handle deferred packet processing
| 50 PacketQueue::PacketQueue(EventManager& _em, const std::string& _label,
51 const std::string& _sendEventName,
52 bool force_order,
53 bool disable_sanity_check)
54 : em(_em), sendEvent([this]{ processSendEvent(); }, _sendEventName),
55 _disableSanityCheck(disable_sanity_check),
56 forceOrder(force_order),
57 label(_label), waitingOnRetry(false)
58 {
59 }
......
220 void
221 PacketQueue::processSendEvent()
222 {
223 assert(!waitingOnRetry);
224 sendDeferredPacket();
225 }
|
We can easily see that the sendEvent is initialized with processSendEvent
in the constructor of the PacketQueue.
Therefore, when the sendEvent fires, it invokes the processSendEvent function,
which in turn invokes the sendDeferredPacket function of the PacketQueue.
sendDeferredPacket handles deferred packet processing at the right time
| 190 void
191 PacketQueue::sendDeferredPacket()
192 {
193 // sanity checks
194 assert(!waitingOnRetry);
195 assert(deferredPacketReady());
196
197 DeferredPacket dp = transmitList.front();
198
199 // take the packet of the list before sending it, as sending of
200 // the packet in some cases causes a new packet to be enqueued
201 // (most notaly when responding to the timing CPU, leading to a
202 // new request hitting in the L1 icache, leading to a new
203 // response)
204 transmitList.pop_front();
205
206 // use the appropriate implementation of sendTiming based on the
207 // type of queue
208 waitingOnRetry = !sendTiming(dp.pkt);
209
210 // if we succeeded and are not waiting for a retry, schedule the
211 // next send
212 if (!waitingOnRetry) {
213 schedSendEvent(deferredPacketReadyTime());
214 } else {
215 // put the packet back at the front of the list
216 transmitList.emplace_front(dp);
217 }
218 }
|
You might remember that the transmitList contains each packet together with the tick
at which it should be sent, and sendDeferredPacket is the function that processes
a packet from the transmitList at that specified time.
It therefore extracts the packet from the transmitList (line 197-204).
After getting the packet to send, it invokes the sendTiming function to actually send the
packet to the unit waiting for the response.
However, sendTiming is not implemented in PacketQueue itself;
it is declared as a virtual function, which means
the child's sendTiming is invoked.
Remember that the schedTimingResp of the cpuSidePort brought us all the way down here,
and the respQueue used to schedule the send was a RespPacketQueue object.
RespPacketQueue inherits from PacketQueue, and it implements the sendTiming function.
| 275 bool
276 RespPacketQueue::sendTiming(PacketPtr pkt)
277 {
278 return cpuSidePort.sendTimingResp(pkt);
279 }
|
Finally, it invokes the sendTimingResp function of the cpuSidePort to send the packet to the CPU.
Yeah… it is kind of a long detour to get to sendTimingResp.
The important reason for this complicated packet handling process is
to decouple the CpuSidePort from managing response packets.
After the cache generates the response packet,
instead of directly invoking the sendTimingResp function of the cpuSidePort,
it lets the PacketQueue handle all the operations relevant to managing response packets.
After sendTimingResp is invoked,
its result determines waitingOnRetry, which indicates that
the peer (here, the CPU side) is currently unable to receive the response packet from the cache.
In that case, the waitingOnRetry field is set, and the packet must be sent once again
when the CPU sends a retry message to the cache at some later point.
| 169 /**
170 * Get the next packet ready time.
171 */
172 Tick deferredPacketReadyTime() const
173 { return transmitList.empty() ? MaxTick : transmitList.front().tick; }
|
Now it is time to answer the previous question: after one packet is processed,
if there are remaining packets that need to be sent at some later point, what should we do?
The deferredPacketReadyTime function checks the transmitList and returns the tick
of the next deferred packet if one still remains.
This tick is passed to the schedSendEvent function,
which schedules the sendEvent again.
That's it!
waitingOnRetry
Looking back at the code above: waitingOnRetry is set when sendTiming fails
because the peer rejected the packet. In that case the packet is put back at the
front of the transmitList, and schedSendEvent refuses to schedule any new send
event while the flag is set. When the peer becomes ready again, it sends a retry
to the port; the queue then clears waitingOnRetry and retransmits the deferred packet.
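A standalone sketch of this retry handshake, assuming a simple peer port that can reject packets (recvRetry and the other names below are illustrative, not the exact gem5 interface):

```cpp
#include <cstdint>
#include <list>

struct MiniPkt { int id; };
struct MiniDeferredPkt { uint64_t tick; MiniPkt pkt; };

struct MiniPacketQueue {
    std::list<MiniDeferredPkt> transmitList;
    bool waitingOnRetry = false;
    bool (*sendTiming)(const MiniPkt &);   // stand-in for the peer port

    void sendDeferredPacket()
    {
        MiniDeferredPkt dp = transmitList.front();
        transmitList.pop_front();
        waitingOnRetry = !sendTiming(dp.pkt);
        if (waitingOnRetry)
            transmitList.push_front(dp);   // keep it; no sends until retry
    }

    void recvRetry()                       // the peer is ready again
    {
        waitingOnRetry = false;
        if (!transmitList.empty())
            sendDeferredPacket();
    }
};
```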
When the cache miss happens
When the access function cannot return a cache block associated with
the current request, the satisfied condition is set to false.
Therefore, the handleTimingReqMiss function is executed to fetch the
cache block from the next-level cache or memory.
| 323 void
324 Cache::handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk, Tick forward_time,
325 Tick request_time)
326 {
327 if (pkt->req->isUncacheable()) {
328 // ignore any existing MSHR if we are dealing with an
329 // uncacheable request
330
331 // should have flushed and have no valid block
332 assert(!blk || !blk->isValid());
333
334 stats.cmdStats(pkt).mshrUncacheable[pkt->req->requestorId()]++;
335
336 if (pkt->isWrite()) {
337 allocateWriteBuffer(pkt, forward_time);
338 } else {
339 assert(pkt->isRead());
340
341 // uncacheable accesses always allocate a new MSHR
342
343 // Here we are using forward_time, modelling the latency of
344 // a miss (outbound) just as forwardLatency, neglecting the
345 // lookupLatency component.
346 allocateMissBuffer(pkt, forward_time);
347 }
348
349 return;
350 }
351
352 Addr blk_addr = pkt->getBlockAddr(blkSize);
353
354 MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure());
355
356 // Software prefetch handling:
357 // To keep the core from waiting on data it won't look at
358 // anyway, send back a response with dummy data. Miss handling
359 // will continue asynchronously. Unfortunately, the core will
360 // insist upon freeing original Packet/Request, so we have to
361 // create a new pair with a different lifecycle. Note that this
362 // processing happens before any MSHR munging on the behalf of
363 // this request because this new Request will be the one stored
364 // into the MSHRs, not the original.
365 if (pkt->cmd.isSWPrefetch()) {
366 assert(pkt->needsResponse());
367 assert(pkt->req->hasPaddr());
368 assert(!pkt->req->isUncacheable());
369
370 // There's no reason to add a prefetch as an additional target
371 // to an existing MSHR. If an outstanding request is already
372 // in progress, there is nothing for the prefetch to do.
373 // If this is the case, we don't even create a request at all.
374 PacketPtr pf = nullptr;
375
376 if (!mshr) {
377 // copy the request and create a new SoftPFReq packet
378 RequestPtr req = std::make_shared<Request>(pkt->req->getPaddr(),
379 pkt->req->getSize(),
380 pkt->req->getFlags(),
381 pkt->req->requestorId());
382 pf = new Packet(req, pkt->cmd);
383 pf->allocate();
384 assert(pf->matchAddr(pkt));
385 assert(pf->getSize() == pkt->getSize());
386 }
387
388 pkt->makeTimingResponse();
389
390 // request_time is used here, taking into account lat and the delay
391 // charged if the packet comes from the xbar.
392 cpuSidePort.schedTimingResp(pkt, request_time);
393
394 // If an outstanding request is in progress (we found an
395 // MSHR) this is set to null
396 pkt = pf;
397 }
398
399 BaseCache::handleTimingReqMiss(pkt, mshr, blk, forward_time, request_time);
400 }
|
When a cache miss happens, the first thing to do is
search for an MSHR entry.
The findMatch function of the mshrQueue,
which contains all the previously allocated MSHR entries,
is invoked to search for
an MSHR entry associated with the current request.
Whether or not a matching MSHR entry is found,
it then invokes the handleTimingReqMiss of the BaseCache
to further handle the cache miss.
Briefly speaking,
this function handles the cache miss differently
based on whether the MSHR entry exists or not.
Because this function is quite long, I will split it in two parts:
when an MSHR exists and when it doesn't.
When an MSHR does exist
| 251 void
252 BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
253 Tick forward_time, Tick request_time)
254 {
255 if (writeAllocator &&
256 pkt && pkt->isWrite() && !pkt->req->isUncacheable()) {
257 writeAllocator->updateMode(pkt->getAddr(), pkt->getSize(),
258 pkt->getBlockAddr(blkSize));
259 }
260
261 if (mshr) {
262 /// MSHR hit
263 /// @note writebacks will be checked in getNextMSHR()
264 /// for any conflicting requests to the same block
265
266 //@todo remove hw_pf here
267
268 // Coalesce unless it was a software prefetch (see above).
269 if (pkt) {
270 assert(!pkt->isWriteback());
271 // CleanEvicts corresponding to blocks which have
272 // outstanding requests in MSHRs are simply sunk here
273 if (pkt->cmd == MemCmd::CleanEvict) {
274 pendingDelete.reset(pkt);
275 } else if (pkt->cmd == MemCmd::WriteClean) {
276 // A WriteClean should never coalesce with any
277 // outstanding cache maintenance requests.
278
279 // We use forward_time here because there is an
280 // uncached memory write, forwarded to WriteBuffer.
281 allocateWriteBuffer(pkt, forward_time);
282 } else {
283 DPRINTF(Cache, "%s coalescing MSHR for %s\n", __func__,
284 pkt->print());
285
286 assert(pkt->req->requestorId() < system->maxRequestors());
287 stats.cmdStats(pkt).mshrHits[pkt->req->requestorId()]++;
288
289 // We use forward_time here because it is the same
290 // considering new targets. We have multiple
291 // requests for the same address here. It
292 // specifies the latency to allocate an internal
293 // buffer and to schedule an event to the queued
294 // port and also takes into account the additional
295 // delay of the xbar.
296 mshr->allocateTarget(pkt, forward_time, order++,
297 allocOnFill(pkt->cmd));
298 if (mshr->getNumTargets() == numTarget) {
299 noTargetMSHR = mshr;
300 setBlocked(Blocked_NoTargets);
301 // need to be careful with this... if this mshr isn't
302 // ready yet (i.e. time > curTick()), we don't want to
303 // move it ahead of mshrs that are ready
304 // mshrQueue.moveToFront(mshr);
305 }
306 }
307 }
|
You have to understand that one MSHR entry can track
multiple memory requests to the address
handled by that particular entry.
Therefore, the first job is
registering the missed request
with the MSHR entry as one of its targets.
Depending on the type of the memory request,
it might not be added as a target of the MSHR entry.
However, in most cases, when an L1 cache miss happens,
the request is added to the found MSHR entry by invoking
the allocateTarget function of that entry.
allocateTarget associates the missed request with the found MSHR entry
| 372 /*
373 * Adds a target to an MSHR
374 */
375 void
376 MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order,
377 bool alloc_on_fill)
378 {
379 // assume we'd never issue a prefetch when we've got an
380 // outstanding miss
381 assert(pkt->cmd != MemCmd::HardPFReq);
382
383 // if there's a request already in service for this MSHR, we will
384 // have to defer the new target until after the response if any of
385 // the following are true:
386 // - there are other targets already deferred
387 // - there's a pending invalidate to be applied after the response
388 // comes back (but before this target is processed)
389 // - the MSHR's first (and only) non-deferred target is a cache
390 // maintenance packet
391 // - the new target is a cache maintenance packet (this is probably
392 // overly conservative but certainly safe)
393 // - this target requires a writable block and either we're not
394 // getting a writable block back or we have already snooped
395 // another read request that will downgrade our writable block
396 // to non-writable (Shared or Owned)
397 PacketPtr tgt_pkt = targets.front().pkt;
398 if (pkt->req->isCacheMaintenance() ||
399 tgt_pkt->req->isCacheMaintenance() ||
400 !deferredTargets.empty() ||
401 (inService &&
402 (hasPostInvalidate() ||
403 (pkt->needsWritable() &&
404 (!isPendingModified() || hasPostDowngrade() || isForward))))) {
405 // need to put on deferred list
406 if (inService && hasPostInvalidate())
407 replaceUpgrade(pkt);
408 deferredTargets.add(pkt, whenReady, _order, Target::FromCPU, true,
409 alloc_on_fill);
410 } else {
411 // No request outstanding, or still OK to append to
412 // outstanding request: append to regular target list. Only
413 // mark pending if current request hasn't been issued yet
414 // (isn't in service).
415 targets.add(pkt, whenReady, _order, Target::FromCPU, !inService,
416 alloc_on_fill);
417 }
418
419 DPRINTF(MSHR, "After target allocation: %s", print());
420 }
The basic job of allocateTarget is to add the missed memory request to one particular MSHR entry's target list. Because an MSHR collects every memory access targeting a specific block address and maintains them as its targets, this function must associate the missed packet with the proper MSHR entry. Also, depending on the current state of the MSHR and the pending requests already associated with it, the new packet is added to either deferredTargets or targets. Because both are TargetList objects, let's take a look at that type first.
Target and TargetList
TargetList is a container class derived from std::list<Target>. Because one MSHR must record all the memory requests associated with its block, the TargetList stores each missed request together with its associated information, represented as a Target object.
129 class Target : public QueueEntry::Target
130 {
131 public:
132
133 enum Source
134 {
135 FromCPU,
136 FromSnoop,
137 FromPrefetcher
138 };
139
140 const Source source; //!< Request from cpu, memory, or prefetcher?
141
142 /**
143 * We use this flag to track whether we have cleared the
144 * downstreamPending flag for the MSHR of the cache above
145 * where this packet originates from and guard noninitial
146 * attempts to clear it.
147 *
148 * The flag markedPending needs to be updated when the
149 * TargetList is in service which can be:
150 * 1) during the Target instantiation if the MSHR is in
151 * service and the target is not deferred,
152 * 2) when the MSHR becomes in service if the target is not
153 * deferred,
154 * 3) or when the TargetList is promoted (deferredTargets ->
155 * targets).
156 */
157 bool markedPending;
158
159 const bool allocOnFill; //!< Should the response servicing this
160 //!< target list allocate in the cache?
161
162 Target(PacketPtr _pkt, Tick _readyTime, Counter _order,
163 Source _source, bool _markedPending, bool alloc_on_fill)
164 : QueueEntry::Target(_pkt, _readyTime, _order), source(_source),
165 markedPending(_markedPending), allocOnFill(alloc_on_fill)
166 {}
167 };
168
169 class TargetList : public std::list<Target>, public Named
170 {
When no MSHR is present
308 } else {
309 // no MSHR
310 assert(pkt->req->requestorId() < system->maxRequestors());
311 stats.cmdStats(pkt).mshrMisses[pkt->req->requestorId()]++;
312 if (prefetcher && pkt->isDemand())
313 prefetcher->incrDemandMhsrMisses();
314
315 if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) {
316 // We use forward_time here because there is an
317 // writeback or writeclean, forwarded to WriteBuffer.
318 allocateWriteBuffer(pkt, forward_time);
319 } else {
320 if (blk && blk->isValid()) {
321 // If we have a write miss to a valid block, we
322 // need to mark the block non-readable. Otherwise
323 // if we allow reads while there's an outstanding
324 // write miss, the read could return stale data
325 // out of the cache block... a more aggressive
326 // system could detect the overlap (if any) and
327 // forward data out of the MSHRs, but we don't do
328 // that yet. Note that we do need to leave the
329 // block valid so that it stays in the cache, in
330 // case we get an upgrade response (and hence no
331 // new data) when the write miss completes.
332 // As long as CPUs do proper store/load forwarding
333 // internally, and have a sufficiently weak memory
334 // model, this is probably unnecessary, but at some
335 // point it must have seemed like we needed it...
336 assert((pkt->needsWritable() &&
337 !blk->isSet(CacheBlk::WritableBit)) ||
338 pkt->req->isCacheMaintenance());
339 blk->clearCoherenceBits(CacheBlk::ReadableBit);
340 }
341 // Here we are using forward_time, modelling the latency of
342 // a miss (outbound) just as forwardLatency, neglecting the
343 // lookupLatency component.
344 allocateMissBuffer(pkt, forward_time);
345 }
346 }
347 }
It first checks whether the current memory request is an eviction (or WriteClean) request, which goes to the write buffer instead. Note that a cache miss can happen because of either a read or a write operation. When the cache already has a valid block but the access still misses, the block exists but is not writable. In that case, it first marks the selected block as non-readable (line 339), because the data must not be read until the write miss is resolved through the XBar. To handle the write miss, it invokes the allocateMissBuffer function.
allocateMissBuffer: allocate an MSHR entry for the write miss event
1164 MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)
1165 {
1166 MSHR *mshr = mshrQueue.allocate(pkt->getBlockAddr(blkSize), blkSize,
1167 pkt, time, order++,
1168 allocOnFill(pkt->cmd));
1169
1170 if (mshrQueue.isFull()) {
1171 setBlocked((BlockedCause)MSHRQueue_MSHRs);
1172 }
1173
1174 if (sched_send) {
1175 // schedule the send
1176 schedMemSideSendEvent(time);
1177 }
1178
1179 return mshr;
1180 }
When no MSHR entry is associated with the current request, the first priority is allocating a new MSHR entry for this memory request and any further requests to the same block. mshrQueue maintains all MSHR entries and provides an allocate interface that adds a new entry to the queue. After that, because allocateMissBuffer sets the sched_send parameter by default, it invokes schedMemSideSendEvent so that the lower-level cache or memory fetches the data. Let's take a look at how the MSHR entry is allocated, and later at how it is processed by schedMemSideSendEvent.
62 MSHR *
63 MSHRQueue::allocate(Addr blk_addr, unsigned blk_size, PacketPtr pkt,
64 Tick when_ready, Counter order, bool alloc_on_fill)
65 {
66 assert(!freeList.empty());
67 MSHR *mshr = freeList.front();
68 assert(mshr->getNumTargets() == 0);
69 freeList.pop_front();
70
71 DPRINTF(MSHR, "Allocating new MSHR. Number in use will be %lu/%lu\n",
72 allocatedList.size() + 1, numEntries);
73
74 mshr->allocate(blk_addr, blk_size, pkt, when_ready, order, alloc_on_fill);
75 mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
76 mshr->readyIter = addToReadyList(mshr);
77
78 allocated += 1;
79 return mshr;
80 }
The MSHRQueue manages all the MSHR entries of the cache. Also, MSHRQueue is a child class of the Queue class. Therefore, to understand how each MSHR entry is allocated, we should take a look at the methods and fields implemented in the Queue class. Note that Queue is a template class, so it can manage any type of queue entry. Each Queue has a list called freeList, which holds free entries of the type passed at template instantiation.
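As an illustration, here is a minimal free-list pool in the spirit of the Queue template (a simplified sketch with invented names, not the actual gem5 Queue):

#include <cassert>
#include <cstddef>
#include <list>
#include <vector>

template <class Entry>
class MiniQueue {
    std::vector<Entry> storage;       // fixed pool of entries
    std::list<Entry*> freeList;       // entries available for allocation
    std::list<Entry*> allocatedList;  // entries currently in use
  public:
    explicit MiniQueue(std::size_t n) : storage(n) {
        for (auto &e : storage)
            freeList.push_back(&e);
    }
    bool isFull() const { return freeList.empty(); }
    Entry *allocate() {
        assert(!freeList.empty());    // caller must check isFull() first
        Entry *e = freeList.front();
        freeList.pop_front();
        allocatedList.push_back(e);
        return e;
    }
    void deallocate(Entry *e) {
        allocatedList.remove(e);
        freeList.push_back(e);
    }
};

Preallocating the entries and recycling them through a free list is what bounds the number of outstanding misses: when the free list runs dry, the cache blocks.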
302 void
303 MSHR::allocate(Addr blk_addr, unsigned blk_size, PacketPtr target,
304 Tick when_ready, Counter _order, bool alloc_on_fill)
305 {
306 blkAddr = blk_addr;
307 blkSize = blk_size;
308 isSecure = target->isSecure();
309 readyTime = when_ready;
310 order = _order;
311 assert(target);
312 isForward = false;
313 wasWholeLineWrite = false;
314 _isUncacheable = target->req->isUncacheable();
315 inService = false;
316 downstreamPending = false;
317
318 targets.init(blkAddr, blkSize);
319 deferredTargets.init(blkAddr, blkSize);
320
321 // Don't know of a case where we would allocate a new MSHR for a
322 // snoop (mem-side request), so set source according to request here
323 Target::Source source = (target->cmd == MemCmd::HardPFReq) ?
324 Target::FromPrefetcher : Target::FromCPU;
325 targets.add(target, when_ready, _order, source, true, alloc_on_fill);
326
327 // All targets must refer to the same block
328 assert(target->matchBlockAddr(targets.front().pkt, blkSize));
329 }
First of all, the retrieved MSHR entry must be initialized. The allocate function of the MSHR object first initializes its bookkeeping fields and target lists. Remember that one MSHR entry can have multiple targets, and those targets are maintained by the targets and deferredTargets TargetLists, so both lists are initialized first. After the initialization, it adds the current request to the targets list.
104 typename Entry::Iterator addToReadyList(Entry* entry)
105 {
106 if (readyList.empty() ||
107 readyList.back()->readyTime <= entry->readyTime) {
108 return readyList.insert(readyList.end(), entry);
109 }
110
111 for (auto i = readyList.begin(); i != readyList.end(); ++i) {
112 if ((*i)->readyTime > entry->readyTime) {
113 return readyList.insert(i, entry);
114 }
115 }
116 panic("Failed to add to ready list.");
117 }
After the MSHR entry is initialized, the entry must also be registered in the readyList of the MSHRQueue. The readyList keeps all MSHR entries in ascending order of the readyTime of the initial packet that populated each entry. Because MSHR entries should be processed in readyTime order, a waiting MSHR is serviced once the time specified by its readyTime is reached. You can think of the readyList as a priority queue that determines which entry should be processed first among all MSHR entries.
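The ordered insertion can be illustrated with a small standalone sketch that mirrors the addToReadyList loop above (the MiniEntry type is invented for this example):

#include <cstdint>
#include <iostream>
#include <list>

struct MiniEntry { uint64_t readyTime; };

void addToReadyList(std::list<MiniEntry*> &readyList, MiniEntry *e) {
    // common case: the new entry is the latest, append at the back
    if (readyList.empty() || readyList.back()->readyTime <= e->readyTime) {
        readyList.push_back(e);
        return;
    }
    // otherwise insert before the first entry with a later ready time
    for (auto i = readyList.begin(); i != readyList.end(); ++i) {
        if ((*i)->readyTime > e->readyTime) {
            readyList.insert(i, e);
            return;
        }
    }
}

int main() {
    std::list<MiniEntry*> readyList;
    MiniEntry a{30}, b{10}, c{20};
    addToReadyList(readyList, &a);
    addToReadyList(readyList, &b);
    addToReadyList(readyList, &c);
    for (auto *e : readyList)
        std::cout << e->readyTime << " ";  // prints: 10 20 30
}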
schedMemSideSendEvent: schedule sending deferred packet
After allocating the MSHR entry for the missed packet, the missed request must be forwarded to the next cache level or to memory, depending on where the current cache sits in the hierarchy. However, real hardware cannot handle the miss and forward the request in the same clock cycle. Therefore, the send of the missed request packet is scheduled to happen after a few clock cycles have elapsed. For that purpose, the schedMemSideSendEvent function is invoked.
1257 /**
1258 * Schedule a send event for the memory-side port. If already
1259 * scheduled, this may reschedule the event at an earlier
1260 * time. When the specified time is reached, the port is free to
1261 * send either a response, a request, or a prefetch request.
1262 *
1263 * @param time The time when to attempt sending a packet.
1264 */
1265 void schedMemSideSendEvent(Tick time)
1266 {
1267 memSidePort.schedSendEvent(time);
1268 }
We have already looked at the schedSendEvent function provided by the PacketQueue. Its major job was registering an event that processes a deferred packet and sends the response through the CpuSidePort. However, note that we are now looking at the memSidePort's schedSendEvent.
234 /**
235 * The memory-side port extends the base cache request port with
236 * access functions for functional, atomic and timing snoops.
237 */
238 class MemSidePort : public CacheRequestPort
239 {
240 private:
241
242 /** The cache-specific queue. */
243 CacheReqPacketQueue _reqQueue;
244
245 SnoopRespPacketQueue _snoopRespQueue;
246
247 // a pointer to our specific cache implementation
248 BaseCache *cache;
249
250 protected:
251
252 virtual void recvTimingSnoopReq(PacketPtr pkt);
253
254 virtual bool recvTimingResp(PacketPtr pkt);
255
256 virtual Tick recvAtomicSnoop(PacketPtr pkt);
257
258 virtual void recvFunctionalSnoop(PacketPtr pkt);
259
260 public:
261
262 MemSidePort(const std::string &_name, BaseCache *_cache,
263 const std::string &_label);
264 };
Because MemSidePort does not itself provide schedSendEvent, we have to look at its parent class, CacheRequestPort.
143 /**
144 * A cache request port is used for the memory-side port of the
145 * cache, and in addition to the basic timing port that only sends
146 * response packets through a transmit list, it also offers the
147 * ability to schedule and send request packets (requests &
148 * writebacks). The send event is scheduled through schedSendEvent,
149 * and the sendDeferredPacket of the timing port is modified to
150 * consider both the transmit list and the requests from the MSHR.
151 */
152 class CacheRequestPort : public QueuedRequestPort
153 {
154
155 public:
156
157 /**
158 * Schedule a send of a request packet (from the MSHR). Note
159 * that we could already have a retry outstanding.
160 */
161 void schedSendEvent(Tick time)
162 {
163 DPRINTF(CachePort, "Scheduling send event at %llu\n", time);
164 reqQueue.schedSendEvent(time);
165 }
166
167 protected:
168
169 CacheRequestPort(const std::string &_name, BaseCache *_cache,
170 ReqPacketQueue &_reqQueue,
171 SnoopRespPacketQueue &_snoopRespQueue) :
172 QueuedRequestPort(_name, _cache, _reqQueue, _snoopRespQueue)
173 { }
174
175 /**
176 * Memory-side port always snoops.
177 *
178 * @return always true
179 */
180 virtual bool isSnooping() const { return true; }
181 };
This class has interfaces very similar to those of the CpuSidePort. However, its schedSendEvent invokes the schedSendEvent of the reqQueue instead of the respQueue.
154 void
155 PacketQueue::schedSendEvent(Tick when)
156 {
157 // if we are waiting on a retry just hold off
158 if (waitingOnRetry) {
159 DPRINTF(PacketQueue, "Not scheduling send as waiting for retry\n");
160 assert(!sendEvent.scheduled());
161 return;
162 }
163
164 if (when != MaxTick) {
165 // we cannot go back in time, and to be consistent we stick to
166 // one tick in the future
167 when = std::max(when, curTick() + 1);
168 // @todo Revisit the +1
169
170 if (!sendEvent.scheduled()) {
171 em.schedule(&sendEvent, when);
172 } else if (when < sendEvent.when()) {
173 // if the new time is earlier than when the event
174 // currently is scheduled, move it forward
175 em.reschedule(&sendEvent, when);
176 }
177 } else {
178 // we get a MaxTick when there is no more to send, so if we're
179 // draining, we may be done at this point
180 if (drainState() == DrainState::Draining &&
181 transmitList.empty() && !sendEvent.scheduled()) {
182
183 DPRINTF(Drain, "PacketQueue done draining,"
184 "processing drain event\n");
185 signalDrainDone();
186 }
187 }
188 }
Although the type of the reqQueue differs from that of the respQueue, note that the same method is invoked here, because both inherit from the PacketQueue class.
50 PacketQueue::PacketQueue(EventManager& _em, const std::string& _label,
51 const std::string& _sendEventName,
52 bool force_order,
53 bool disable_sanity_check)
54 : em(_em), sendEvent([this]{ processSendEvent(); }, _sendEventName),
55 _disableSanityCheck(disable_sanity_check),
56 forceOrder(force_order),
57 label(_label), waitingOnRetry(false)
58 {
59 }
......
220 void
221 PacketQueue::processSendEvent()
222 {
223 assert(!waitingOnRetry);
224 sendDeferredPacket();
225 }
schedSendEvent schedules sendEvent, and processSendEvent is invoked when the event fires. However, when the sendEvent fires here, processSendEvent ends up calling a different sendDeferredPacket function. Note that the reqQueue is a CacheReqPacketQueue, which inherits from ReqPacketQueue, and the CacheReqPacketQueue overrides the sendDeferredPacket implemented in the PacketQueue class. Therefore, even though the call site lives in PacketQueue, the overridden implementation of sendDeferredPacket is invoked instead.
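In other words, the base class schedules the event and makes a virtual call, and the derived class supplies the behavior. A minimal sketch of this dispatch (the Mini* names are invented; this is not gem5 code):

#include <iostream>

struct MiniPacketQueue {
    virtual ~MiniPacketQueue() = default;
    // base behaviour: drain the internal transmitList
    virtual void sendDeferredPacket() {
        std::cout << "PacketQueue: send head of transmitList\n";
    }
    void processSendEvent() { sendDeferredPacket(); }  // virtual call
};

struct MiniCacheReqPacketQueue : MiniPacketQueue {
    // override: ask the cache (MSHR queue / write buffer) instead
    void sendDeferredPacket() override {
        std::cout << "CacheReqPacketQueue: ask cache.getNextQueueEntry()\n";
    }
};

int main() {
    MiniCacheReqPacketQueue reqQueue;
    MiniPacketQueue &q = reqQueue;   // handled through the base type
    q.processSendEvent();            // prints the overridden message
}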
2549 void
2550 BaseCache::CacheReqPacketQueue::sendDeferredPacket()
2551 {
2552 // sanity check
2553 assert(!waitingOnRetry);
2554
2555 // there should never be any deferred request packets in the
2556 // queue, instead we rely on the cache to provide the packets
2557 // from the MSHR queue or write queue
2558 assert(deferredPacketReadyTime() == MaxTick);
2559
2560 // check for request packets (requests & writebacks)
2561 QueueEntry* entry = cache.getNextQueueEntry();
2562
2563 if (!entry) {
2564 // can happen if e.g. we attempt a writeback and fail, but
2565 // before the retry, the writeback is eliminated because
2566 // we snoop another cache's ReadEx.
2567 } else {
2568 // let our snoop responses go first if there are responses to
2569 // the same addresses
2570 if (checkConflictingSnoop(entry->getTarget()->pkt)) {
2571 return;
2572 }
2573 waitingOnRetry = entry->sendPacket(cache);
2574 }
2575
2576 // if we succeeded and are not waiting for a retry, schedule the
2577 // next send considering when the next queue is ready, note that
2578 // snoop responses have their own packet queue and thus schedule
2579 // their own events
2580 if (!waitingOnRetry) {
2581 schedSendEvent(cache.nextQueueReadyTime());
2582 }
2583 }
You might remember that the sendDeferredPacket of the PacketQueue dequeues packets from the transmitList and sends them to the CPU in the cache hit case we covered earlier (sending the response to the CPU). When a cache miss happens, however, it needs help from the more complicated cache units, the MSHR queue and the writeBuffer. Also, you might have noticed that the packet was not pushed onto the transmitList, but into the MSHR queue or the writeBuffer. Instead of searching the transmitList, this version invokes the getNextQueueEntry function to find the next entry to process.
getNextQueueEntry: select entry to send to the memory either from MSHR or writeBuffer
773 QueueEntry*
774 BaseCache::getNextQueueEntry()
775 {
776 // Check both MSHR queue and write buffer for potential requests,
777 // note that null does not mean there is no request, it could
778 // simply be that it is not ready
779 MSHR *miss_mshr = mshrQueue.getNext();
780 WriteQueueEntry *wq_entry = writeBuffer.getNext();
When a cache miss happens, the missed request packet can be stored in either the MSHR queue or the WriteBuffer. This is because outgoing memory requests are issued from two different units depending on the type of the memory request, whereas sending a response to the upper cache or processor is handled in a unified way regardless of the request type.
getNext returns the entry that is ready to be processed
When an entry is retrieved with the getNext method in the getNextQueueEntry function, it is the MSHR or write buffer entry that has been waiting the longest. Note that the getNext function is defined in the Queue class, and both the WriteBuffer and the MSHRQueue inherit from the Queue class.
217 /**
218 * Returns the WriteQueueEntry at the head of the readyList.
219 * @return The next request to service.
220 */
221 Entry* getNext() const
222 {
223 if (readyList.empty() || readyList.front()->readyTime > curTick()) {
224 return nullptr;
225 }
226 return readyList.front();
227 }
The getNext function returns the first entry of the readyList, provided its readyTime has already passed. Note that the front entry of the readyList has the highest priority based on readyTime, so the entry that needs handling soonest is processed first.
782 // If we got a write buffer request ready, first priority is a
783 // full write buffer, otherwise we favour the miss requests
784 if (wq_entry && (writeBuffer.isFull() || !miss_mshr)) {
785 // need to search MSHR queue for conflicting earlier miss.
786 MSHR *conflict_mshr = mshrQueue.findPending(wq_entry);
787
788 if (conflict_mshr && conflict_mshr->order < wq_entry->order) {
789 // Service misses in order until conflict is cleared.
790 return conflict_mshr;
791
792 // @todo Note that we ignore the ready time of the conflict here
793 }
794
795 // No conflicts; issue write
796 return wq_entry;
797 } else if (miss_mshr) {
798 // need to check for conflicting earlier writeback
799 WriteQueueEntry *conflict_mshr = writeBuffer.findPending(miss_mshr);
800 if (conflict_mshr) {
801 // not sure why we don't check order here... it was in the
802 // original code but commented out.
803
804 // The only way this happens is if we are
805 // doing a write and we didn't have permissions
806 // then subsequently saw a writeback (owned got evicted)
807 // We need to make sure to perform the writeback first
808 // To preserve the dirty data, then we can issue the write
809
810 // should we return wq_entry here instead? I.e. do we
811 // have to flush writes in order? I don't think so... not
812 // for Alpha anyway. Maybe for x86?
813 return conflict_mshr;
814
815 // @todo Note that we ignore the ready time of the conflict here
816 }
817
818 // No conflicts; issue read
819 return miss_mshr;
820 }
After the two candidate entries are retrieved from the MSHR queue and the write buffer, their conditions must be compared to determine which one is processed first. It is important to note that the port from the cache to the memory is a limited resource: with two input sources to choose from, we must decide which packet is sent to the memory. Here, the logic gives priority to draining a full writeBuffer; when the writeBuffer is not full, the MSHR queue is serviced instead. Also, even when the writeBuffer is full, if there is a conflicting and earlier entry in the MSHR queue, the selected entry is replaced with that conflicting MSHR entry; otherwise, the entry selected from the writeBuffer is returned. Judging from the comments inside getNextQueueEntry, the exact selection order is somewhat controversial, so I will not go into it further.
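Below is a simplified sketch of that arbitration (the arbitrate function and MiniEntry type are invented for illustration, and the real code also considers ready times and per-queue conflict lookups):

struct MiniEntry { long order; };

MiniEntry *arbitrate(MiniEntry *wqEntry, bool writeBufferFull,
                     MiniEntry *missMshr,
                     MiniEntry *conflictOnOtherSide) {
    if (wqEntry && (writeBufferFull || !missMshr)) {
        // an older conflicting miss must be serviced first
        if (conflictOnOtherSide &&
            conflictOnOtherSide->order < wqEntry->order)
            return conflictOnOtherSide;
        return wqEntry;          // no conflict: issue the write
    } else if (missMshr) {
        if (conflictOnOtherSide)
            return conflictOnOtherSide;  // writeback goes first
        return missMshr;         // no conflict: issue the miss
    }
    return nullptr;              // nothing ready
}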
Generate a prefetch request when there are no entries to process
822 // fall through... no pending requests. Try a prefetch.
823 assert(!miss_mshr && !wq_entry);
824 if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) {
825 // If we have a miss queue slot, we can try a prefetch
826 PacketPtr pkt = prefetcher->getPacket();
827 if (pkt) {
828 Addr pf_addr = pkt->getBlockAddr(blkSize);
829 if (tags->findBlock(pf_addr, pkt->isSecure())) {
830 DPRINTF(HWPrefetch, "Prefetch %#x has hit in cache, "
831 "dropped.\n", pf_addr);
832 prefetcher->pfHitInCache();
833 // free the request and packet
834 delete pkt;
835 } else if (mshrQueue.findMatch(pf_addr, pkt->isSecure())) {
836 DPRINTF(HWPrefetch, "Prefetch %#x has hit in a MSHR, "
837 "dropped.\n", pf_addr);
838 prefetcher->pfHitInMSHR();
839 // free the request and packet
840 delete pkt;
841 } else if (writeBuffer.findMatch(pf_addr, pkt->isSecure())) {
842 DPRINTF(HWPrefetch, "Prefetch %#x has hit in the "
843 "Write Buffer, dropped.\n", pf_addr);
844 prefetcher->pfHitInWB();
845 // free the request and packet
846 delete pkt;
847 } else {
848 // Update statistic on number of prefetches issued
849 // (hwpf_mshr_misses)
850 assert(pkt->req->requestorId() < system->maxRequestors());
851 stats.cmdStats(pkt).mshrMisses[pkt->req->requestorId()]++;
852
853 // allocate an MSHR and return it, note
854 // that we send the packet straight away, so do not
855 // schedule the send
856 return allocateMissBuffer(pkt, curTick(), false);
857 }
858 }
859 }
860
861 return nullptr;
862 }
The fall-through path is only reachable when no suitable request is waiting in either the writeBuffer or the mshrQueue. In that case, the cache tries to issue a prefetch. Note that this is not software prefetching: the addresses are generated by a hardware prefetcher. Because the hardware prefetcher does not know whether the cache or the waiting queues already cover the prefetched cache line, the code checks all of them to confirm that the prefetch request is fresh. If it is, the request is added to the MSHR queue. Because the added request will be handled later when the next event fires, the function returns nullptr to report that there is no packet to send to the memory in this cycle.
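The filtering logic can be summarized in a small sketch, with the three membership checks abstracted as predicates standing in for tags->findBlock, mshrQueue.findMatch, and writeBuffer.findMatch (all names here are invented for illustration):

#include <cstdint>
#include <functional>

using BlockPredicate = std::function<bool(uint64_t)>;

bool shouldIssuePrefetch(uint64_t pfAddr,
                         const BlockPredicate &inCache,
                         const BlockPredicate &inMshr,
                         const BlockPredicate &inWriteBuffer) {
    if (inCache(pfAddr))       return false;  // already resident
    if (inMshr(pfAddr))        return false;  // already being fetched
    if (inWriteBuffer(pfAddr)) return false;  // being written back
    return true;               // fresh: allocate an MSHR for it
}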
checkConflictingSnoop
2563 if (!entry) {
2564 // can happen if e.g. we attempt a writeback and fail, but
2565 // before the retry, the writeback is eliminated because
2566 // we snoop another cache's ReadEx.
2567 } else {
2568 // let our snoop responses go first if there are responses to
2569 // the same addresses
2570 if (checkConflictingSnoop(entry->getTarget()->pkt)) {
2571 return;
2572 }
2573 waitingOnRetry = entry->sendPacket(cache);
2574 }
After the entry is found, the queue checks whether there is a conflicting snoop response for the same block.
212 /**
213 * Check if there is a conflicting snoop response about to be
214 * send out, and if so simply stall any requests, and schedule
215 * a send event at the same time as the next snoop response is
216 * being sent out.
217 *
218 * @param pkt The packet to check for conflicts against.
219 */
220 bool checkConflictingSnoop(const PacketPtr pkt)
221 {
222 if (snoopRespQueue.checkConflict(pkt, cache.blkSize)) {
223 DPRINTF(CachePort, "Waiting for snoop response to be "
224 "sent\n");
225 Tick when = snoopRespQueue.deferredPacketReadyTime();
226 schedSendEvent(when);
227 return true;
228 }
229 return false;
230 }
In other words, if there is a snoop response waiting for the same address, the currently selected entry must be deferred until that snoop response has been handled. The deferredPacketReadyTime function returns the time at which the snoop response will be sent, and schedSendEvent reschedules the miss handling for that time.
74 bool
75 PacketQueue::checkConflict(const PacketPtr pkt, const int blk_size) const
76 {
77 // caller is responsible for ensuring that all packets have the
78 // same alignment
79 for (const auto& p : transmitList) {
80 if (p.pkt->matchBlockAddr(pkt, blk_size))
81 return true;
82 }
83 return false;
84 }
Because the SnoopRespPacketQueue is a child of PacketQueue, it invokes the above checkConflict function to figure out whether a snoop response packet for the same address as the selected entry is waiting.
Finally, sendPacket
When there is no conflict between the selected entry and any pending snoop response, the request stored in the selected entry is sent.
2549 void
2550 BaseCache::CacheReqPacketQueue::sendDeferredPacket()
......
2561 QueueEntry* entry = cache.getNextQueueEntry();
2562
2563 if (!entry) {
2564 // can happen if e.g. we attempt a writeback and fail, but
2565 // before the retry, the writeback is eliminated because
2566 // we snoop another cache's ReadEx.
2567 } else {
2568 // let our snoop responses go first if there are responses to
2569 // the same addresses
2570 if (checkConflictingSnoop(entry->getTarget()->pkt)) {
2571 return;
2572 }
2573 waitingOnRetry = entry->sendPacket(cache);
2574 }
2575
2576 // if we succeeded and are not waiting for a retry, schedule the
2577 // next send considering when the next queue is ready, note that
2578 // snoop responses have their own packet queue and thus schedule
2579 // their own events
2580 if (!waitingOnRetry) {
2581 schedSendEvent(cache.nextQueueReadyTime());
2582 }
2583 }
The sendPacket function is declared as a virtual function in the QueueEntry class, so concrete implementations are provided by the MSHR and WriteQueueEntry classes. Depending on which type of entry was selected, one of the sendPacket implementations below is invoked. Also note that the CacheReqPacketQueue has a member field cache, a reference to the BaseCache, and that this field is initialized with the cache object that owns the CacheReqPacketQueue; in our case it is the Cache object.
705 bool
706 MSHR::sendPacket(BaseCache &cache)
707 {
708 return cache.sendMSHRQueuePacket(this);
709 }
140 bool
141 WriteQueueEntry::sendPacket(BaseCache &cache)
142 {
143 return cache.sendWriteQueuePacket(this);
144 }
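This is a classic double dispatch: the queue holds only a QueueEntry*, the entry's virtual sendPacket call selects the concrete type, and the entry immediately calls back into the cache method that matches that type. A minimal sketch (the Mini* names are invented for illustration):

#include <iostream>

struct MiniCache;  // forward declaration

struct MiniQueueEntry {
    virtual ~MiniQueueEntry() = default;
    virtual bool sendPacket(MiniCache &cache) = 0;
};

struct MiniMshr;             // concrete entry types
struct MiniWriteQueueEntry;

struct MiniCache {
    bool sendMSHRQueuePacket(MiniMshr *) {
        std::cout << "servicing an MSHR entry\n"; return false;
    }
    bool sendWriteQueuePacket(MiniWriteQueueEntry *) {
        std::cout << "servicing a write queue entry\n"; return false;
    }
};

struct MiniMshr : MiniQueueEntry {
    bool sendPacket(MiniCache &c) override {
        return c.sendMSHRQueuePacket(this);
    }
};

struct MiniWriteQueueEntry : MiniQueueEntry {
    bool sendPacket(MiniCache &c) override {
        return c.sendWriteQueuePacket(this);
    }
};

int main() {
    MiniCache cache;
    MiniMshr mshr;
    MiniQueueEntry *entry = &mshr;  // as returned by getNextQueueEntry
    entry->sendPacket(cache);       // dispatches to sendMSHRQueuePacket
}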
Processing selected MSHR entry
Cache::sendMSHRQueuePacket
1358 bool
1359 Cache::sendMSHRQueuePacket(MSHR* mshr)
1360 {
1361 assert(mshr);
1362
1363 // use request from 1st target
1364 PacketPtr tgt_pkt = mshr->getTarget()->pkt;
1365
1366 if (tgt_pkt->cmd == MemCmd::HardPFReq && forwardSnoops) {
1367 DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print());
1368
1369 // we should never have hardware prefetches to allocated
1370 // blocks
1371 assert(!tags->findBlock(mshr->blkAddr, mshr->isSecure));
1372
1373 // We need to check the caches above us to verify that
1374 // they don't have a copy of this block in the dirty state
1375 // at the moment. Without this check we could get a stale
1376 // copy from memory that might get used in place of the
1377 // dirty one.
1378 Packet snoop_pkt(tgt_pkt, true, false);
1379 snoop_pkt.setExpressSnoop();
1380 // We are sending this packet upwards, but if it hits we will
1381 // get a snoop response that we end up treating just like a
1382 // normal response, hence it needs the MSHR as its sender
1383 // state
1384 snoop_pkt.senderState = mshr;
1385 cpuSidePort.sendTimingSnoopReq(&snoop_pkt);
1386
1387 // Check to see if the prefetch was squashed by an upper cache (to
1388 // prevent us from grabbing the line) or if a Check to see if a
1389 // writeback arrived between the time the prefetch was placed in
1390 // the MSHRs and when it was selected to be sent or if the
1391 // prefetch was squashed by an upper cache.
1392
1393 // It is important to check cacheResponding before
1394 // prefetchSquashed. If another cache has committed to
1395 // responding, it will be sending a dirty response which will
1396 // arrive at the MSHR allocated for this request. Checking the
1397 // prefetchSquash first may result in the MSHR being
1398 // prematurely deallocated.
1399 if (snoop_pkt.cacheResponding()) {
1400 GEM5_VAR_USED auto r = outstandingSnoop.insert(snoop_pkt.req);
1401 assert(r.second);
1402
1403 // if we are getting a snoop response with no sharers it
1404 // will be allocated as Modified
1405 bool pending_modified_resp = !snoop_pkt.hasSharers();
1406 markInService(mshr, pending_modified_resp);
1407
1408 DPRINTF(Cache, "Upward snoop of prefetch for addr"
1409 " %#x (%s) hit\n",
1410 tgt_pkt->getAddr(), tgt_pkt->isSecure()? "s": "ns");
1411 return false;
1412 }
1413
1414 if (snoop_pkt.isBlockCached()) {
1415 DPRINTF(Cache, "Block present, prefetch squashed by cache. "
1416 "Deallocating mshr target %#x.\n",
1417 mshr->blkAddr);
1418
1419 // Deallocate the mshr target
1420 if (mshrQueue.forceDeallocateTarget(mshr)) {
1421 // Clear block if this deallocation resulted freed an
1422 // mshr when all had previously been utilized
1423 clearBlocked(Blocked_NoMSHRs);
1424 }
1425
1426 // given that no response is expected, delete Request and Packet
1427 delete tgt_pkt;
1428
1429 return false;
1430 }
1431 }
1432
1433 return BaseCache::sendMSHRQueuePacket(mshr);
1434 }
Because we are currently dealing with a Cache, not the BaseCache, the sendMSHRQueuePacket of the Cache class is invoked first. Although the code looks fairly complicated, most of it handles the special case of hardware prefetches and is not relevant to general MSHR packet handling. At the end, the function invokes the sendMSHRQueuePacket of the BaseCache to handle packets in the common scenario.
BaseCache::sendMSHRQueuePacket
1789 bool
1790 BaseCache::sendMSHRQueuePacket(MSHR* mshr)
1791 {
1792 assert(mshr);
1793
1794 // use request from 1st target
1795 PacketPtr tgt_pkt = mshr->getTarget()->pkt;
1796
1797 DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print());
1798
1799 // if the cache is in write coalescing mode or (additionally) in
1800 // no allocation mode, and we have a write packet with an MSHR
1801 // that is not a whole-line write (due to incompatible flags etc),
1802 // then reset the write mode
1803 if (writeAllocator && writeAllocator->coalesce() && tgt_pkt->isWrite()) {
1804 if (!mshr->isWholeLineWrite()) {
1805 // if we are currently write coalescing, hold on the
1806 // MSHR as many cycles extra as we need to completely
1807 // write a cache line
1808 if (writeAllocator->delay(mshr->blkAddr)) {
1809 Tick delay = blkSize / tgt_pkt->getSize() * clockPeriod();
1810 DPRINTF(CacheVerbose, "Delaying pkt %s %llu ticks to allow "
1811 "for write coalescing\n", tgt_pkt->print(), delay);
1812 mshrQueue.delay(mshr, delay);
1813 return false;
1814 } else {
1815 writeAllocator->reset();
1816 }
1817 } else {
1818 writeAllocator->resetDelay(mshr->blkAddr);
1819 }
1820 }
1821
1822 CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);
1823
1824 // either a prefetch that is not present upstream, or a normal
1825 // MSHR request, proceed to get the packet to send downstream
1826 PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable(),
1827 mshr->isWholeLineWrite());
Note that at this point we hold the MSHR entry selected based on priority and timing. Therefore, the first job is to find the associated cache block, if one exists, and to generate a miss packet to send to the next-level cache or memory.
createMissPacket
Remember that we are here because of a cache miss. Depending on the kind of miss, however, the request might already be associated with a specific cache block. For example, when the block is allocated but in a non-writable state, a write to it misses, and the miss must upgrade the allocated block to an exclusively writable state. For that purpose, the cache must generate the proper packet and send it through the XBar to the other components that might share the cache block. Let's take a look at the details.
476 PacketPtr
477 Cache::createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk,
478 bool needsWritable,
479 bool is_whole_line_write) const
480 {
481 // should never see evictions here
482 assert(!cpu_pkt->isEviction());
483
484 bool blkValid = blk && blk->isValid();
485
486 if (cpu_pkt->req->isUncacheable() ||
487 (!blkValid && cpu_pkt->isUpgrade()) ||
488 cpu_pkt->cmd == MemCmd::InvalidateReq || cpu_pkt->isClean()) {
489 // uncacheable requests and upgrades from upper-level caches
490 // that missed completely just go through as is
491 return nullptr;
492 }
493
494 assert(cpu_pkt->needsResponse());
495
496 MemCmd cmd;
497 // @TODO make useUpgrades a parameter.
498 // Note that ownership protocols require upgrade, otherwise a
499 // write miss on a shared owned block will generate a ReadExcl,
500 // which will clobber the owned copy.
501 const bool useUpgrades = true;
502 assert(cpu_pkt->cmd != MemCmd::WriteLineReq || is_whole_line_write);
503 if (is_whole_line_write) {
504 assert(!blkValid || !blk->isSet(CacheBlk::WritableBit));
505 // forward as invalidate to all other caches, this gives us
506 // the line in Exclusive state, and invalidates all other
507 // copies
508 cmd = MemCmd::InvalidateReq;
509 } else if (blkValid && useUpgrades) {
510 // only reason to be here is that blk is read only and we need
511 // it to be writable
512 assert(needsWritable);
513 assert(!blk->isSet(CacheBlk::WritableBit));
514 cmd = cpu_pkt->isLLSC() ? MemCmd::SCUpgradeReq : MemCmd::UpgradeReq;
515 } else if (cpu_pkt->cmd == MemCmd::SCUpgradeFailReq ||
516 cpu_pkt->cmd == MemCmd::StoreCondFailReq) {
517 // Even though this SC will fail, we still need to send out the
518 // request and get the data to supply it to other snoopers in the case
519 // where the determination the StoreCond fails is delayed due to
520 // all caches not being on the same local bus.
521 cmd = MemCmd::SCUpgradeFailReq;
522 } else {
523 // block is invalid
524
525 // If the request does not need a writable there are two cases
526 // where we need to ensure the response will not fetch the
527 // block in dirty state:
528 // * this cache is read only and it does not perform
529 // writebacks,
530 // * this cache is mostly exclusive and will not fill (since
531 // it does not fill it will have to writeback the dirty data
532 // immediately which generates uneccesary writebacks).
533 bool force_clean_rsp = isReadOnly || clusivity == enums::mostly_excl;
534 cmd = needsWritable ? MemCmd::ReadExReq :
535 (force_clean_rsp ? MemCmd::ReadCleanReq : MemCmd::ReadSharedReq);
536 }
537 PacketPtr pkt = new Packet(cpu_pkt->req, cmd, blkSize);
538
539 // if there are upstream caches that have already marked the
540 // packet as having sharers (not passing writable), pass that info
541 // downstream
542 if (cpu_pkt->hasSharers() && !needsWritable) {
543 // note that cpu_pkt may have spent a considerable time in the
544 // MSHR queue and that the information could possibly be out
545 // of date, however, there is no harm in conservatively
546 // assuming the block has sharers
547 pkt->setHasSharers();
548 DPRINTF(Cache, "%s: passing hasSharers from %s to %s\n",
549 __func__, cpu_pkt->print(), pkt->print());
550 }
551
552 // the packet should be block aligned
553 assert(pkt->getAddr() == pkt->getBlockAddr(blkSize));
554
555 pkt->allocate();
556 DPRINTF(Cache, "%s: created %s from %s\n", __func__, pkt->print(),
557 cpu_pkt->print());
558 return pkt;
559 }
Most of the time the final else branch is executed: it generates a ReadExReq packet for a write miss that needs a writable copy, or a ReadSharedReq (or ReadCleanReq) packet for a miss caused by a read operation.
Sending the miss packet!
1789 bool
1790 BaseCache::sendMSHRQueuePacket(MSHR* mshr)
1791 {
......
1829 mshr->isForward = (pkt == nullptr);
1830
1831 if (mshr->isForward) {
1832 // not a cache block request, but a response is expected
1833 // make copy of current packet to forward, keep current
1834 // copy for response handling
1835 pkt = new Packet(tgt_pkt, false, true);
1836 assert(!pkt->isWrite());
1837 }
1838
1839 // play it safe and append (rather than set) the sender state,
1840 // as forwarded packets may already have existing state
1841 pkt->pushSenderState(mshr);
1842
1843 if (pkt->isClean() && blk && blk->isSet(CacheBlk::DirtyBit)) {
1844 // A cache clean opearation is looking for a dirty block. Mark
1845 // the packet so that the destination xbar can determine that
1846 // there will be a follow-up write packet as well.
1847 pkt->setSatisfied();
1848 }
1849
1850 if (!memSidePort.sendTimingReq(pkt)) {
1851 // we are awaiting a retry, but we
1852 // delete the packet and will be creating a new packet
1853 // when we get the opportunity
1854 delete pkt;
1855
1856 // note that we have now masked any requestBus and
1857 // schedSendEvent (we will wait for a retry before
1858 // doing anything), and this is so even if we do not
1859 // care about this packet and might override it before
1860 // it gets retried
1861 return true;
1862 } else {
1863 // As part of the call to sendTimingReq the packet is
1864 // forwarded to all neighbouring caches (and any caches
1865 // above them) as a snoop. Thus at this point we know if
1866 // any of the neighbouring caches are responding, and if
1867 // so, we know it is dirty, and we can determine if it is
1868 // being passed as Modified, making our MSHR the ordering
1869 // point
1870 bool pending_modified_resp = !pkt->hasSharers() &&
1871 pkt->cacheResponding();
1872 markInService(mshr, pending_modified_resp);
1873
1874 if (pkt->isClean() && blk && blk->isSet(CacheBlk::DirtyBit)) {
1875 // A cache clean opearation is looking for a dirty
1876 // block. If a dirty block is encountered a WriteClean
1877 // will update any copies to the path to the memory
1878 // until the point of reference.
1879 DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
1880 __func__, pkt->print(), blk->print());
1881 PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(),
1882 pkt->id);
1883 PacketList writebacks;
1884 writebacks.push_back(wb_pkt);
1885 doWritebacks(writebacks, 0);
1886 }
1887
1888 return false;
1889 }
1890 }
This is the end of the recvTimingReq path of the cache.
Two ports in the cache
92 /**
93 * A basic cache interface. Implements some common functions for speed.
94 */
95 class BaseCache : public ClockedObject
96 {
......
338 CpuSidePort cpuSidePort;
339 MemSidePort memSidePort;
CpuSidePort: receive request from the processor and send response
307 /**
308 * The CPU-side port extends the base cache response port with access
309 * functions for functional, atomic and timing requests.
310 */
311 class CpuSidePort : public CacheResponsePort
312 {
313 private:
314
315 // a pointer to our specific cache implementation
316 BaseCache *cache;
317
318 protected:
319 virtual bool recvTimingSnoopResp(PacketPtr pkt) override;
320
321 virtual bool tryTiming(PacketPtr pkt) override;
322
323 virtual bool recvTimingReq(PacketPtr pkt) override;
324
325 virtual Tick recvAtomic(PacketPtr pkt) override;
326
327 virtual void recvFunctional(PacketPtr pkt) override;
328
329 virtual AddrRangeList getAddrRanges() const override;
330
331 public:
332
333 CpuSidePort(const std::string &_name, BaseCache *_cache,
334 const std::string &_label);
335
336 };
337
79 BaseCache::BaseCache(const BaseCacheParams &p, unsigned blk_size)
80 : ClockedObject(p),
81 cpuSidePort (p.name + ".cpu_side_port", this, "CpuSidePort"),
82 memSidePort(p.name + ".mem_side_port", this, "MemSidePort"),
83 mshrQueue("MSHRs", p.mshrs, 0, p.demand_mshr_reserve, p.name),
84 writeBuffer("write buffer", p.write_buffers, p.mshrs, p.name),
cpuSidePort is a member field of the BaseCache, and it in turn has a cache member field, a pointer back to the BaseCache. Note that this field is initialized to point at the very BaseCache that embeds the cpuSidePort. Also, the port has a recvTimingReq function that is invoked when the processor tries to send a request to the cache.
CacheResponsePort
266 /**
267 * A cache response port is used for the CPU-side port of the cache,
268 * and it is basically a simple timing port that uses a transmit
269 * list for responses to the CPU (or connected requestor). In
270 * addition, it has the functionality to block the port for
271 * incoming requests. If blocked, the port will issue a retry once
272 * unblocked.
273 */
274 class CacheResponsePort : public QueuedResponsePort
275 {
276
277 public:
278
279 /** Do not accept any new requests. */
280 void setBlocked();
281
282 /** Return to normal operation and accept new requests. */
283 void clearBlocked();
284
285 bool isBlocked() const { return blocked; }
286
287 protected:
288
289 CacheResponsePort(const std::string &_name, BaseCache *_cache,
290 const std::string &_label);
291
292 /** A normal packet queue used to store responses. */
293 RespPacketQueue queue;
294
295 bool blocked;
296
297 bool mustSendRetry;
298
299 private:
300
301 void processSendRetry();
302
303 EventFunctionWrapper sendRetryEvent;
304
305 };
69 BaseCache::CacheResponsePort::CacheResponsePort(const std::string &_name,
70 BaseCache *_cache,
71 const std::string &_label)
72 : QueuedResponsePort(_name, _cache, queue),
73 queue(*_cache, *this, true, _label),
74 blocked(false), mustSendRetry(false),
75 sendRetryEvent([this]{ processSendRetry(); }, _name)
76 {
77 }
The CpuSidePort class inherits from CacheResponsePort. The main functionality of the CacheResponsePort is allowing the port to be blocked while it is busy processing previous packets.
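A minimal sketch of that blocking and retry handshake (hypothetical MiniResponsePort; the real implementation schedules sendRetryEvent instead of sending the retry inline):

#include <iostream>

struct MiniResponsePort {
    bool blocked = false;
    bool mustSendRetry = false;

    bool recvTimingReq() {
        if (blocked) {
            mustSendRetry = true;  // owe the requestor a retry
            return false;          // nack the request
        }
        return true;               // accept and process
    }
    void setBlocked() { blocked = true; }
    void clearBlocked() {
        blocked = false;
        if (mustSendRetry) {
            mustSendRetry = false;
            std::cout << "sendRetryReq()\n";  // requestor resends now
        }
    }
};

int main() {
    MiniResponsePort port;
    port.setBlocked();
    std::cout << port.recvTimingReq() << "\n";  // 0: rejected
    port.clearBlocked();                        // prints sendRetryReq()
    std::cout << port.recvTimingReq() << "\n";  // 1: accepted
}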
QueuedResponsePort
53 /**
54 * A queued port is a port that has an infinite queue for outgoing
55 * packets and thus decouples the module that wants to send
56 * request/responses from the flow control (retry mechanism) of the
57 * port. A queued port can be used by both a requestor and a responder. The
58 * queue is a parameter to allow tailoring of the queue implementation
59 * (used in the cache).
60 */
61 class QueuedResponsePort : public ResponsePort
62 {
63
64 protected:
65
66 /** Packet queue used to store outgoing responses. */
67 RespPacketQueue &respQueue;
68
69 void recvRespRetry() { respQueue.retry(); }
70
71 public:
72
73 /**
74 * Create a QueuedPort with a given name, owner, and a supplied
75 * implementation of a packet queue. The external definition of
76 * the queue enables e.g. the cache to implement a specific queue
77 * behaviuor in a subclass, and provide the latter to the
78 * QueuePort constructor.
79 */
80 QueuedResponsePort(const std::string& name, SimObject* owner,
81 RespPacketQueue &resp_queue, PortID id = InvalidPortID) :
82 ResponsePort(name, owner, id), respQueue(resp_queue)
83 { }
84
85 virtual ~QueuedResponsePort() { }
86
87 /**
88 * Schedule the sending of a timing response.
89 *
90 * @param pkt Packet to send
91 * @param when Absolute time (in ticks) to send packet
92 */
93 void schedTimingResp(PacketPtr pkt, Tick when)
94 { respQueue.schedSendTiming(pkt, when); }
95
96 /** Check the list of buffered packets against the supplied
97 * functional request. */
98 bool trySatisfyFunctional(PacketPtr pkt)
99 { return respQueue.trySatisfyFunctional(pkt); }
100 };
ResponsePort
259 /**
260 * A ResponsePort is a specialization of a port. In addition to the
261 * basic functionality of sending packets to its requestor peer, it also
262 * has functions specific to a responder, e.g. to send range changes
263 * and get the address ranges that the port responds to.
264 *
265 * The three protocols are atomic, timing, and functional, each with its own
266 * header file.
267 */
268 class ResponsePort : public Port, public AtomicResponseProtocol,
269 public TimingResponseProtocol, public FunctionalResponseProtocol
270 {
271 friend class RequestPort;
272
273 private:
274 RequestPort* _requestPort;
275
276 bool defaultBackdoorWarned;
277
278 protected:
279 SimObject& owner;
280
281 public:
282 ResponsePort(const std::string& name, SimObject* _owner,
283 PortID id=InvalidPortID);
284 virtual ~ResponsePort();
285
286 /**
287 * Find out if the peer request port is snooping or not.
288 *
289 * @return true if the peer request port is snooping
290 */
291 bool isSnooping() const { return _requestPort->isSnooping(); }
292
293 /**
294 * Called by the owner to send a range change
295 */
296 void sendRangeChange() const { _requestPort->recvRangeChange(); }
297
298 /**
299 * Get a list of the non-overlapping address ranges the owner is
300 * responsible for. All response ports must override this function
301 * and return a populated list with at least one item.
302 *
303 * @return a list of ranges responded to
304 */
305 virtual AddrRangeList getAddrRanges() const = 0;
306
307 /**
308 * We let the request port do the work, so these don't do anything.
309 */
310 void unbind() override {}
311 void bind(Port &peer) override {}
312
313 public:
314 /* The atomic protocol. */
315
316 /**
317 * Send an atomic snoop request packet, where the data is moved
318 * and the state is updated in zero time, without interleaving
319 * with other memory accesses.
320 *
321 * @param pkt Snoop packet to send.
322 *
323 * @return Estimated latency of access.
324 */
325 Tick
326 sendAtomicSnoop(PacketPtr pkt)
327 {
328 try {
329 return AtomicResponseProtocol::sendSnoop(_requestPort, pkt);
330 } catch (UnboundPortException) {
331 reportUnbound();
332 }
333 }
334
335 public:
336 /* The functional protocol. */
337
338 /**
339 * Send a functional snoop request packet, where the data is
340 * instantly updated everywhere in the memory system, without
341 * affecting the current state of any block or moving the block.
342 *
343 * @param pkt Snoop packet to send.
344 */
345 void
346 sendFunctionalSnoop(PacketPtr pkt) const
347 {
348 try {
349 FunctionalResponseProtocol::sendSnoop(_requestPort, pkt);
350 } catch (UnboundPortException) {
351 reportUnbound();
352 }
353 }
354
355 public:
356 /* The timing protocol. */
357
358 /**
359 * Attempt to send a timing response to the request port by calling
360 * its corresponding receive function. If the send does not
361 * succeed, as indicated by the return value, then the sender must
362 * wait for a recvRespRetry at which point it can re-issue a
363 * sendTimingResp.
364 *
365 * @param pkt Packet to send.
366 *
367 * @return If the send was successful or not.
368 */
369 bool
370 sendTimingResp(PacketPtr pkt)
371 {
372 try {
373 return TimingResponseProtocol::sendResp(_requestPort, pkt);
374 } catch (UnboundPortException) {
375 reportUnbound();
376 }
377 }
378
379 /**
380 * Attempt to send a timing snoop request packet to the request port
381 * by calling its corresponding receive function. Snoop requests
382 * always succeed and hence no return value is needed.
383 *
384 * @param pkt Packet to send.
385 */
386 void
387 sendTimingSnoopReq(PacketPtr pkt)
388 {
389 try {
390 TimingResponseProtocol::sendSnoopReq(_requestPort, pkt);
391 } catch (UnboundPortException) {
392 reportUnbound();
393 }
394 }
395
396 /**
397 * Send a retry to the request port that previously attempted a
398 * sendTimingReq to this response port and failed.
399 */
400 void
401 sendRetryReq()
402 {
403 try {
404 TimingResponseProtocol::sendRetryReq(_requestPort);
405 } catch (UnboundPortException) {
406 reportUnbound();
407 }
408 }
409
410 /**
411 * Send a retry to the request port that previously attempted a
412 * sendTimingSnoopResp to this response port and failed.
413 */
414 void
415 sendRetrySnoopResp()
416 {
417 try {
418 TimingResponseProtocol::sendRetrySnoopResp(_requestPort);
419 } catch (UnboundPortException) {
420 reportUnbound();
421 }
422 }
423
424 protected:
425 /**
426 * Called by the request port to unbind. Should never be called
427 * directly.
428 */
429 void responderUnbind();
430
431 /**
432 * Called by the request port to bind. Should never be called
433 * directly.
434 */
435 void responderBind(RequestPort& request_port);
436
437 /**
438 * Default implementations.
439 */
440 Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) override;
441
442 bool
443 tryTiming(PacketPtr pkt) override
444 {
445 panic("%s was not expecting a %s\n", name(), __func__);
446 }
447
448 bool
449 recvTimingSnoopResp(PacketPtr pkt) override
450 {
451 panic("%s was not expecting a timing snoop response\n", name());
452 }
453 };
This is the base class that provides most of the interfaces required for handling receive operations. The operations that ResponsePort does not implement itself are provided by the TimingResponseProtocol (and its atomic and functional counterparts) that ResponsePort inherits.
169 /**
170 * Response port
171 */
172 ResponsePort::ResponsePort(const std::string& name, SimObject* _owner,
173 PortID id) : Port(name, id), _requestPort(&defaultRequestPort),
174 defaultBackdoorWarned(false), owner(*_owner)
175 {
176 }
177
178 ResponsePort::~ResponsePort()
179 {
180 }
181
182 void
183 ResponsePort::responderUnbind()
184 {
185 _requestPort = &defaultRequestPort;
186 Port::unbind();
187 }
188
189 void
190 ResponsePort::responderBind(RequestPort& request_port)
191 {
192 _requestPort = &request_port;
193 Port::bind(request_port);
194 }
A ResponsePort is initialized with the defaultRequestPort. Because the ResponsePort needs to know who sends it requests (_requestPort), a reference to the peer RequestPort must be established; this is done by binding to a RequestPort through the responderBind method. When a proper RequestPort has not been bound to the ResponsePort, gem5 reports errors during execution.
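A minimal sketch of the binding idea (invented Mini* types; the real code falls back to a sentinel defaultRequestPort and panics through reportUnbound rather than throwing):

#include <stdexcept>

struct MiniRequestPort { /* peer side */ };

struct MiniResponsePort {
    MiniRequestPort *peer = nullptr;  // unbound by default

    void responderBind(MiniRequestPort &rp) { peer = &rp; }
    void responderUnbind() { peer = nullptr; }

    void sendTimingResp() {
        if (!peer)
            throw std::runtime_error("port is not bound");
        // ... forward the response to the peer ...
    }
};

int main() {
    MiniResponsePort resp;
    MiniRequestPort req;
    resp.responderBind(req);  // normally done during system wiring
    resp.sendTimingResp();    // safe: the peer is set
}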
RespPacketQueue
One thing the QueuedResponsePort must maintain is the response packets. When a cache access finishes, the port should pass the response packet to the processor. However, when the processor is too busy to accept the response from the cache, the port must retry later. For that purpose, the QueuedResponsePort contains a RespPacketQueue that holds all the unhandled response packets.
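A minimal sketch of such a deferred-response queue with retry (invented types; the real RespPacketQueue additionally orders packets by their ready tick):

#include <deque>
#include <functional>
#include <string>

struct MiniRespQueue {
    std::deque<std::string> transmitList;  // pending responses
    bool waitingOnRetry = false;
    std::function<bool(const std::string&)> sendTiming;  // to the CPU

    void schedSendTiming(const std::string &pkt) {
        transmitList.push_back(pkt);
        trySend();
    }
    void trySend() {
        while (!waitingOnRetry && !transmitList.empty()) {
            if (sendTiming(transmitList.front()))
                transmitList.pop_front();  // accepted by the CPU
            else
                waitingOnRetry = true;     // CPU busy, hold the packet
        }
    }
    void retry() {                         // CPU signals it is ready
        waitingOnRetry = false;
        trySend();
    }
};

int main() {
    MiniRespQueue q;
    bool cpuBusy = true;
    q.sendTiming = [&](const std::string &) { return !cpuBusy; };
    q.schedSendTiming("resp A");  // rejected: CPU busy, stays queued
    cpuBusy = false;
    q.retry();                    // now "resp A" is delivered
    return q.transmitList.empty() ? 0 : 1;
}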
300 class RespPacketQueue : public PacketQueue
301 {
302
303 protected:
304
305 ResponsePort& cpuSidePort;
306
307 // Static definition so it can be called when constructing the parent
308 // without us being completely initialized.
309 static const std::string name(const ResponsePort& cpuSidePort,
310 const std::string& label)
311 { return cpuSidePort.name() + "-" + label; }
312
313 public:
314
315 /**
316 * Create a response packet queue, linked to an event manager, a
317 * CPU-side port, and a label that will be used for functional print
318 * request packets.
319 *
320 * @param _em Event manager used for scheduling this queue
321 * @param _cpu_side_port Cpu_side port used to send the packets
322 * @param force_order Force insertion order for packets with same address
323 * @param _label Label to push on the label stack for print request packets
324 */
325 RespPacketQueue(EventManager& _em, ResponsePort& _cpu_side_port,
326 bool force_order = false,
327 const std::string _label = "RespPacketQueue");
328
329 virtual ~RespPacketQueue() { }
330
331 const std::string name() const
332 { return name(cpuSidePort, label); }
333
334 bool sendTiming(PacketPtr pkt);
335
336 };
266 RespPacketQueue::RespPacketQueue(EventManager& _em,
267 ResponsePort& _cpu_side_port,
268 bool force_order,
269 const std::string _label)
270 : PacketQueue(_em, _label, name(_cpu_side_port, _label), force_order),
271 cpuSidePort(_cpu_side_port)
272 {
273 }
274
275 bool
276 RespPacketQueue::sendTiming(PacketPtr pkt)
277 {
278 return cpuSidePort.sendTimingResp(pkt);
279 }
RespPacketQueue has the cpuSidePort as a member, initialized by its constructor. When the sendTiming function of the RespPacketQueue is invoked, it sends the packet through the cpuSidePort using sendTimingResp. Also, note that the RespPacketQueue is constructed with a reference to an EventManager object. However, when you look at its initialization in BaseCache::CacheResponsePort::CacheResponsePort, the queue (the RespPacketQueue object) is initialized with _cache as its first argument. It is not an EventManager but a BaseCache! Because BaseCache is a SimObject, and SimObject inherits from EventManager, the cache object itself can be treated as an EventManager. Let's take a look at the PacketQueue, the parent class of RespPacketQueue, because the RespPacketQueue itself is not capable of scheduling events: it has no member function or field that makes use of the passed EventManager (the BaseCache).
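A minimal sketch of why passing the cache where an EventManager& is expected compiles and works (invented Mini* classes):

#include <iostream>

struct MiniEventManager {
    void schedule(const char *what) {
        std::cout << "scheduling " << what << "\n";
    }
};
struct MiniSimObject : MiniEventManager {};
struct MiniBaseCache : MiniSimObject {};

struct MiniPacketQueue {
    MiniEventManager &em;  // stored as the base type
    explicit MiniPacketQueue(MiniEventManager &m) : em(m) {}
    void schedSendEvent() { em.schedule("sendEvent"); }
};

int main() {
    MiniBaseCache cache;
    MiniPacketQueue q(cache);  // a cache *is an* event manager
    q.schedSendEvent();
}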
PacketQueue
Instead, its parent class PacketQueue makes use of the EventManager, scheduling events through the schedule method and an EventFunctionWrapper.
61 /**
62 * A packet queue is a class that holds deferred packets and later
63 * sends them using the associated CPU-side port or memory-side port.
64 */
65 class PacketQueue : public Drainable
66 {
67 private:
68 /** A deferred packet, buffered to transmit later. */
69 class DeferredPacket
70 {
71 public:
72 Tick tick; ///< The tick when the packet is ready to transmit
73 PacketPtr pkt; ///< Pointer to the packet to transmit
74 DeferredPacket(Tick t, PacketPtr p)
75 : tick(t), pkt(p)
76 {}
77 };
78
79 typedef std::list<DeferredPacket> DeferredPacketList;
80
81 /** A list of outgoing packets. */
82 DeferredPacketList transmitList;
83
84 /** The manager which is used for the event queue */
85 EventManager& em;
86
87 /** Used to schedule sending of deferred packets. */
88 void processSendEvent();
89
90 /** Event used to call processSendEvent. */
91 EventFunctionWrapper sendEvent;
92
93 /*
94 * Optionally disable the sanity check
95 * on the size of the transmitList. The
96 * sanity check will be enabled by default.
97 */
98 bool _disableSanityCheck;
99
100 /**
101 * if true, inserted packets have to be unconditionally scheduled
102 * after the last packet in the queue that references the same
103 * address
104 */
105 bool forceOrder;
106
107 protected:
108
109 /** Label to use for print request packets label stack. */
110 const std::string label;
111
112 /** Remember whether we're awaiting a retry. */
113 bool waitingOnRetry;
114
115 /** Check whether we have a packet ready to go on the transmit list. */
116 bool deferredPacketReady() const
117 { return !transmitList.empty() && transmitList.front().tick <= curTick(); }
118
119 /**
120 * Attempt to send a packet. Note that a subclass of the
121 * PacketQueue can override this method and thus change the
122 * behaviour (as done by the cache for the request queue). The
123 * default implementation sends the head of the transmit list. The
124 * caller must guarantee that the list is non-empty and that the
125 * head packet is scheduled for curTick() (or earlier).
126 */
127 virtual void sendDeferredPacket();
128
129 /**
130 * Send a packet using the appropriate method for the specific
131 * subclass (request, response or snoop response).
132 */
133 virtual bool sendTiming(PacketPtr pkt) = 0;
134
135 /**
136 * Create a packet queue, linked to an event manager, and a label
137 * that will be used for functional print request packets.
138 *
139 * @param _em Event manager used for scheduling this queue
140 * @param _label Label to push on the label stack for print request packets
141 * @param force_order Force insertion order for packets with same address
142 * @param disable_sanity_check Flag used to disable the sanity check
143 * on the size of the transmitList. The check is enabled by default.
144 */
145 PacketQueue(EventManager& _em, const std::string& _label,
146 const std::string& _sendEventName,
147 bool force_order = false,
148 bool disable_sanity_check = false);
149
150 /**
151      * Virtual destructor since the class may be used as a base class.
152 */
153 virtual ~PacketQueue();
154
155 public:
156
157 /**
158 * Provide a name to simplify debugging.
159 *
160 * @return A complete name, appended to module and port
161 */
162 virtual const std::string name() const = 0;
163
164 /**
165 * Get the size of the queue.
166 */
167 size_t size() const { return transmitList.size(); }
168
169 /**
170 * Get the next packet ready time.
171 */
172 Tick deferredPacketReadyTime() const
173 { return transmitList.empty() ? MaxTick : transmitList.front().tick; }
174
175 /**
176 * Check if a packet corresponding to the same address exists in the
177 * queue.
178 *
179 * @param pkt The packet to compare against.
180 * @param blk_size Block size in bytes.
181 * @return Whether a corresponding packet is found.
182 */
183 bool checkConflict(const PacketPtr pkt, const int blk_size) const;
184
185 /** Check the list of buffered packets against the supplied
186 * functional request. */
187 bool trySatisfyFunctional(PacketPtr pkt);
188
189 /**
190 * Schedule a send event if we are not already waiting for a
191 * retry. If the requested time is before an already scheduled
192 * send event, the event will be rescheduled. If MaxTick is
193 * passed, no event is scheduled. Instead, if we are idle and
194 * asked to drain then check and signal drained.
195 *
196 * @param when time to schedule an event
197 */
198 void schedSendEvent(Tick when);
199
200 /**
201 * Add a packet to the transmit list, and schedule a send event.
202 *
203 * @param pkt Packet to send
204 * @param when Absolute time (in ticks) to send packet
205 */
206 void schedSendTiming(PacketPtr pkt, Tick when);
207
208 /**
209 * Retry sending a packet from the queue. Note that this is not
210 * necessarily the same packet if something has been added with an
211 * earlier time stamp.
212 */
213 void retry();
214
215 /**
216 * This allows a user to explicitly disable the sanity check
217 * on the size of the transmitList, which is enabled by default.
218 * Users must use this function to explicitly disable the sanity
219 * check.
220 */
221 void disableSanityCheck() { _disableSanityCheck = true; }
222
223 DrainState drain() override;
224 };
Port binding
73 class BaseCache(ClockedObject):
74 type = 'BaseCache'
......
121 cpu_side = ResponsePort("Upstream port closer to the CPU and/or device")
122 mem_side = RequestPort("Downstream port closer to memory")
gem5/src/python/m5/params.py
2123 # Port description object. Like a ParamDesc object, this represents a
2124 # logical port in the SimObject class, not a particular port on a
2125 # SimObject instance. The latter are represented by PortRef objects.
2126 class Port(object):
2127 # Port("role", "description")
2128
2129 _compat_dict = { }
2130
2131 @classmethod
2132 def compat(cls, role, peer):
2133 cls._compat_dict.setdefault(role, set()).add(peer)
2134 cls._compat_dict.setdefault(peer, set()).add(role)
2135
2136 @classmethod
2137 def is_compat(cls, one, two):
2138 for port in one, two:
2139 if not port.role in Port._compat_dict:
2140 fatal("Unrecognized role '%s' for port %s\n", port.role, port)
2141 return one.role in Port._compat_dict[two.role]
2142
2143 def __init__(self, role, desc, is_source=False):
2144 self.desc = desc
2145 self.role = role
2146 self.is_source = is_source
2147
2148 # Generate a PortRef for this port on the given SimObject with the
2149 # given name
2150 def makeRef(self, simobj):
2151 return PortRef(simobj, self.name, self.role, self.is_source)
2152
2153 # Connect an instance of this port (on the given SimObject with
2154 # the given name) with the port described by the supplied PortRef
2155 def connect(self, simobj, ref):
2156 self.makeRef(simobj).connect(ref)
2157
2158 # No need for any pre-declarations at the moment as we merely rely
2159 # on an unsigned int.
2160 def cxx_predecls(self, code):
2161 pass
2162
2163 def pybind_predecls(self, code):
2164 cls.cxx_predecls(self, code)
2165
2166 # Declare an unsigned int with the same name as the port, that
2167 # will eventually hold the number of connected ports (and thus the
2168 # number of elements for a VectorPort).
2169 def cxx_decl(self, code):
2170 code('unsigned int port_$_connection_count;')
2171
2172 Port.compat('GEM5 REQUESTOR', 'GEM5 RESPONDER')
2173
2174 class RequestPort(Port):
2175 # RequestPort("description")
2176 def __init__(self, desc):
2177 super(RequestPort, self).__init__(
2178 'GEM5 REQUESTOR', desc, is_source=True)
2179
2180 class ResponsePort(Port):
2181 # ResponsePort("description")
2182 def __init__(self, desc):
2183 super(ResponsePort, self).__init__('GEM5 RESPONDER', desc)
2184
1896 #####################################################################
1897 #
1898 # Port objects
1899 #
1900 # Ports are used to interconnect objects in the memory system.
1901 #
1902 #####################################################################
1903
1904 # Port reference: encapsulates a reference to a particular port on a
1905 # particular SimObject.
1906 class PortRef(object):
......
1941 # Full connection is symmetric (both ways). Called via
1942 # SimObject.__setattr__ as a result of a port assignment, e.g.,
1943 # "obj1.portA = obj2.portB", or via VectorPortElementRef.__setitem__,
1944 # e.g., "obj1.portA[3] = obj2.portB".
1945 def connect(self, other):
1946 if isinstance(other, VectorPortRef):
1947 # reference to plain VectorPort is implicit append
1948 other = other._get_next()
1949 if self.peer and not proxy.isproxy(self.peer):
1950 fatal("Port %s is already connected to %s, cannot connect %s\n",
1951 self, self.peer, other);
1952 self.peer = other
1953
1954 if proxy.isproxy(other):
1955 other.set_param_desc(PortParamDesc())
1956 return
1957 elif not isinstance(other, PortRef):
1958 raise TypeError("assigning non-port reference '%s' to port '%s'" \
1959 % (other, self))
1960
1961 if not Port.is_compat(self, other):
1962 fatal("Ports %s and %s with roles '%s' and '%s' "
1963 "are not compatible", self, other, self.role, other.role)
1964
1965 if other.peer is not self:
1966 other.connect(self)
......
2023 # Call C++ to create corresponding port connection between C++ objects
2024 def ccConnect(self):
2025 if self.ccConnected: # already done this
2026 return
2027
2028 peer = self.peer
2029 if not self.peer: # nothing to connect to
2030 return
2031
2032 port = self.simobj.getPort(self.name, self.index)
2033 peer_port = peer.simobj.getPort(peer.name, peer.index)
2034 port.bind(peer_port)
2035
2036 self.ccConnected = True
127 void
128 RequestPort::bind(Port &peer)
129 {
130 auto *response_port = dynamic_cast<ResponsePort *>(&peer);
131 fatal_if(!response_port, "Can't bind port %s to non-response port %s.",
132 name(), peer.name());
133 // request port keeps track of the response port
134 _responsePort = response_port;
135 Port::bind(peer);
136 // response port also keeps track of request port
137 _responsePort->responderBind(*this);
138 }
189 void
190 ResponsePort::responderBind(RequestPort& request_port)
191 {
192 _requestPort = &request_port;
193 Port::bind(request_port);
194 }
58 /**
59 * Ports are used to interface objects to each other.
60 */
61 class Port
62 {
116 /** Attach to a peer port. */
117 virtual void
118 bind(Port &peer)
119 {
120 _peer = &peer;
121 _connected = true;
122 }
200 Port &
201 BaseCache::getPort(const std::string &if_name, PortID idx)
202 {
203 if (if_name == "mem_side") {
204 return memSidePort;
205 } else if (if_name == "cpu_side") {
206 return cpuSidePort;
207 } else {
208 return ClockedObject::getPort(if_name, idx);
209 }
210 }
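Putting the chain together, here is a compilable toy mirror of the handshake above (not gem5 source; signatures are simplified): RequestPort::bind checks the peer's type, records it, and calls responderBind so that both sides end up pointing at each other.

#include <cassert>
#include <iostream>
#include <string>
#include <utility>

// Toy base port: knows its peer and whether it is connected.
struct Port {
    std::string portName;
    Port *peer = nullptr;
    bool connected = false;
    explicit Port(std::string n) : portName(std::move(n)) {}
    virtual ~Port() = default;
    virtual void bind(Port &p) { peer = &p; connected = true; }
    const std::string &name() const { return portName; }
};

struct RequestPort;

struct ResponsePort : Port {
    RequestPort *requestPort = nullptr;
    using Port::Port;
    void responderBind(RequestPort &req);    // called back by the requestor
};

struct RequestPort : Port {
    ResponsePort *responsePort = nullptr;
    using Port::Port;
    void bind(Port &p) override {
        auto *resp = dynamic_cast<ResponsePort *>(&p);
        assert(resp && "can only bind to a response port");
        responsePort = resp;                 // requestor tracks the responder
        Port::bind(p);
        resp->responderBind(*this);          // responder learns about us too
    }
};

void ResponsePort::responderBind(RequestPort &req) {
    requestPort = &req;                      // responder tracks the requestor
    Port::bind(req);
}

int main() {
    RequestPort memSide("cache.mem_side");
    ResponsePort cpuSide("memory.cpu_side");
    memSide.bind(cpuSide);                   // one call wires up both sides
    std::cout << memSide.name() << " <-> " << cpuSide.name() << "\n";
    assert(memSide.connected && cpuSide.connected);
}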
allocateBlock
1529 CacheBlk*
1530 BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks)
1531 {
1532 // Get address
1533 const Addr addr = pkt->getAddr();
1534
1535 // Get secure bit
1536 const bool is_secure = pkt->isSecure();
1537
1538 // Block size and compression related access latency. Only relevant if
1539 // using a compressor, otherwise there is no extra delay, and the block
1540 // is fully sized
1541 std::size_t blk_size_bits = blkSize*8;
1542 Cycles compression_lat = Cycles(0);
1543 Cycles decompression_lat = Cycles(0);
1544
1545 // If a compressor is being used, it is called to compress data before
1546 // insertion. Although in Gem5 the data is stored uncompressed, even if a
1547 // compressor is used, the compression/decompression methods are called to
1548 // calculate the amount of extra cycles needed to read or write compressed
1549 // blocks.
1550 if (compressor && pkt->hasData()) {
1551 const auto comp_data = compressor->compress(
1552 pkt->getConstPtr<uint64_t>(), compression_lat, decompression_lat);
1553 blk_size_bits = comp_data->getSizeBits();
1554 }
1555
1556 // Find replacement victim
1557 std::vector<CacheBlk*> evict_blks;
1558 CacheBlk *victim = tags->findVictim(addr, is_secure, blk_size_bits,
1559 evict_blks);
1560
1561 // It is valid to return nullptr if there is no victim
1562 if (!victim)
1563 return nullptr;
1564
1565 // Print victim block's information
1566 DPRINTF(CacheRepl, "Replacement victim: %s\n", victim->print());
1567
1568 // Try to evict blocks; if it fails, give up on allocation
1569 if (!handleEvictions(evict_blks, writebacks)) {
1570 return nullptr;
1571 }
1572
1573 // Insert new block at victimized entry
1574 tags->insertBlock(pkt, victim);
1575
1576 // If using a compressor, set compression data. This must be done after
1577 // insertion, as the compression bit may be set.
1578 if (compressor) {
1579 compressor->setSizeBits(victim, blk_size_bits);
1580 compressor->setDecompressionLatency(victim, decompression_lat);
1581 }
1582
1583 return victim;
1584 }
158 /**
159 * Find replacement victim based on address. The list of evicted blocks
160 * only contains the victim.
161 *
162 * @param addr Address to find a victim for.
163 * @param is_secure True if the target memory space is secure.
164 * @param size Size, in bits, of new block to allocate.
165 * @param evict_blks Cache blocks to be evicted.
166 * @return Cache block to be replaced.
167 */
168 CacheBlk* findVictim(Addr addr, const bool is_secure,
169 const std::size_t size,
170 std::vector<CacheBlk*>& evict_blks) override
171 {
172 // Get possible entries to be victimized
173 const std::vector<ReplaceableEntry*> entries =
174 indexingPolicy->getPossibleEntries(addr);
175
176 // Choose replacement victim from replacement candidates
177 CacheBlk* victim = static_cast<CacheBlk*>(replacementPolicy->getVictim(
178 entries));
179
180 // There is only one eviction for this replacement
181 evict_blks.push_back(victim);
182
183 return victim;
184 }
getPossibleEntries selects the entries of the set
associated with the address passed to the findVictim function.
Because it returns the N ways of entries mapped to that set,
the getVictim function has to search them for a proper entry to evict.
As a result, one entry is selected and pushed onto the eviction list,
and the chosen victim block is returned so it can be evicted and reused for the new allocation.
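For intuition, here is a small standalone sketch of that flow for one 4-way set under an LRU policy (toy code, not gem5's replacement-policy API):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Toy cache entry: tag plus a last-touch timestamp for LRU.
struct Entry { uint64_t tag; uint64_t lastTouch; bool valid; };

int main() {
    // the one set (4 ways) that getPossibleEntries() would have returned
    std::vector<Entry> set = {
        {0x1, 40, true}, {0x2, 10, true}, {0x3, 70, true}, {0x4, 25, true},
    };
    // getVictim(): under LRU, the smallest lastTouch is the victim
    auto victim = std::min_element(set.begin(), set.end(),
        [](const Entry &a, const Entry &b) { return a.lastTouch < b.lastTouch; });
    std::cout << "victim tag: 0x" << std::hex << victim->tag << "\n";  // 0x2
}

Back in allocateBlock, the chosen victim is then handed to handleEvictions: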
864 bool
865 BaseCache::handleEvictions(std::vector<CacheBlk*> &evict_blks,
866 PacketList &writebacks)
867 {
868 bool replacement = false;
869 for (const auto& blk : evict_blks) {
870 if (blk->isValid()) {
871 replacement = true;
872
873 const MSHR* mshr =
874 mshrQueue.findMatch(regenerateBlkAddr(blk), blk->isSecure());
875 if (mshr) {
876 // Must be an outstanding upgrade or clean request on a block
877 // we're about to replace
878 assert((!blk->isSet(CacheBlk::WritableBit) &&
879 mshr->needsWritable()) || mshr->isCleaning());
880 return false;
881 }
882 }
883 }
884
885 // The victim will be replaced by a new entry, so increase the replacement
886 // counter if a valid block is being replaced
887 if (replacement) {
888 stats.replacements++;
889
890 // Evict valid blocks associated to this victim block
891 for (auto& blk : evict_blks) {
892 if (blk->isValid()) {
893 evictBlock(blk, writebacks);
894 }
895 }
896 }
897
898 return true;
899 }
1606 void
1607 BaseCache::evictBlock(CacheBlk *blk, PacketList &writebacks)
1608 {
1609 PacketPtr pkt = evictBlock(blk);
1610 if (pkt) {
1611 writebacks.push_back(pkt);
1612 }
1613 }
899 PacketPtr
900 Cache::evictBlock(CacheBlk *blk)
901 {
902 PacketPtr pkt = (blk->isSet(CacheBlk::DirtyBit) || writebackClean) ?
903 writebackBlk(blk) : cleanEvictBlk(blk);
904
905 invalidateBlock(blk);
906
907 return pkt;
908 }
1586 void
1587 BaseCache::invalidateBlock(CacheBlk *blk)
1588 {
1589 // If block is still marked as prefetched, then it hasn't been used
1590 if (blk->wasPrefetched()) {
1591 prefetcher->prefetchUnused();
1592 }
1593
1594 // Notify that the data contents for this address are no longer present
1595 updateBlockData(blk, nullptr, blk->isValid());
1596
1597 // If handling a block present in the Tags, let it do its invalidation
1598 // process, which will update stats and invalidate the block itself
1599 if (blk != tempBlock) {
1600 tags->invalidate(blk);
1601 } else {
1602 tempBlock->invalidate();
1603 }
1604 }
gem5/src/mem/cache/tags/base_set_assoc.cc
88 void
89 BaseSetAssoc::invalidate(CacheBlk *blk)
90 {
91 BaseTags::invalidate(blk);
92
93 // Decrease the number of tags in use
94 stats.tagsInUse--;
95
96 // Invalidate replacement data
97 replacementPolicy->invalidate(blk->replacementData);
98 }
Because the invalidate function of the BaseTags class is a virtual function,
it has to be implemented by its child classes.
I use the base_set_assoc tags when generating the cache
in my system, so I will follow the implementation
in the BaseSetAssoc class.
Note that it invokes the invalidate function on the block first
and then invalidates the replacement data.
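The dispatch itself is ordinary C++ virtual-function resolution. A minimal sketch (toy classes, not gem5 code): the cache holds a BaseTags pointer, but the virtual call lands in BaseSetAssoc.

#include <iostream>
#include <memory>

struct CacheBlk { void invalidate() { std::cout << "block invalidated\n"; } };

struct BaseTags {
    virtual ~BaseTags() = default;
    virtual void invalidate(CacheBlk *blk) { blk->invalidate(); }
};

struct BaseSetAssoc : BaseTags {
    void invalidate(CacheBlk *blk) override {
        BaseTags::invalidate(blk);                      // invalidate the block first
        std::cout << "replacement data invalidated\n";  // then the repl. data
    }
};

int main() {
    // the cache is configured with set-associative tags
    std::unique_ptr<BaseTags> tags = std::make_unique<BaseSetAssoc>();
    CacheBlk blk;
    tags->invalidate(&blk);    // virtual dispatch to BaseSetAssoc::invalidate
}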
gem5/src/mem/cache/cache_blk.hh
70 class CacheBlk : public TaggedEntry
71 {
72 public:
......
197 /**
198 * Invalidate the block and clear all state.
199 */
200 virtual void invalidate() override
201 {
202 TaggedEntry::invalidate();
203
204 clearPrefetched();
205 clearCoherenceBits(AllBits);
206
207 setTaskId(context_switch_task_id::Unknown);
208 setWhenReady(MaxTick);
209 setRefCount(0);
210 setSrcRequestorId(Request::invldRequestorId);
211 lockList.clear();
212 }
Although the invalidate function of CacheBlk is defined
as a virtual function,
the system uses the CacheBlk class as it is,
instead of adopting another class that inherits from CacheBlk.
Therefore, the invalidate function of CacheBlk itself is called.
Most importantly, it invokes the invalidate function
of its parent class, TaggedEntry.
It also clears all the coherence bits and the prefetched bit
if they are set.
gem5/src/mem/cache/tags/tagged_entry.hh
46 class TaggedEntry : public ReplaceableEntry
47 {
......
102 /** Invalidate the block. Its contents are no longer valid. */
103 virtual void invalidate()
104 {
105 _valid = false;
106 setTag(MaxAddr);
107 clearSecure();
108 }
|
Finally, it sets the _valid member field
of the block to false, resets the tag to MaxAddr, and clears the secure flag.