Matthew Poremba has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/47179 )
Change subject: dev,dev-hsa,gpu-compute: Refactor dmaVirt calls
......................................................................
dev,dev-hsa,gpu-compute: Refactor dmaVirt calls
WIP: Untested since gcn is broken.
Remove the duplicate dmaVirt calls from HSA packet processor and GPU
command processor and move them into their own class. This removes some
duplicate code and allows a DmaVirtDevice to be created which will be
useful for upcoming full system GPU commits.
The DmaVirtDevice is an abstraction of the base DmaDevice but iterates
using ChunkGenerator over virtual addresses. Classes which inherit from
DmaVirtDevice must provide a translation function to translate from
virtual address to physical address. Once translated, the physical
address is passed to DmaDevice to do the work.
Change-Id: Idd59ccb4d9ba21c0b1150ee328ededf5a88d824e
---
M src/dev/Device.py
M src/dev/SConscript
A src/dev/dma_virt_device.cc
A src/dev/dma_virt_device.hh
M src/dev/hsa/hsa_packet_processor.cc
M src/dev/hsa/hsa_packet_processor.hh
M src/gpu-compute/gpu_command_processor.cc
M src/gpu-compute/gpu_command_processor.hh
8 files changed, 264 insertions(+), 223 deletions(-)
diff --git a/src/dev/Device.py b/src/dev/Device.py
index 46e992c..19e1927 100644
--- a/src/dev/Device.py
+++ b/src/dev/Device.py
@@ -102,6 +102,11 @@
node.append(FdtPropertyWords("iommus",
[ state.phandle(self._iommu), self.sid ]))
+# Abstract SimObject for DMA devices that issue transfers on virtual
+# addresses; the C++ class is declared in dev/dma_virt_device.hh.
+class DmaVirtDevice(DmaDevice):
+ type = 'DmaVirtDevice'
+ cxx_header = "dev/dma_virt_device.hh"
+ abstract = True
+
class IsaFake(BasicPioDevice):
type = 'IsaFake'
cxx_header = "dev/isa_fake.hh"
diff --git a/src/dev/SConscript b/src/dev/SConscript
index 02f9733..e86e48b 100644
--- a/src/dev/SConscript
+++ b/src/dev/SConscript
@@ -32,6 +32,7 @@
Source('io_device.cc')
Source('isa_fake.cc')
Source('dma_device.cc')
+Source('dma_virt_device.cc')
SimObject('IntPin.py')
Source('intpin.cc')
diff --git a/src/dev/dma_virt_device.cc b/src/dev/dma_virt_device.cc
new file mode 100644
index 0000000..5b5e908
--- /dev/null
+++ b/src/dev/dma_virt_device.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2021 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
notice,
+ * this list of conditions and the following disclaimer in the
documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "dev/dma_virt_device.hh"
+
+/**
+ * Construct the DmaDevice base and cache the system page size, which
+ * dmaVirt() uses as the chunk size when splitting a transfer.
+ */
+DmaVirtDevice::DmaVirtDevice(const Params& p)
+ : DmaDevice(p), pageBytes(p.system->getPageBytes())
+{
+}
+
+/**
+ * DMA read starting at virtual address host_addr. Forwards to dmaVirt()
+ * with DmaDevice::dmaRead as the per-chunk transfer function.
+ */
+void
+DmaVirtDevice::dmaReadVirt(Addr host_addr, unsigned size,
+ DmaCallback *cb, void *data, Tick delay)
+{
+ dmaVirt(&DmaDevice::dmaRead, host_addr, size, cb, data, delay);
+}
+
+/**
+ * DMA write starting at virtual address host_addr. Forwards to dmaVirt()
+ * with DmaDevice::dmaWrite as the per-chunk transfer function.
+ */
+void
+DmaVirtDevice::dmaWriteVirt(Addr host_addr, unsigned size,
+ DmaCallback *cb, void *data, Tick delay)
+{
+ dmaVirt(&DmaDevice::dmaWrite, host_addr, size, cb, data, delay);
+}
+
+/**
+ * Split a transfer over virtual addresses into chunks of at most pageBytes,
+ * translate the start of each chunk with translateOrDie(), and issue the
+ * chunk through the DmaDevice member function dmaFn.
+ *
+ * @param dmaFn DmaDevice method invoked once per chunk
+ * @param addr Virtual starting address of the transfer
+ * @param size Number of bytes to transfer
+ * @param cb Callback supplying one chunk event per chunk; may be nullptr
+ * @param data Buffer for the transfer, advanced chunk by chunk
+ * @param delay Delay in ticks forwarded to dmaFn (and used when scheduling
+ * the chunk event of a zero-sized transfer)
+ */
+void
+DmaVirtDevice::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
+ DmaCallback *cb, void *data, Tick delay)
+{
+ // Nothing to transfer: if a callback was supplied, just schedule one of
+ // its chunk events after delay and return without issuing any DMA.
+ if (size == 0) {
+ if (cb)
+ schedule(cb->getChunkEvent(), curTick() + delay);
+ return;
+ }
+
+ // move the buffer data pointer with the chunks
+ uint8_t *loc_data = (uint8_t*)data;
+
+ for (ChunkGenerator gen(addr, size, pageBytes); !gen.done();
gen.next()) {
+ Addr phys;
+
+ // translate pages into their corresponding frames
+ translateOrDie(gen.addr(), phys);
+
+ // every chunk gets its own event from the callback (none if cb is null)
+ Event *event = cb ? cb->getChunkEvent() : nullptr;
+
+ (this->*dmaFn)(phys, gen.size(), event, loc_data, delay);
+
+ loc_data += gen.size();
+ }
+}
diff --git a/src/dev/dma_virt_device.hh b/src/dev/dma_virt_device.hh
new file mode 100644
index 0000000..20031af
--- /dev/null
+++ b/src/dev/dma_virt_device.hh
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2021 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
notice,
+ * this list of conditions and the following disclaimer in the
documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DEV_DMA_VIRT_DEVICE_HH__
+#define __DEV_DMA_VIRT_DEVICE_HH__
+
+#include "dev/dma_device.hh"
+
+/**
+ * A DmaDevice that performs DMAs on virtual addresses. Transfers are split
+ * into page-sized chunks, each chunk is translated through the
+ * translateOrDie() hook supplied by the derived class, and the resulting
+ * physical address is handed to DmaDevice to do the actual work.
+ */
+class DmaVirtDevice : public DmaDevice
+{
+  private:
+    // Page size obtained from the system; used as the DMA chunk size.
+    Addr pageBytes;
+
+  protected:
+    /**
+     * Wraps a std::function object in a DmaCallback. Much cleaner than
+     * defining a bunch of callback objects for each desired behavior when a
+     * DMA completes. Contains a built in templated buffer that can be used
+     * for DMA temporary storage.
+     */
+    template <class T>
+    class DmaVirtCallback : public DmaCallback
+    {
+        std::function<void(const T &)> _function;
+
+        // Hands the DMA buffer to the wrapped function.
+        virtual void
+        process() override
+        {
+            _function(dmaBuffer);
+        }
+
+      public:
+        T dmaBuffer;
+
+        DmaVirtCallback(const std::function<void(const T &)> &function,
+                        T dma_buffer_value = 0)
+          : DmaCallback(), _function(function), dmaBuffer(dma_buffer_value)
+        { }
+    };
+
+  public:
+    DmaVirtDevice(const Params& p);
+    virtual ~DmaVirtDevice() { }
+
+    /**
+     * Initiate a DMA read from virtual address host_addr. Helper function
+     * for dmaVirt method.
+     *
+     * @param host_addr Virtual starting address for DMA transfer
+     * @param size Number of bytes to transfer
+     * @param cb DmaCallback to call upon completion of transfer
+     * @param data Pointer to the data to be transferred
+     * @param delay Delay in ticks forwarded to the underlying DmaDevice call
+     */
+    void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb,
+                     void *data, Tick delay = 0);
+    /**
+     * Initiate a DMA write to virtual address host_addr. Helper function
+     * for dmaVirt method.
+     *
+     * @param host_addr Virtual starting address for DMA transfer
+     * @param size Number of bytes to transfer
+     * @param cb DmaCallback to call upon completion of transfer
+     * @param data Pointer to the data to be transferred
+     * @param delay Delay in ticks forwarded to the underlying DmaDevice call
+     */
+    void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb,
+                      void *data, Tick delay = 0);
+
+    // Pointer-to-member type matching DmaDevice::dmaRead and dmaWrite
+    typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
+
+    /**
+     * Initiate a call to DmaDevice using DmaFnPtr to do a DMA starting from
+     * virtual address host_addr for size number of bytes on the data. Upon
+     * completion the DmaCallback cb is called if not nullptr.
+     *
+     * @param dmaFn Method in DmaDevice to call per transfer chunk
+     * @param host_addr Virtual starting address for DMA transfer
+     * @param size Number of bytes to transfer
+     * @param cb DmaCallback to call upon completion of transfer
+     * @param data Pointer to the data to be transferred
+     * @param delay Delay in ticks forwarded to dmaFn; for a zero-sized
+     *              transfer it delays the scheduling of the callback event
+     */
+    void dmaVirt(DmaFnPtr dmaFn, Addr host_addr, unsigned size,
+                 DmaCallback *cb, void *data, Tick delay = 0);
+
+    /**
+     * Function used to translate from virtual to physical addresses. All
+     * classes inheriting from DmaVirtDevice must define this.
+     *
+     * @param vaddr Input virtual address
+     * @param paddr Output physical address written by reference
+     */
+    virtual void translateOrDie(Addr vaddr, Addr &paddr) = 0;
+};
+
+#endif // __DEV_DMA_VIRT_DEVICE_HH__
diff --git a/src/dev/hsa/hsa_packet_processor.cc
b/src/dev/hsa/hsa_packet_processor.cc
index 8749aa7..d8d1826 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -66,13 +66,10 @@
#define IS_BARRIER(PKT) ((hsa_packet_header_t)(((PKT->header) >> \
HSA_PACKET_HEADER_BARRIER) & HSA_PACKET_HEADER_WIDTH_BARRIER))
-HSAPP_EVENT_DESCRIPTION_GENERATOR(UpdateReadDispIdDmaEvent)
-HSAPP_EVENT_DESCRIPTION_GENERATOR(CmdQueueCmdDmaEvent)
HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent)
-HSAPP_EVENT_DESCRIPTION_GENERATOR(DepSignalsReadDmaEvent)
HSAPacketProcessor::HSAPacketProcessor(const Params &p)
- : DmaDevice(p), numHWQueues(p.numHWQueues), pioAddr(p.pioAddr),
+ : DmaVirtDevice(p), numHWQueues(p.numHWQueues), pioAddr(p.pioAddr),
pioSize(PAGE_SIZE), pioDelay(10), pktProcessDelay(p.pktProcessDelay)
{
DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
@@ -171,55 +168,15 @@
fatal("failed translation: vaddr 0x%x\n", vaddr);
}
+/**
+ * Callback run once the DMA write that updates the read_disp_id field (the
+ * read pointer) of the MQD has completed; that field is how the host code
+ * knows the status of the HQD's read pointer.
+ */
void
-HSAPacketProcessor::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
- Event *event, void *data, Tick delay)
+HSAPacketProcessor::updateReadDispIdDma()
{
- if (size == 0) {
- schedule(event, curTick() + delay);
- return;
- }
-
- // move the buffer data pointer with the chunks
- uint8_t *loc_data = (uint8_t*)data;
-
- for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done();
gen.next()) {
- Addr phys;
-
- // translate pages into their corresponding frames
- translateOrDie(gen.addr(), phys);
-
- // only send event on last transfer; transfers complete in-order
- Event *ev = gen.last() ? event : NULL;
-
- (this->*dmaFn)(phys, gen.size(), ev, loc_data, delay);
-
- loc_data += gen.size();
- }
-}
-
-void
-HSAPacketProcessor::dmaReadVirt(Addr host_addr, unsigned size,
- Event *event, void *data, Tick delay)
-{
- DPRINTF(HSAPacketProcessor,
- "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__, host_addr,
size);
- dmaVirt(&DmaDevice::dmaRead, host_addr, size, event, data, delay);
-}
-
-void
-HSAPacketProcessor::dmaWriteVirt(Addr host_addr, unsigned size,
- Event *event, void *data, Tick delay)
-{
- dmaVirt(&DmaDevice::dmaWrite, host_addr, size, event, data, delay);
-}
-
-HSAPacketProcessor::UpdateReadDispIdDmaEvent::
- UpdateReadDispIdDmaEvent()
- : Event(Default_Pri, AutoDelete)
-{
- DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
- setFlags(AutoDelete);
+ DPRINTF(HSAPacketProcessor, "updateReaddispId\n");
}
void
@@ -227,14 +184,14 @@
{
AQLRingBuffer* aqlbuf = regdQList[rl_idx]->qCntxt.aqlBuf;
HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
- auto *dmaEvent = new UpdateReadDispIdDmaEvent();
+ auto cb = new DmaVirtCallback<uint64_t>(
+ [ = ] (const uint32_t &dma_data) { this->updateReadDispIdDma(); },
0);
DPRINTF(HSAPacketProcessor,
"%s: read-pointer offset [0x%x]\n", __FUNCTION__,
aqlbuf->rdIdx());
- dmaWriteVirt((Addr)qDesc->hostReadIndexPtr,
- sizeof(aqlbuf->rdIdx()),
- dmaEvent, aqlbuf->rdIdxPtr());
+ dmaWriteVirt((Addr)qDesc->hostReadIndexPtr, sizeof(aqlbuf->rdIdx()),
+ cb, aqlbuf->rdIdxPtr());
DPRINTF(HSAPacketProcessor,
"%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used
= %d," \
@@ -246,23 +203,10 @@
}
}
-HSAPacketProcessor::CmdQueueCmdDmaEvent::
-CmdQueueCmdDmaEvent(HSAPacketProcessor *_hsaPP, int _pid, bool _isRead,
- uint32_t _ix_start, unsigned _num_pkts,
- dma_series_ctx *_series_ctx, void *_dest_4debug)
- : Event(Default_Pri, AutoDelete), hsaPP(_hsaPP), pid(_pid),
isRead(_isRead),
- ix_start(_ix_start), num_pkts(_num_pkts), series_ctx(_series_ctx),
- dest_4debug(_dest_4debug)
-{
- setFlags(AutoDelete);
-
- DPRINTF(HSAPacketProcessor, "%s, ix = %d, npkts = %d," \
- "active list ID = %d\n", __FUNCTION__,
- _ix_start, num_pkts, series_ctx->rl_idx);
-}
-
void
-HSAPacketProcessor::CmdQueueCmdDmaEvent::process()
+HSAPacketProcessor::cmdQueueCmdDma(HSAPacketProcessor *hsaPP, int pid,
+ bool isRead, uint32_t ix_start, unsigned num_pkts,
+ dma_series_ctx *series_ctx, void *dest_4debug)
{
uint32_t rl_idx = series_ctx->rl_idx;
GEM5_VAR_USED AQLRingBuffer *aqlRingBuffer =
@@ -382,10 +326,12 @@
if (*signal_val != 0) {
// This signal is not yet ready, read it again
isReady = false;
- DepSignalsReadDmaEvent *sgnl_rd_evnt =
- new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
+
+ auto cb = new DmaVirtCallback<int64_t>(
+ [ = ] (const uint32_t &dma_data)
+ { dep_sgnl_rd_st->handleReadDMA(); }, 0);
dmaReadVirt(signal_addr,
sizeof(hsa_signal_value_t),
- sgnl_rd_evnt, signal_val);
+ cb, signal_val);
dep_sgnl_rd_st->pendingReads++;
DPRINTF(HSAPacketProcessor, "%s: Pending
reads %d," \
" active list %d\n", __FUNCTION__,
@@ -394,10 +340,11 @@
} else {
// This signal is not yet ready, read it again
isReady = false;
- DepSignalsReadDmaEvent *sgnl_rd_evnt =
- new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
+ auto cb = new DmaVirtCallback<int64_t>(
+ [ = ] (const uint32_t &dma_data)
+ { dep_sgnl_rd_st->handleReadDMA(); }, 0);
dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
- sgnl_rd_evnt, signal_val);
+ cb, signal_val);
dep_sgnl_rd_st->pendingReads++;
DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
" active list %d\n", __FUNCTION__,
@@ -579,19 +526,20 @@
}
void *aql_buf = aqlRingBuffer->ptr(dma_start_ix);
- CmdQueueCmdDmaEvent *dmaEvent
- = new CmdQueueCmdDmaEvent(this, pid, true, dma_start_ix,
- num_2_xfer, series_ctx, aql_buf);
- DPRINTF(HSAPacketProcessor,
- "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer
= %d\n",
- __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
-
+ auto cb = new DmaVirtCallback<uint64_t>(
+ [ = ] (const uint32_t &dma_data)
+ { this->cmdQueueCmdDma(this, pid, true, dma_start_ix,
+ num_2_xfer, series_ctx, aql_buf); }, 0);
dmaReadVirt(qDesc->ptr(umq_nxt), num_2_xfer * qDesc->objSize(),
- dmaEvent, aql_buf);
+ cb, aql_buf);
aqlRingBuffer->saveHostDispAddr(qDesc->ptr(umq_nxt), num_2_xfer,
dma_start_ix);
+ DPRINTF(HSAPacketProcessor,
+ "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer
= %d\n",
+ __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
+
num_umq -= num_2_xfer;
got_aql_buf -= num_2_xfer;
dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
diff --git a/src/dev/hsa/hsa_packet_processor.hh
b/src/dev/hsa/hsa_packet_processor.hh
index fe71612..facb20d 100644
--- a/src/dev/hsa/hsa_packet_processor.hh
+++ b/src/dev/hsa/hsa_packet_processor.hh
@@ -39,7 +39,7 @@
#include <vector>
#include "base/types.hh"
-#include "dev/dma_device.hh"
+#include "dev/dma_virt_device.hh"
#include "dev/hsa/hsa.h"
#include "dev/hsa/hsa_queue.hh"
#include "params/HSAPacketProcessor.hh"
@@ -219,7 +219,7 @@
QueueContext() : qDesc(NULL), aqlBuf(NULL), barrierBit(false) {}
} QCntxt;
-class HSAPacketProcessor: public DmaDevice
+class HSAPacketProcessor: public DmaVirtDevice
{
friend class HWScheduler;
protected:
@@ -288,15 +288,6 @@
// Keeps track of queueDescriptors of registered queues
std::vector<class RQLEntry *> regdQList;
- void translateOrDie(Addr vaddr, Addr &paddr);
- void dmaVirt(DmaFnPtr, Addr host_addr, unsigned size, Event *event,
- void *data, Tick delay = 0);
-
- void dmaReadVirt(Addr host_addr, unsigned size, Event *event,
- void *data, Tick delay = 0);
-
- void dmaWriteVirt(Addr host_addr, unsigned size, Event *event,
- void *data, Tick delay = 0);
Q_STATE processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr);
void displayQueueDescriptor(int pid, uint32_t rl_idx);
@@ -328,6 +319,7 @@
typedef HSAPacketProcessorParams Params;
HSAPacketProcessor(const Params &p);
~HSAPacketProcessor();
+ void translateOrDie(Addr vaddr, Addr &paddr) override;
void setDeviceQueueDesc(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
@@ -350,33 +342,6 @@
hsa_signal_value_t signal);
void sendCompletionSignal(hsa_signal_value_t signal);
- class DepSignalsReadDmaEvent : public Event
- {
- protected:
- SignalState *signalState;
- public:
- DepSignalsReadDmaEvent(SignalState *ss)
- : Event(Default_Pri, AutoDelete), signalState(ss)
- {}
- virtual void process() { signalState->handleReadDMA(); }
- virtual const char *description() const;
- };
-
- /**
- * this event is used to update the read_disp_id field (the read
pointer)
- * of the MQD, which is how the host code knows the status of the HQD's
- * read pointer
- */
- class UpdateReadDispIdDmaEvent : public Event
- {
- public:
- UpdateReadDispIdDmaEvent();
-
- void process() override { }
- const char *description() const override;
-
- };
-
/**
* Calls getCurrentEntry once the queueEntry has been dmaRead.
*/
@@ -398,24 +363,11 @@
~dma_series_ctx() {};
};
- class CmdQueueCmdDmaEvent : public Event
- {
- protected:
- HSAPacketProcessor *hsaPP;
- int pid;
- bool isRead;
- uint32_t ix_start;
- uint num_pkts;
- dma_series_ctx *series_ctx;
- void *dest_4debug;
-
- public:
- CmdQueueCmdDmaEvent(HSAPacketProcessor *hsaPP, int pid, bool
isRead,
- uint32_t dma_buf_ix, uint num_bufs,
- dma_series_ctx *series_ctx, void *dest_4debug);
- virtual void process();
- virtual const char *description() const;
- };
+ void updateReadDispIdDma();
+ void cmdQueueCmdDma(HSAPacketProcessor *hsaPP, int pid, bool isRead,
+ uint32_t ix_start, unsigned num_pkts,
+ dma_series_ctx *series_ctx, void *dest_4debug);
+ void handleReadDMA();
};
#endif // __DEV_HSA_HSA_PACKET_PROCESSOR__
diff --git a/src/gpu-compute/gpu_command_processor.cc
b/src/gpu-compute/gpu_command_processor.cc
index 9bdd0b9..2c63d13 100644
--- a/src/gpu-compute/gpu_command_processor.cc
+++ b/src/gpu-compute/gpu_command_processor.cc
@@ -45,7 +45,8 @@
#include "sim/syscall_emul_buf.hh"
GPUCommandProcessor::GPUCommandProcessor(const Params &p)
- : DmaDevice(p), dispatcher(*p.dispatcher), _driver(nullptr),
hsaPP(p.hsapp)
+ : DmaVirtDevice(p), dispatcher(*p.dispatcher), _driver(nullptr),
+ hsaPP(p.hsapp)
{
assert(hsaPP);
hsaPP->setDevice(this);
@@ -59,47 +60,6 @@
}
void
-GPUCommandProcessor::dmaReadVirt(Addr host_addr, unsigned size,
- DmaCallback *cb, void *data, Tick delay)
-{
- dmaVirt(&DmaDevice::dmaRead, host_addr, size, cb, data, delay);
-}
-
-void
-GPUCommandProcessor::dmaWriteVirt(Addr host_addr, unsigned size,
- DmaCallback *cb, void *data, Tick delay)
-{
- dmaVirt(&DmaDevice::dmaWrite, host_addr, size, cb, data, delay);
-}
-
-void
-GPUCommandProcessor::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
- DmaCallback *cb, void *data, Tick delay)
-{
- if (size == 0) {
- if (cb)
- schedule(cb->getChunkEvent(), curTick() + delay);
- return;
- }
-
- // move the buffer data pointer with the chunks
- uint8_t *loc_data = (uint8_t*)data;
-
- for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done();
gen.next()) {
- Addr phys;
-
- // translate pages into their corresponding frames
- translateOrDie(gen.addr(), phys);
-
- Event *event = cb ? cb->getChunkEvent() : nullptr;
-
- (this->*dmaFn)(phys, gen.size(), event, loc_data, delay);
-
- loc_data += gen.size();
- }
-}
-
-void
GPUCommandProcessor::translateOrDie(Addr vaddr, Addr &paddr)
{
/**
@@ -234,7 +194,7 @@
Addr event_addr = getHsaSignalEventAddr(signal_handle);
DPRINTF(GPUCommandProc, "Triggering completion signal: %x!\n",
value_addr);
- auto cb = new CPDmaCallback<uint64_t>(function, signal_value);
+ auto cb = new DmaVirtCallback<uint64_t>(function, signal_value);
dmaWriteVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer, 0);
@@ -373,7 +333,7 @@
void
GPUCommandProcessor::initABI(HSAQueueEntry *task)
{
- auto cb = new CPDmaCallback<uint32_t>(
+ auto cb = new DmaVirtCallback<uint32_t>(
[ = ] (const uint32_t &readDispIdOffset)
{ ReadDispIdOffsetDmaEvent(task, readDispIdOffset); }, 0);
diff --git a/src/gpu-compute/gpu_command_processor.hh
b/src/gpu-compute/gpu_command_processor.hh
index 9555e3b..7bea109 100644
--- a/src/gpu-compute/gpu_command_processor.hh
+++ b/src/gpu-compute/gpu_command_processor.hh
@@ -52,7 +52,7 @@
#include "base/trace.hh"
#include "base/types.hh"
#include "debug/GPUCommandProc.hh"
-#include "dev/dma_device.hh"
+#include "dev/dma_virt_device.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "dev/hsa/hsa_signal.hh"
#include "gpu-compute/dispatcher.hh"
@@ -65,7 +65,7 @@
class GPUDispatcher;
class Shader;
-class GPUCommandProcessor : public DmaDevice
+class GPUCommandProcessor : public DmaVirtDevice
{
public:
typedef GPUCommandProcessorParams Params;
@@ -76,11 +76,6 @@
HSAPacketProcessor& hsaPacketProc();
- void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb,
- void *data, Tick delay = 0);
- void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b,
- void *data, Tick delay = 0);
-
void setShader(Shader *shader);
Shader* shader();
GPUComputeDriver* driver();
@@ -137,37 +132,9 @@
typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
void initABI(HSAQueueEntry *task);
HSAPacketProcessor *hsaPP;
- void dmaVirt(DmaFnPtr, Addr host_addr, unsigned size, DmaCallback *cb,
- void *data, Tick delay = 0);
void translateOrDie(Addr vaddr, Addr &paddr);
/**
- * Wraps a std::function object in a DmaCallback. Much cleaner than
- * defining a bunch of callback objects for each desired behavior when
a
- * DMA completes. Contains a built in templated buffer that can be
used
- * for DMA temporary storage.
- */
- template <class T>
- class CPDmaCallback : public DmaCallback
- {
- std::function<void(const T &)> _function;
-
- virtual void
- process() override
- {
- _function(dmaBuffer);
- }
-
- public:
- T dmaBuffer;
-
- CPDmaCallback(const std::function<void(const T &)> &function,
- T dma_buffer_value = 0)
- : DmaCallback(), _function(function), dmaBuffer(dma_buffer_value)
- { }
- };
-
- /**
* Perform a DMA read of the read_dispatch_id_field_base_byte_offset
* field, which follows directly after the read_dispatch_id (the read
* pointer) in the amd_hsa_queue_t struct (aka memory queue descriptor
@@ -197,7 +164,7 @@
* DMA a copy of the MQD into the task. some fields of
* the MQD will be used to initialize register state in VI
*/
- auto *mqdDmaEvent = new CPDmaCallback<int>(
+ auto *mqdDmaEvent = new DmaVirtCallback<int>(
[ = ] (const int &) { MQDDmaEvent(task); });
dmaReadVirt(task->hostAMDQueueAddr,
@@ -279,7 +246,7 @@
* TODO: Technically only need to update private segment fields
* since other MQD entries won't change since we last read them.
*/
- auto cb = new CPDmaCallback<int>(
+ auto cb = new DmaVirtCallback<int>(
[ = ] (const int &) { MQDDmaEvent(task); });
dmaReadVirt(task->hostAMDQueueAddr, sizeof(_amd_queue_t), cb,
@@ -293,7 +260,7 @@
task->amdQueue.queue_inactive_signal.handle);
DPRINTF(GPUCommandProc, "Polling queue inactive signal at "
"%p.\n", value_addr);
- auto cb = new CPDmaCallback<uint64_t>(
+ auto cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &dma_buffer)
{ WaitScratchDmaEvent(task, dma_buffer); } );
dmaReadVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer);
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47179
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Idd59ccb4d9ba21c0b1150ee328ededf5a88d824e
Gerrit-Change-Number: 47179
Gerrit-PatchSet: 1
Gerrit-Owner: Matthew Poremba <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s