Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On 8/19/13 5:07 PM, "Jon Mason" wrote: >> >> Is this for the case where we are bouncing back and forth between >> >> sync/async? Otherwise I do not see how transactions could get out of >> >> order given you allocate a channel once per queue. Is this comment >> >> saying that the iowrite32 is somehow a fix, or is this comment a >> >> FIXME? >> > >> >There is a case for a mix, the "copy_bytes" variable above switches to >> >CPU for small transfers (which greatly increases throughput on small >> >transfers). The caveat to it is the need to flush the DMA engine to >> >prevent out-of-order. This comment is mainly a reminder of this >>issue. >> >> So this is going forward with the stall as a known issue? The next >>patch >> should just do the sync to prevent the re-ordering, right? > >There is already a dma_sync_wait in the error path of ntb_async_rx to >enforce the ordering. Do I need to change the comment (or move it) to >make it more obvious what is happening? Yeah, I think it just needs to move to the dma_sync_wait() otherwise it seems like it's an open issue that needs fixing. > + txd->callback = ntb_rx_copy_callback; >> >> > + txd->callback_param = entry; >> >> > + >> >> > + cookie = dmaengine_submit(txd); >> >> > + if (dma_submit_error(cookie)) >> >> > + goto err3; >> >> > + >> >> > + qp->last_cookie = cookie; >> >> > + >> >> > + dma_async_issue_pending(chan); >> >> >> >> hmm... can this go in ntb_process_rx() so that the submission is >> >> batched? Cuts down on mmio. >> > >> >I moved it down to ntb_transport_rx (after the calls to >> >ntb_process_rxc), and the performance seems to be roughly the same. >> >> Yeah, not expecting it to be noticeable, but conceptually >> >> submit >> submit >> submit >> submit >> issue >> >> >> Is nicer than: >> >> submit >> issue >> submit >> issue >> >> > >I agree, but I liked having all the dma engine awareness >compartmentalized in the ntb_async_* and callbacks. Ok, makes sense. 
-- Dan -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On Mon, Aug 19, 2013 at 11:36:13PM +0000, Dan Williams wrote: > > > On 8/19/13 1:37 PM, "Jon Mason" wrote: > > >On Mon, Aug 19, 2013 at 03:01:54AM -0700, Dan Williams wrote: > >> On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason wrote: > >> > Allocate and use a DMA engine channel to transmit and receive data > >>over > >> > NTB. If none is allocated, fall back to using the CPU to transfer > >>data. > >> > > >> > Cc: Dan Williams > >> > Cc: Vinod Koul > >> > Cc: Dave Jiang > >> > Signed-off-by: Jon Mason > >> > --- > >> > drivers/ntb/ntb_hw.c| 17 +++ > >> > drivers/ntb/ntb_hw.h|1 + > >> > drivers/ntb/ntb_transport.c | 285 > >>--- > >> > 3 files changed, 258 insertions(+), 45 deletions(-) > >> > > >> > diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c > >> > index 1d8e551..014222c 100644 > >> > --- a/drivers/ntb/ntb_hw.c > >> > +++ b/drivers/ntb/ntb_hw.c > >> > @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device > >>*ndev, unsigned int idx, u32 *val) > >> > } > >> > > >> > /** > >> > + * ntb_get_mw_base() - get addr for the NTB memory window > >> > + * @ndev: pointer to ntb_device instance > >> > + * @mw: memory window number > >> > + * > >> > + * This function provides the base address of the memory window > >>specified. > >> > + * > >> > + * RETURNS: address, or NULL on error. > >> > + */ > >> > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned > >>int mw) > >> > +{ > >> > + if (mw >= ntb_max_mw(ndev)) > >> > + return 0; > >> > + > >> > + return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); > >> > +} > > Nothing does error checking on this return value. I think the code should > either be sure that 'mw' is valid (mw_num is passed to the > ntb_get_mw_vbase helper too) and delete the check, or at least make it a > WARN_ONCE. The former seems a tad cleaner to me. Ugh! Thanks. 
> > > >> > + > >> > +/** > >> > * ntb_get_mw_vbase() - get virtual addr for the NTB memory window > >> > * @ndev: pointer to ntb_device instance > >> > * @mw: memory window number > >> > diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h > >> > index b03de80..ab5f768 100644 > >> > --- a/drivers/ntb/ntb_hw.h > >> > +++ b/drivers/ntb/ntb_hw.h > >> > @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, > >>unsigned int idx, u32 val); > >> > int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, > >>u32 *val); > >> > int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, > >>u32 val); > >> > int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, > >>u32 *val); > >> > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned > >>int mw); > >> > void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int > >>mw); > >> > u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); > >> > void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); > >> > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c > >> > index f7380e9..73a35e4 100644 > >> > --- a/drivers/ntb/ntb_transport.c > >> > +++ b/drivers/ntb/ntb_transport.c > >> > @@ -47,6 +47,7 @@ > >> > */ > >> > #include > >> > #include > >> > +#include > >> > #include > >> > #include > >> > #include > >> > @@ -68,6 +69,10 @@ static unsigned char max_num_clients; > >> > module_param(max_num_clients, byte, 0644); > >> > MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport > >>clients"); > >> > > >> > +static unsigned int copy_bytes = 1024; > >> > +module_param(copy_bytes, uint, 0644); > >> > +MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the > >>CPU to copy instead of DMA"); > >> > + > >> > struct ntb_queue_entry { > >> > /* ntb_queue list reference */ > >> > struct list_head entry; > >> > @@ -76,6 +81,13 @@ struct ntb_queue_entry { > >> > void *buf; > >> > unsigned int len; > >> > 
unsigned int flags; > >> > + > >> > + struct ntb_transport_qp *qp; > >> > + union { > >> > + struct ntb_payload_header __iomem *tx_hdr; > >> > + struct ntb_payload_header *rx_hdr; > >> > + }; > >> > + unsigned int index; > >> > }; > >> > > >> > struct ntb_rx_info { > >> > @@ -86,6 +98,7 @@ struct ntb_transport_qp { > >> > struct ntb_transport *transport; > >> > struct ntb_device *ndev; > >> > void *cb_data; > >> > + struct dma_chan *dma_chan; > >> > > >> > bool client_ready; > >> > bool qp_link; > >> > @@ -99,6 +112,7 @@ struct ntb_transport_qp { > >> > struct list_head tx_free_q; > >> > spinlock_t ntb_tx_free_q_lock; > >> > void __iomem *tx_mw; > >> > + dma_addr_t tx_mw_raw; > >> > unsigned int tx_index; > >> > unsigned int tx_max_entry; > >> > unsigned int tx_max_frame; > >> > @@ -114,6 +128,7 @@ struct ntb_transport_qp { >
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On 8/19/13 1:37 PM, "Jon Mason" wrote: >On Mon, Aug 19, 2013 at 03:01:54AM -0700, Dan Williams wrote: >> On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason wrote: >> > Allocate and use a DMA engine channel to transmit and receive data >>over >> > NTB. If none is allocated, fall back to using the CPU to transfer >>data. >> > >> > Cc: Dan Williams >> > Cc: Vinod Koul >> > Cc: Dave Jiang >> > Signed-off-by: Jon Mason >> > --- >> > drivers/ntb/ntb_hw.c| 17 +++ >> > drivers/ntb/ntb_hw.h|1 + >> > drivers/ntb/ntb_transport.c | 285 >>--- >> > 3 files changed, 258 insertions(+), 45 deletions(-) >> > >> > diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c >> > index 1d8e551..014222c 100644 >> > --- a/drivers/ntb/ntb_hw.c >> > +++ b/drivers/ntb/ntb_hw.c >> > @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device >>*ndev, unsigned int idx, u32 *val) >> > } >> > >> > /** >> > + * ntb_get_mw_base() - get addr for the NTB memory window >> > + * @ndev: pointer to ntb_device instance >> > + * @mw: memory window number >> > + * >> > + * This function provides the base address of the memory window >>specified. >> > + * >> > + * RETURNS: address, or NULL on error. >> > + */ >> > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned >>int mw) >> > +{ >> > + if (mw >= ntb_max_mw(ndev)) >> > + return 0; >> > + >> > + return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); >> > +} Nothing does error checking on this return value. I think the code should either be sure that 'mw' is valid (mw_num is passed to the ntb_get_mw_vbase helper too) and delete the check, or at least make it a WARN_ONCE. The former seems a tad cleaner to me. 
>> > + >> > +/** >> > * ntb_get_mw_vbase() - get virtual addr for the NTB memory window >> > * @ndev: pointer to ntb_device instance >> > * @mw: memory window number >> > diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h >> > index b03de80..ab5f768 100644 >> > --- a/drivers/ntb/ntb_hw.h >> > +++ b/drivers/ntb/ntb_hw.h >> > @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, >>unsigned int idx, u32 val); >> > int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, >>u32 *val); >> > int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, >>u32 val); >> > int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, >>u32 *val); >> > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned >>int mw); >> > void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int >>mw); >> > u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); >> > void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); >> > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c >> > index f7380e9..73a35e4 100644 >> > --- a/drivers/ntb/ntb_transport.c >> > +++ b/drivers/ntb/ntb_transport.c >> > @@ -47,6 +47,7 @@ >> > */ >> > #include >> > #include >> > +#include >> > #include >> > #include >> > #include >> > @@ -68,6 +69,10 @@ static unsigned char max_num_clients; >> > module_param(max_num_clients, byte, 0644); >> > MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport >>clients"); >> > >> > +static unsigned int copy_bytes = 1024; >> > +module_param(copy_bytes, uint, 0644); >> > +MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the >>CPU to copy instead of DMA"); >> > + >> > struct ntb_queue_entry { >> > /* ntb_queue list reference */ >> > struct list_head entry; >> > @@ -76,6 +81,13 @@ struct ntb_queue_entry { >> > void *buf; >> > unsigned int len; >> > unsigned int flags; >> > + >> > + struct ntb_transport_qp *qp; >> > + union { >> > + struct ntb_payload_header 
__iomem *tx_hdr; >> > + struct ntb_payload_header *rx_hdr; >> > + }; >> > + unsigned int index; >> > }; >> > >> > struct ntb_rx_info { >> > @@ -86,6 +98,7 @@ struct ntb_transport_qp { >> > struct ntb_transport *transport; >> > struct ntb_device *ndev; >> > void *cb_data; >> > + struct dma_chan *dma_chan; >> > >> > bool client_ready; >> > bool qp_link; >> > @@ -99,6 +112,7 @@ struct ntb_transport_qp { >> > struct list_head tx_free_q; >> > spinlock_t ntb_tx_free_q_lock; >> > void __iomem *tx_mw; >> > + dma_addr_t tx_mw_raw; >> > unsigned int tx_index; >> > unsigned int tx_max_entry; >> > unsigned int tx_max_frame; >> > @@ -114,6 +128,7 @@ struct ntb_transport_qp { >> > unsigned int rx_index; >> > unsigned int rx_max_entry; >> > unsigned int rx_max_frame; >> > + dma_cookie_t last_cookie; >> > >> > void (*event_handler) (void *data, int status); >> > struct delayed_work link_work; >> > @@ -129,9 +144,14 @@ struct ntb_transport_qp { >> > u64
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On Mon, Aug 19, 2013 at 03:01:54AM -0700, Dan Williams wrote: > On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason wrote: > > Allocate and use a DMA engine channel to transmit and receive data over > > NTB. If none is allocated, fall back to using the CPU to transfer data. > > > > Cc: Dan Williams > > Cc: Vinod Koul > > Cc: Dave Jiang > > Signed-off-by: Jon Mason > > --- > > drivers/ntb/ntb_hw.c| 17 +++ > > drivers/ntb/ntb_hw.h|1 + > > drivers/ntb/ntb_transport.c | 285 > > --- > > 3 files changed, 258 insertions(+), 45 deletions(-) > > > > diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c > > index 1d8e551..014222c 100644 > > --- a/drivers/ntb/ntb_hw.c > > +++ b/drivers/ntb/ntb_hw.c > > @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, > > unsigned int idx, u32 *val) > > } > > > > /** > > + * ntb_get_mw_base() - get addr for the NTB memory window > > + * @ndev: pointer to ntb_device instance > > + * @mw: memory window number > > + * > > + * This function provides the base address of the memory window specified. > > + * > > + * RETURNS: address, or NULL on error. 
> > + */ > > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) > > +{ > > + if (mw >= ntb_max_mw(ndev)) > > + return 0; > > + > > + return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); > > +} > > + > > +/** > > * ntb_get_mw_vbase() - get virtual addr for the NTB memory window > > * @ndev: pointer to ntb_device instance > > * @mw: memory window number > > diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h > > index b03de80..ab5f768 100644 > > --- a/drivers/ntb/ntb_hw.h > > +++ b/drivers/ntb/ntb_hw.h > > @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, > > unsigned int idx, u32 val); > > int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 > > *val); > > int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 > > val); > > int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 > > *val); > > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); > > void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); > > u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); > > void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); > > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c > > index f7380e9..73a35e4 100644 > > --- a/drivers/ntb/ntb_transport.c > > +++ b/drivers/ntb/ntb_transport.c > > @@ -47,6 +47,7 @@ > > */ > > #include > > #include > > +#include > > #include > > #include > > #include > > @@ -68,6 +69,10 @@ static unsigned char max_num_clients; > > module_param(max_num_clients, byte, 0644); > > MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport > > clients"); > > > > +static unsigned int copy_bytes = 1024; > > +module_param(copy_bytes, uint, 0644); > > +MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU > > to copy instead of DMA"); > > + > > struct ntb_queue_entry { > > /* ntb_queue list reference */ > > struct list_head entry; > > @@ -76,6 +81,13 @@ 
struct ntb_queue_entry { > > void *buf; > > unsigned int len; > > unsigned int flags; > > + > > + struct ntb_transport_qp *qp; > > + union { > > + struct ntb_payload_header __iomem *tx_hdr; > > + struct ntb_payload_header *rx_hdr; > > + }; > > + unsigned int index; > > }; > > > > struct ntb_rx_info { > > @@ -86,6 +98,7 @@ struct ntb_transport_qp { > > struct ntb_transport *transport; > > struct ntb_device *ndev; > > void *cb_data; > > + struct dma_chan *dma_chan; > > > > bool client_ready; > > bool qp_link; > > @@ -99,6 +112,7 @@ struct ntb_transport_qp { > > struct list_head tx_free_q; > > spinlock_t ntb_tx_free_q_lock; > > void __iomem *tx_mw; > > + dma_addr_t tx_mw_raw; > > unsigned int tx_index; > > unsigned int tx_max_entry; > > unsigned int tx_max_frame; > > @@ -114,6 +128,7 @@ struct ntb_transport_qp { > > unsigned int rx_index; > > unsigned int rx_max_entry; > > unsigned int rx_max_frame; > > + dma_cookie_t last_cookie; > > > > void (*event_handler) (void *data, int status); > > struct delayed_work link_work; > > @@ -129,9 +144,14 @@ struct ntb_transport_qp { > > u64 rx_err_no_buf; > > u64 rx_err_oflow; > > u64 rx_err_ver; > > + u64 rx_memcpy; > > + u64 rx_async; > > u64 tx_bytes; > > u64 tx_pkts; > > u64 tx_ring_full; > > + u64 tx_err_no_buf; > > + u64 tx_memcpy; > > + u64 tx_async; > > }; > > > > struct ntb_transport_mw { > > @@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char > > __user
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason wrote: > Allocate and use a DMA engine channel to transmit and receive data over > NTB. If none is allocated, fall back to using the CPU to transfer data. > > Cc: Dan Williams > Cc: Vinod Koul > Cc: Dave Jiang > Signed-off-by: Jon Mason > --- > drivers/ntb/ntb_hw.c| 17 +++ > drivers/ntb/ntb_hw.h|1 + > drivers/ntb/ntb_transport.c | 285 > --- > 3 files changed, 258 insertions(+), 45 deletions(-) > > diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c > index 1d8e551..014222c 100644 > --- a/drivers/ntb/ntb_hw.c > +++ b/drivers/ntb/ntb_hw.c > @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, > unsigned int idx, u32 *val) > } > > /** > + * ntb_get_mw_base() - get addr for the NTB memory window > + * @ndev: pointer to ntb_device instance > + * @mw: memory window number > + * > + * This function provides the base address of the memory window specified. > + * > + * RETURNS: address, or NULL on error. > + */ > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) > +{ > + if (mw >= ntb_max_mw(ndev)) > + return 0; > + > + return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); > +} > + > +/** > * ntb_get_mw_vbase() - get virtual addr for the NTB memory window > * @ndev: pointer to ntb_device instance > * @mw: memory window number > diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h > index b03de80..ab5f768 100644 > --- a/drivers/ntb/ntb_hw.h > +++ b/drivers/ntb/ntb_hw.h > @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, > unsigned int idx, u32 val); > int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); > int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 > val); > int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 > *val); > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); > void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); > u64 
ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); > void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c > index f7380e9..73a35e4 100644 > --- a/drivers/ntb/ntb_transport.c > +++ b/drivers/ntb/ntb_transport.c > @@ -47,6 +47,7 @@ > */ > #include > #include > +#include > #include > #include > #include > @@ -68,6 +69,10 @@ static unsigned char max_num_clients; > module_param(max_num_clients, byte, 0644); > MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients"); > > +static unsigned int copy_bytes = 1024; > +module_param(copy_bytes, uint, 0644); > +MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to > copy instead of DMA"); > + > struct ntb_queue_entry { > /* ntb_queue list reference */ > struct list_head entry; > @@ -76,6 +81,13 @@ struct ntb_queue_entry { > void *buf; > unsigned int len; > unsigned int flags; > + > + struct ntb_transport_qp *qp; > + union { > + struct ntb_payload_header __iomem *tx_hdr; > + struct ntb_payload_header *rx_hdr; > + }; > + unsigned int index; > }; > > struct ntb_rx_info { > @@ -86,6 +98,7 @@ struct ntb_transport_qp { > struct ntb_transport *transport; > struct ntb_device *ndev; > void *cb_data; > + struct dma_chan *dma_chan; > > bool client_ready; > bool qp_link; > @@ -99,6 +112,7 @@ struct ntb_transport_qp { > struct list_head tx_free_q; > spinlock_t ntb_tx_free_q_lock; > void __iomem *tx_mw; > + dma_addr_t tx_mw_raw; > unsigned int tx_index; > unsigned int tx_max_entry; > unsigned int tx_max_frame; > @@ -114,6 +128,7 @@ struct ntb_transport_qp { > unsigned int rx_index; > unsigned int rx_max_entry; > unsigned int rx_max_frame; > + dma_cookie_t last_cookie; > > void (*event_handler) (void *data, int status); > struct delayed_work link_work; > @@ -129,9 +144,14 @@ struct ntb_transport_qp { > u64 rx_err_no_buf; > u64 rx_err_oflow; > u64 rx_err_ver; > + u64 rx_memcpy; > + u64 rx_async; > u64 tx_bytes; > 
u64 tx_pkts; > u64 tx_ring_full; > + u64 tx_err_no_buf; > + u64 tx_memcpy; > + u64 tx_async; > }; > > struct ntb_transport_mw { > @@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char > __user *ubuf, size_t count, > char *buf; > ssize_t ret, out_offset, out_count; > > - out_count = 600; > + out_count = 1000; > > buf = kmalloc(out_count, GFP_KERNEL); > if (!buf) > @@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char > __user *ubuf, size_t count, > out_offset +=
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason wrote: > Allocate and use a DMA engine channel to transmit and receive data over > NTB. If none is allocated, fall back to using the CPU to transfer data. > > Cc: Dan Williams > Cc: Vinod Koul > Cc: Dave Jiang > Signed-off-by: Jon Mason > --- > drivers/ntb/ntb_hw.c| 17 +++ > drivers/ntb/ntb_hw.h|1 + > drivers/ntb/ntb_transport.c | 285 > --- > 3 files changed, 258 insertions(+), 45 deletions(-) > > diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c > index 1d8e551..014222c 100644 > --- a/drivers/ntb/ntb_hw.c > +++ b/drivers/ntb/ntb_hw.c > @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, > unsigned int idx, u32 *val) > } > > /** > + * ntb_get_mw_base() - get addr for the NTB memory window > + * @ndev: pointer to ntb_device instance > + * @mw: memory window number > + * > + * This function provides the base address of the memory window specified. > + * > + * RETURNS: address, or NULL on error. > + */ > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) > +{ > + if (mw >= ntb_max_mw(ndev)) > + return 0; > + > + return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); > +} > + > +/** > * ntb_get_mw_vbase() - get virtual addr for the NTB memory window > * @ndev: pointer to ntb_device instance > * @mw: memory window number > diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h > index b03de80..ab5f768 100644 > --- a/drivers/ntb/ntb_hw.h > +++ b/drivers/ntb/ntb_hw.h > @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, > unsigned int idx, u32 val); > int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); > int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 > val); > int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 > *val); > +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); > void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); > u64 
ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); > void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c > index f7380e9..73a35e4 100644 > --- a/drivers/ntb/ntb_transport.c > +++ b/drivers/ntb/ntb_transport.c > @@ -47,6 +47,7 @@ > */ > #include > #include > +#include > #include > #include > #include > @@ -68,6 +69,10 @@ static unsigned char max_num_clients; > module_param(max_num_clients, byte, 0644); > MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients"); > > +static unsigned int copy_bytes = 1024; > +module_param(copy_bytes, uint, 0644); > +MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to > copy instead of DMA"); > + > struct ntb_queue_entry { > /* ntb_queue list reference */ > struct list_head entry; > @@ -76,6 +81,13 @@ struct ntb_queue_entry { > void *buf; > unsigned int len; > unsigned int flags; > + > + struct ntb_transport_qp *qp; > + union { > + struct ntb_payload_header __iomem *tx_hdr; > + struct ntb_payload_header *rx_hdr; > + }; > + unsigned int index; > }; > > struct ntb_rx_info { > @@ -86,6 +98,7 @@ struct ntb_transport_qp { > struct ntb_transport *transport; > struct ntb_device *ndev; > void *cb_data; > + struct dma_chan *dma_chan; > > bool client_ready; > bool qp_link; > @@ -99,6 +112,7 @@ struct ntb_transport_qp { > struct list_head tx_free_q; > spinlock_t ntb_tx_free_q_lock; > void __iomem *tx_mw; > + dma_addr_t tx_mw_raw; > unsigned int tx_index; > unsigned int tx_max_entry; > unsigned int tx_max_frame; > @@ -114,6 +128,7 @@ struct ntb_transport_qp { > unsigned int rx_index; > unsigned int rx_max_entry; > unsigned int rx_max_frame; > + dma_cookie_t last_cookie; > > void (*event_handler) (void *data, int status); > struct delayed_work link_work; > @@ -129,9 +144,14 @@ struct ntb_transport_qp { > u64 rx_err_no_buf; > u64 rx_err_oflow; > u64 rx_err_ver; > + u64 rx_memcpy; > + u64 rx_async; > u64 tx_bytes; > 
u64 tx_pkts; > u64 tx_ring_full; > + u64 tx_err_no_buf; > + u64 tx_memcpy; > + u64 tx_async; > }; > > struct ntb_transport_mw { > @@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char > __user *ubuf, size_t count, > char *buf; > ssize_t ret, out_offset, out_count; > > - out_count = 600; > + out_count = 1000; > > buf = kmalloc(out_count, GFP_KERNEL); > if (!buf) > @@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char > __user *ubuf, size_t count, > out_offset +=
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason jon.ma...@intel.com wrote: Allocate and use a DMA engine channel to transmit and receive data over NTB. If none is allocated, fall back to using the CPU to transfer data. Cc: Dan Williams d...@fb.com Cc: Vinod Koul vinod.k...@intel.com Cc: Dave Jiang dave.ji...@intel.com Signed-off-by: Jon Mason jon.ma...@intel.com --- drivers/ntb/ntb_hw.c| 17 +++ drivers/ntb/ntb_hw.h|1 + drivers/ntb/ntb_transport.c | 285 --- 3 files changed, 258 insertions(+), 45 deletions(-) diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index 1d8e551..014222c 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) } /** + * ntb_get_mw_base() - get addr for the NTB memory window + * @ndev: pointer to ntb_device instance + * @mw: memory window number + * + * This function provides the base address of the memory window specified. + * + * RETURNS: address, or NULL on error. 
+ */ +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) +{ + if (mw = ntb_max_mw(ndev)) + return 0; + + return pci_resource_start(ndev-pdev, MW_TO_BAR(mw)); +} + +/** * ntb_get_mw_vbase() - get virtual addr for the NTB memory window * @ndev: pointer to ntb_device instance * @mw: memory window number diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index b03de80..ab5f768 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f7380e9..73a35e4 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -47,6 +47,7 @@ */ #include linux/debugfs.h #include linux/delay.h +#include linux/dmaengine.h #include linux/dma-mapping.h #include linux/errno.h #include linux/export.h @@ -68,6 +69,10 @@ static unsigned char max_num_clients; module_param(max_num_clients, byte, 0644); MODULE_PARM_DESC(max_num_clients, Maximum number of NTB transport clients); +static unsigned int copy_bytes = 1024; +module_param(copy_bytes, uint, 0644); +MODULE_PARM_DESC(copy_bytes, Threshold under which NTB will use the CPU to copy instead of DMA); + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; @@ -76,6 +81,13 @@ struct ntb_queue_entry { void *buf; unsigned int len; unsigned int flags; + + struct ntb_transport_qp *qp; + union { + struct 
ntb_payload_header __iomem *tx_hdr; + struct ntb_payload_header *rx_hdr; + }; + unsigned int index; }; struct ntb_rx_info { @@ -86,6 +98,7 @@ struct ntb_transport_qp { struct ntb_transport *transport; struct ntb_device *ndev; void *cb_data; + struct dma_chan *dma_chan; bool client_ready; bool qp_link; @@ -99,6 +112,7 @@ struct ntb_transport_qp { struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; + dma_addr_t tx_mw_raw; unsigned int tx_index; unsigned int tx_max_entry; unsigned int tx_max_frame; @@ -114,6 +128,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + dma_cookie_t last_cookie; void (*event_handler) (void *data, int status); struct delayed_work link_work; @@ -129,9 +144,14 @@ struct ntb_transport_qp { u64 rx_err_no_buf; u64 rx_err_oflow; u64 rx_err_ver; + u64 rx_memcpy; + u64 rx_async; u64 tx_bytes; u64 tx_pkts; u64 tx_ring_full; + u64 tx_err_no_buf; + u64 tx_memcpy; + u64 tx_async; }; struct ntb_transport_mw { @@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, char *buf; ssize_t ret, out_offset, out_count; - out_count = 600; + out_count = 1000; buf = kmalloc(out_count, GFP_KERNEL); if (!buf) @@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason jon.ma...@intel.com wrote: Allocate and use a DMA engine channel to transmit and receive data over NTB. If none is allocated, fall back to using the CPU to transfer data. Cc: Dan Williams d...@fb.com Cc: Vinod Koul vinod.k...@intel.com Cc: Dave Jiang dave.ji...@intel.com Signed-off-by: Jon Mason jon.ma...@intel.com --- drivers/ntb/ntb_hw.c| 17 +++ drivers/ntb/ntb_hw.h|1 + drivers/ntb/ntb_transport.c | 285 --- 3 files changed, 258 insertions(+), 45 deletions(-) diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index 1d8e551..014222c 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) } /** + * ntb_get_mw_base() - get addr for the NTB memory window + * @ndev: pointer to ntb_device instance + * @mw: memory window number + * + * This function provides the base address of the memory window specified. + * + * RETURNS: address, or NULL on error. 
+ */ +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) +{ + if (mw = ntb_max_mw(ndev)) + return 0; + + return pci_resource_start(ndev-pdev, MW_TO_BAR(mw)); +} + +/** * ntb_get_mw_vbase() - get virtual addr for the NTB memory window * @ndev: pointer to ntb_device instance * @mw: memory window number diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index b03de80..ab5f768 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f7380e9..73a35e4 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -47,6 +47,7 @@ */ #include linux/debugfs.h #include linux/delay.h +#include linux/dmaengine.h #include linux/dma-mapping.h #include linux/errno.h #include linux/export.h @@ -68,6 +69,10 @@ static unsigned char max_num_clients; module_param(max_num_clients, byte, 0644); MODULE_PARM_DESC(max_num_clients, Maximum number of NTB transport clients); +static unsigned int copy_bytes = 1024; +module_param(copy_bytes, uint, 0644); +MODULE_PARM_DESC(copy_bytes, Threshold under which NTB will use the CPU to copy instead of DMA); + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; @@ -76,6 +81,13 @@ struct ntb_queue_entry { void *buf; unsigned int len; unsigned int flags; + + struct ntb_transport_qp *qp; + union { + struct 
ntb_payload_header __iomem *tx_hdr; + struct ntb_payload_header *rx_hdr; + }; + unsigned int index; }; struct ntb_rx_info { @@ -86,6 +98,7 @@ struct ntb_transport_qp { struct ntb_transport *transport; struct ntb_device *ndev; void *cb_data; + struct dma_chan *dma_chan; bool client_ready; bool qp_link; @@ -99,6 +112,7 @@ struct ntb_transport_qp { struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; + dma_addr_t tx_mw_raw; unsigned int tx_index; unsigned int tx_max_entry; unsigned int tx_max_frame; @@ -114,6 +128,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + dma_cookie_t last_cookie; void (*event_handler) (void *data, int status); struct delayed_work link_work; @@ -129,9 +144,14 @@ struct ntb_transport_qp { u64 rx_err_no_buf; u64 rx_err_oflow; u64 rx_err_ver; + u64 rx_memcpy; + u64 rx_async; u64 tx_bytes; u64 tx_pkts; u64 tx_ring_full; + u64 tx_err_no_buf; + u64 tx_memcpy; + u64 tx_async; }; struct ntb_transport_mw { @@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, char *buf; ssize_t ret, out_offset, out_count; - out_count = 600; + out_count = 1000; buf = kmalloc(out_count, GFP_KERNEL); if (!buf) @@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On Mon, Aug 19, 2013 at 03:01:54AM -0700, Dan Williams wrote: On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason jon.ma...@intel.com wrote: Allocate and use a DMA engine channel to transmit and receive data over NTB. If none is allocated, fall back to using the CPU to transfer data. Cc: Dan Williams d...@fb.com Cc: Vinod Koul vinod.k...@intel.com Cc: Dave Jiang dave.ji...@intel.com Signed-off-by: Jon Mason jon.ma...@intel.com --- drivers/ntb/ntb_hw.c| 17 +++ drivers/ntb/ntb_hw.h|1 + drivers/ntb/ntb_transport.c | 285 --- 3 files changed, 258 insertions(+), 45 deletions(-) diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index 1d8e551..014222c 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) } /** + * ntb_get_mw_base() - get addr for the NTB memory window + * @ndev: pointer to ntb_device instance + * @mw: memory window number + * + * This function provides the base address of the memory window specified. + * + * RETURNS: address, or NULL on error. 
+ */ +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) +{ + if (mw = ntb_max_mw(ndev)) + return 0; + + return pci_resource_start(ndev-pdev, MW_TO_BAR(mw)); +} + +/** * ntb_get_mw_vbase() - get virtual addr for the NTB memory window * @ndev: pointer to ntb_device instance * @mw: memory window number diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index b03de80..ab5f768 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f7380e9..73a35e4 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -47,6 +47,7 @@ */ #include linux/debugfs.h #include linux/delay.h +#include linux/dmaengine.h #include linux/dma-mapping.h #include linux/errno.h #include linux/export.h @@ -68,6 +69,10 @@ static unsigned char max_num_clients; module_param(max_num_clients, byte, 0644); MODULE_PARM_DESC(max_num_clients, Maximum number of NTB transport clients); +static unsigned int copy_bytes = 1024; +module_param(copy_bytes, uint, 0644); +MODULE_PARM_DESC(copy_bytes, Threshold under which NTB will use the CPU to copy instead of DMA); + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; @@ -76,6 +81,13 @@ struct ntb_queue_entry { void *buf; unsigned int len; unsigned int flags; + + struct ntb_transport_qp *qp; + union { + struct 
ntb_payload_header __iomem *tx_hdr; + struct ntb_payload_header *rx_hdr; + }; + unsigned int index; }; struct ntb_rx_info { @@ -86,6 +98,7 @@ struct ntb_transport_qp { struct ntb_transport *transport; struct ntb_device *ndev; void *cb_data; + struct dma_chan *dma_chan; bool client_ready; bool qp_link; @@ -99,6 +112,7 @@ struct ntb_transport_qp { struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; + dma_addr_t tx_mw_raw; unsigned int tx_index; unsigned int tx_max_entry; unsigned int tx_max_frame; @@ -114,6 +128,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + dma_cookie_t last_cookie; void (*event_handler) (void *data, int status); struct delayed_work link_work; @@ -129,9 +144,14 @@ struct ntb_transport_qp { u64 rx_err_no_buf; u64 rx_err_oflow; u64 rx_err_ver; + u64 rx_memcpy; + u64 rx_async; u64 tx_bytes; u64 tx_pkts; u64 tx_ring_full; + u64 tx_err_no_buf; + u64 tx_memcpy; + u64 tx_async; }; struct ntb_transport_mw { @@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, char *buf; ssize_t ret, out_offset, out_count; -
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On 8/19/13 1:37 PM, Jon Mason jon.ma...@intel.com wrote: On Mon, Aug 19, 2013 at 03:01:54AM -0700, Dan Williams wrote: On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason jon.ma...@intel.com wrote: Allocate and use a DMA engine channel to transmit and receive data over NTB. If none is allocated, fall back to using the CPU to transfer data. Cc: Dan Williams d...@fb.com Cc: Vinod Koul vinod.k...@intel.com Cc: Dave Jiang dave.ji...@intel.com Signed-off-by: Jon Mason jon.ma...@intel.com --- drivers/ntb/ntb_hw.c| 17 +++ drivers/ntb/ntb_hw.h|1 + drivers/ntb/ntb_transport.c | 285 --- 3 files changed, 258 insertions(+), 45 deletions(-) diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index 1d8e551..014222c 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) } /** + * ntb_get_mw_base() - get addr for the NTB memory window + * @ndev: pointer to ntb_device instance + * @mw: memory window number + * + * This function provides the base address of the memory window specified. + * + * RETURNS: address, or NULL on error. + */ +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) +{ + if (mw = ntb_max_mw(ndev)) + return 0; + + return pci_resource_start(ndev-pdev, MW_TO_BAR(mw)); +} Nothing does error checking on this return value. I think the code should either be sure that Œmw' is valid (mw_num is passed to the ntb_get_mw_vbase helper too) and delete the check, or at least make it a WARN_ONCE. The former seems a tad cleaner to me. 
+ +/** * ntb_get_mw_vbase() - get virtual addr for the NTB memory window * @ndev: pointer to ntb_device instance * @mw: memory window number diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index b03de80..ab5f768 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f7380e9..73a35e4 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -47,6 +47,7 @@ */ #include linux/debugfs.h #include linux/delay.h +#include linux/dmaengine.h #include linux/dma-mapping.h #include linux/errno.h #include linux/export.h @@ -68,6 +69,10 @@ static unsigned char max_num_clients; module_param(max_num_clients, byte, 0644); MODULE_PARM_DESC(max_num_clients, Maximum number of NTB transport clients); +static unsigned int copy_bytes = 1024; +module_param(copy_bytes, uint, 0644); +MODULE_PARM_DESC(copy_bytes, Threshold under which NTB will use the CPU to copy instead of DMA); + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; @@ -76,6 +81,13 @@ struct ntb_queue_entry { void *buf; unsigned int len; unsigned int flags; + + struct ntb_transport_qp *qp; + union { + struct ntb_payload_header __iomem *tx_hdr; + struct ntb_payload_header *rx_hdr; + }; + unsigned int index; }; struct ntb_rx_info { @@ -86,6 +98,7 @@ struct ntb_transport_qp { struct 
ntb_transport *transport; struct ntb_device *ndev; void *cb_data; + struct dma_chan *dma_chan; bool client_ready; bool qp_link; @@ -99,6 +112,7 @@ struct ntb_transport_qp { struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; + dma_addr_t tx_mw_raw; unsigned int tx_index; unsigned int tx_max_entry; unsigned int tx_max_frame; @@ -114,6 +128,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + dma_cookie_t last_cookie; void (*event_handler) (void *data, int status); struct delayed_work link_work; @@ -129,9 +144,14 @@ struct ntb_transport_qp { u64 rx_err_no_buf; u64 rx_err_oflow; u64 rx_err_ver; + u64 rx_memcpy; + u64 rx_async; u64 tx_bytes; u64 tx_pkts; u64 tx_ring_full; +
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On Mon, Aug 19, 2013 at 11:36:13PM +, Dan Williams wrote: On 8/19/13 1:37 PM, Jon Mason jon.ma...@intel.com wrote: On Mon, Aug 19, 2013 at 03:01:54AM -0700, Dan Williams wrote: On Fri, Aug 2, 2013 at 10:35 AM, Jon Mason jon.ma...@intel.com wrote: Allocate and use a DMA engine channel to transmit and receive data over NTB. If none is allocated, fall back to using the CPU to transfer data. Cc: Dan Williams d...@fb.com Cc: Vinod Koul vinod.k...@intel.com Cc: Dave Jiang dave.ji...@intel.com Signed-off-by: Jon Mason jon.ma...@intel.com --- drivers/ntb/ntb_hw.c| 17 +++ drivers/ntb/ntb_hw.h|1 + drivers/ntb/ntb_transport.c | 285 --- 3 files changed, 258 insertions(+), 45 deletions(-) diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index 1d8e551..014222c 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) } /** + * ntb_get_mw_base() - get addr for the NTB memory window + * @ndev: pointer to ntb_device instance + * @mw: memory window number + * + * This function provides the base address of the memory window specified. + * + * RETURNS: address, or NULL on error. + */ +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) +{ + if (mw = ntb_max_mw(ndev)) + return 0; + + return pci_resource_start(ndev-pdev, MW_TO_BAR(mw)); +} Nothing does error checking on this return value. I think the code should either be sure that Œmw' is valid (mw_num is passed to the ntb_get_mw_vbase helper too) and delete the check, or at least make it a WARN_ONCE. The former seems a tad cleaner to me. Ugh! Thanks. 
+ +/** * ntb_get_mw_vbase() - get virtual addr for the NTB memory window * @ndev: pointer to ntb_device instance * @mw: memory window number diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index b03de80..ab5f768 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f7380e9..73a35e4 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -47,6 +47,7 @@ */ #include linux/debugfs.h #include linux/delay.h +#include linux/dmaengine.h #include linux/dma-mapping.h #include linux/errno.h #include linux/export.h @@ -68,6 +69,10 @@ static unsigned char max_num_clients; module_param(max_num_clients, byte, 0644); MODULE_PARM_DESC(max_num_clients, Maximum number of NTB transport clients); +static unsigned int copy_bytes = 1024; +module_param(copy_bytes, uint, 0644); +MODULE_PARM_DESC(copy_bytes, Threshold under which NTB will use the CPU to copy instead of DMA); + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; @@ -76,6 +81,13 @@ struct ntb_queue_entry { void *buf; unsigned int len; unsigned int flags; + + struct ntb_transport_qp *qp; + union { + struct ntb_payload_header __iomem *tx_hdr; + struct ntb_payload_header *rx_hdr; + }; + unsigned int index; }; struct ntb_rx_info { @@ -86,6 +98,7 @@ struct ntb_transport_qp { struct 
ntb_transport *transport; struct ntb_device *ndev; void *cb_data; + struct dma_chan *dma_chan; bool client_ready; bool qp_link; @@ -99,6 +112,7 @@ struct ntb_transport_qp { struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; + dma_addr_t tx_mw_raw; unsigned int tx_index; unsigned int tx_max_entry; unsigned int tx_max_frame; @@ -114,6 +128,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + dma_cookie_t last_cookie; void (*event_handler) (void *data, int status); struct delayed_work link_work; @@ -129,9 +144,14 @@ struct
Re: [PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
On 8/19/13 5:07 PM, Jon Mason jon.ma...@intel.com wrote: Is this for the case where we are bouncing back and forth between sync/async? Otherwise I do not see how transactions could get out of order given you allocate a channel once per queue. Is this comment saying that the iowrite32 is somehow a fix, or is this comment a FIXME? There is a case for a mix, the copy_bytes variable above switches to CPU for small transfers (which greatly increases throughput on small transfers). The caveat to it is the need to flush the DMA engine to prevent out-of-order. This comment is mainly a reminder of this issue. So this is going forward with the stall as a known issue? The next patch should just do the sync to prevent the re-ordering, right? There is already a dma_sync_wait in the error path of ntb_async_rx to enforce the ordering. Do I need to change the comment (or move it) to make it more obvious what is happening? Yeah, I think it just needs to move to the dma_sync_wait() otherwise it seems like it's an open issue that needs fixing. + txd->callback = ntb_rx_copy_callback; + txd->callback_param = entry; + + cookie = dmaengine_submit(txd); + if (dma_submit_error(cookie)) + goto err3; + + qp->last_cookie = cookie; + + dma_async_issue_pending(chan); hmm... can this go in ntb_process_rx() so that the submission is batched? Cuts down on mmio. I moved it down to ntb_transport_rx (after the calls to ntb_process_rxc), and the performance seems to be roughly the same. Yeah, not expecting it to be noticeable, but conceptually submit submit submit submit issue Is nicer than: submit issue submit issue I agree, but I liked having all the dma engine awareness compartmentalized in the ntb_async_* and callbacks. Ok, makes sense. -- Dan -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
Allocate and use a DMA engine channel to transmit and receive data over NTB. If none is allocated, fall back to using the CPU to transfer data. Cc: Dan Williams Cc: Vinod Koul Cc: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_hw.c| 17 +++ drivers/ntb/ntb_hw.h|1 + drivers/ntb/ntb_transport.c | 285 --- 3 files changed, 258 insertions(+), 45 deletions(-) diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index 1d8e551..014222c 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) } /** + * ntb_get_mw_base() - get addr for the NTB memory window + * @ndev: pointer to ntb_device instance + * @mw: memory window number + * + * This function provides the base address of the memory window specified. + * + * RETURNS: address, or NULL on error. + */ +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) +{ + if (mw >= ntb_max_mw(ndev)) + return 0; + + return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); +} + +/** * ntb_get_mw_vbase() - get virtual addr for the NTB memory window * @ndev: pointer to ntb_device instance * @mw: memory window number diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index b03de80..ab5f768 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); diff --git a/drivers/ntb/ntb_transport.c 
b/drivers/ntb/ntb_transport.c index f7380e9..73a35e4 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -47,6 +47,7 @@ */ #include #include +#include #include #include #include @@ -68,6 +69,10 @@ static unsigned char max_num_clients; module_param(max_num_clients, byte, 0644); MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients"); +static unsigned int copy_bytes = 1024; +module_param(copy_bytes, uint, 0644); +MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA"); + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; @@ -76,6 +81,13 @@ struct ntb_queue_entry { void *buf; unsigned int len; unsigned int flags; + + struct ntb_transport_qp *qp; + union { + struct ntb_payload_header __iomem *tx_hdr; + struct ntb_payload_header *rx_hdr; + }; + unsigned int index; }; struct ntb_rx_info { @@ -86,6 +98,7 @@ struct ntb_transport_qp { struct ntb_transport *transport; struct ntb_device *ndev; void *cb_data; + struct dma_chan *dma_chan; bool client_ready; bool qp_link; @@ -99,6 +112,7 @@ struct ntb_transport_qp { struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; + dma_addr_t tx_mw_raw; unsigned int tx_index; unsigned int tx_max_entry; unsigned int tx_max_frame; @@ -114,6 +128,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + dma_cookie_t last_cookie; void (*event_handler) (void *data, int status); struct delayed_work link_work; @@ -129,9 +144,14 @@ struct ntb_transport_qp { u64 rx_err_no_buf; u64 rx_err_oflow; u64 rx_err_ver; + u64 rx_memcpy; + u64 rx_async; u64 tx_bytes; u64 tx_pkts; u64 tx_ring_full; + u64 tx_err_no_buf; + u64 tx_memcpy; + u64 tx_async; }; struct ntb_transport_mw { @@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, char *buf; ssize_t ret, out_offset, out_count; - out_count = 600; + out_count = 1000; buf = 
kmalloc(out_count, GFP_KERNEL); if (!buf) @@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_pkts - \t%llu\n", qp->rx_pkts); out_offset += snprintf(buf + out_offset, out_count - out_offset, + "rx_memcpy - \t%llu\n", qp->rx_memcpy); + out_offset += snprintf(buf + out_offset, out_count - out_offset,
[PATCH 09/15] NTB: Use DMA Engine to Transmit and Receive
Allocate and use a DMA engine channel to transmit and receive data over NTB. If none is allocated, fall back to using the CPU to transfer data. Cc: Dan Williams d...@fb.com Cc: Vinod Koul vinod.k...@intel.com Cc: Dave Jiang dave.ji...@intel.com Signed-off-by: Jon Mason jon.ma...@intel.com --- drivers/ntb/ntb_hw.c| 17 +++ drivers/ntb/ntb_hw.h|1 + drivers/ntb/ntb_transport.c | 285 --- 3 files changed, 258 insertions(+), 45 deletions(-) diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index 1d8e551..014222c 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) } /** + * ntb_get_mw_base() - get addr for the NTB memory window + * @ndev: pointer to ntb_device instance + * @mw: memory window number + * + * This function provides the base address of the memory window specified. + * + * RETURNS: address, or NULL on error. + */ +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) +{ + if (mw = ntb_max_mw(ndev)) + return 0; + + return pci_resource_start(ndev-pdev, MW_TO_BAR(mw)); +} + +/** * ntb_get_mw_vbase() - get virtual addr for the NTB memory window * @ndev: pointer to ntb_device instance * @mw: memory window number diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index b03de80..ab5f768 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); +resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); void ntb_ring_sdb(struct ntb_device *ndev, 
unsigned int idx); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f7380e9..73a35e4 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -47,6 +47,7 @@ */ #include linux/debugfs.h #include linux/delay.h +#include linux/dmaengine.h #include linux/dma-mapping.h #include linux/errno.h #include linux/export.h @@ -68,6 +69,10 @@ static unsigned char max_num_clients; module_param(max_num_clients, byte, 0644); MODULE_PARM_DESC(max_num_clients, Maximum number of NTB transport clients); +static unsigned int copy_bytes = 1024; +module_param(copy_bytes, uint, 0644); +MODULE_PARM_DESC(copy_bytes, Threshold under which NTB will use the CPU to copy instead of DMA); + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; @@ -76,6 +81,13 @@ struct ntb_queue_entry { void *buf; unsigned int len; unsigned int flags; + + struct ntb_transport_qp *qp; + union { + struct ntb_payload_header __iomem *tx_hdr; + struct ntb_payload_header *rx_hdr; + }; + unsigned int index; }; struct ntb_rx_info { @@ -86,6 +98,7 @@ struct ntb_transport_qp { struct ntb_transport *transport; struct ntb_device *ndev; void *cb_data; + struct dma_chan *dma_chan; bool client_ready; bool qp_link; @@ -99,6 +112,7 @@ struct ntb_transport_qp { struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; + dma_addr_t tx_mw_raw; unsigned int tx_index; unsigned int tx_max_entry; unsigned int tx_max_frame; @@ -114,6 +128,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + dma_cookie_t last_cookie; void (*event_handler) (void *data, int status); struct delayed_work link_work; @@ -129,9 +144,14 @@ struct ntb_transport_qp { u64 rx_err_no_buf; u64 rx_err_oflow; u64 rx_err_ver; + u64 rx_memcpy; + u64 rx_async; u64 tx_bytes; u64 tx_pkts; u64 tx_ring_full; + u64 tx_err_no_buf; + u64 tx_memcpy; + u64 tx_async; }; struct ntb_transport_mw { @@ -381,7 +401,7 @@ static ssize_t 
debugfs_read(struct file *filp, char __user *ubuf, size_t count, char *buf; ssize_t ret, out_offset, out_count; - out_count = 600; + out_count = 1000; buf = kmalloc(out_count, GFP_KERNEL); if (!buf) @@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, rx_pkts - \t%llu\n, qp-rx_pkts); out_offset += snprintf(buf + out_offset, out_count -