Re: [PATCH] xhci: Don't add NOP TRB for aligned bulk sg transfers

2014-01-27 Thread Sarah Sharp
It helps if you Cc testers on your patches.  Adding Jérôme, please test.

Sarah Sharp

On Thu, Jan 23, 2014 at 01:42:28PM +, David Laight wrote:
> Transfer requests for usb disks can contain a large number of 4k fragments.
> Assume that it doesn't matter if there are LINK TRB in fragment lists
> that are 1k aligned.
> 
> This should stop errors when transfer requests for usb disks contain
> more fragments than will fit in a ring segment.
> 
> Signed-off-by: David Laight 
> ---
> Note that this patch just allows large numbers of aligned fragments for
> bulk scatter-gather transfers.
> It doesn't remove the need for the patch that changes the way the ownership
> of the first TRB is set in order to avoid the controller seeing 'dangling'
> LINK TRBs.
> However it does reduce the likelihood of dangling LINK TRBs, so might seem to
> make the other patch unnecessary.
> 
> I've only compile tested it - but it should be fine.
> 
>  drivers/usb/host/xhci-ring.c | 49 
> ++--
>  1 file changed, 38 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
> index a0b248c..7a4efd2 100644
> --- a/drivers/usb/host/xhci-ring.c
> +++ b/drivers/usb/host/xhci-ring.c
> @@ -2932,7 +2932,8 @@ static void queue_trb(struct xhci_hcd *xhci, struct 
> xhci_ring *ring,
>   * FIXME allocate segments if the ring is full.
>   */
>  static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
> - u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
> + u32 ep_state, unsigned int num_trbs, gfp_t mem_flags,
> + bool aligned_xfer)
>  {
>   unsigned int num_trbs_needed;
>  
> @@ -2980,6 +2981,9 @@ static int prepare_ring(struct xhci_hcd *xhci, struct 
> xhci_ring *ep_ring,
>* Simplest solution is to fill the trb before the
>* LINK with nop commands.
>*/
> + if (aligned_xfer)
> + /* Caller says buffer is aligned */
> + break;
>   if (num_trbs == 1 || num_trbs <= usable || usable == 0)
>   break;
>  
> @@ -3073,7 +3077,8 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>   unsigned int num_trbs,
>   struct urb *urb,
>   unsigned int td_index,
> - gfp_t mem_flags)
> + gfp_t mem_flags,
> + bool aligned_xfer)
>  {
>   int ret;
>   struct urb_priv *urb_priv;
> @@ -3090,7 +3095,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>  
>   ret = prepare_ring(xhci, ep_ring,
>  le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
> -num_trbs, mem_flags);
> +num_trbs, mem_flags, aligned_xfer);
>   if (ret)
>   return ret;
>  
> @@ -3117,7 +3122,8 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>   return 0;
>  }
>  
> -static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb 
> *urb)
> +static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb 
> *urb,
> + bool *aligned_xfer)
>  {
>   int num_sgs, num_trbs, running_total, temp, i;
>   struct scatterlist *sg;
> @@ -3125,11 +3131,30 @@ static unsigned int count_sg_trbs_needed(struct 
> xhci_hcd *xhci, struct urb *urb)
>   sg = NULL;
>   num_sgs = urb->num_mapped_sgs;
>   temp = urb->transfer_buffer_length;
> + *aligned_xfer = true;
>  
>   num_trbs = 0;
>   for_each_sg(urb->sg, sg, num_sgs, i) {
>   unsigned int len = sg_dma_len(sg);
>  
> + /*
> +  * The 1.0 spec says LINK TRB are only allowed at data offsets
> +  * that are multiples of the transfer burst size.
> +  * While the burst size might be 16k the effect is probably
> +  * that the transfer is split. In particular a non-full-length
> +  * data block might be sent - terminating a bulk transfer.
> +  *
> +  * prepare_ring() ensures that the data TRB don't cross a LINK,
> +  * but for usb disks we see large numbers of fragments of 4k
> +  * and that exceeds the size of each ring segment.
> +  *
> +  * If all the fragments start and end on 1k boundaries tell
> +  * prepare_ring() not to worry about LINK TRB.
> +  */
> + if ((sg_dma_address(sg) | sg_dma_len(sg)) & 1023)
> + /* Fragment not 1k aligned */
> + *aligned_xfer = false;
> +
>   /* Scatter gather list entries may cross 64KB boundaries */
>   running_total = TRB_MAX_BUFF_SIZE -
>   (sg_dma_address(sg) & (TRB_MAX_BUFF_SIZE - 1));
> @@ -3284,6 +3309,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, 
> gfp_t mem_flags,
>   b

RE: [PATCH] xhci: Don't add NOP TRB for aligned bulk sg transfers

2014-01-24 Thread David Laight
From: David Laight
> Transfer requests for usb disks can contain a large number of 4k fragments.
> Assume that it doesn't matter if there are LINK TRB in fragment lists
> that are 1k aligned.
> 
> This should stop errors when transfer requests for usb disks contain
> more fragments than will fit in a ring segment.
> 
> Signed-off-by: David Laight 
> ---
> Note that this patch just allows large numbers of aligned fragments for
> bulk scatter-gather transfers.
> It doesn't remove the need for the patch that changes the way the ownership
> of the first TRB is set in order to avoid the controller seeing 'dangling'
> LINK TRBs.
> However it does reduce the likelihood of dangling LINK TRBs, so might seem to
> make the other patch unnecessary.
> 
> I've only compile tested it - but it should be fine.

Tested with aligned transfers for a usb memory stick and unaligned
ones for ethernet.

I failed to generate more than 30 sg entries.
Interestingly the disk transfers (eg from dd bs=1M) are pairs of
122880 (15 * 8192) and 8192 bytes. Seems suboptimal!

David



--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] xhci: Don't add NOP TRB for aligned bulk sg transfers

2014-01-23 Thread David Laight
Transfer requests for usb disks can contain a large number of 4k fragments.
Assume that it doesn't matter if there are LINK TRB in fragment lists
that are 1k aligned.

This should stop errors when transfer requests for usb disks contain
more fragments than will fit in a ring segment.

Signed-off-by: David Laight 
---
Note that this patch just allows large numbers of aligned fragments for
bulk scatter-gather transfers.
It doesn't remove the need for the patch that changes the way the ownership
of the first TRB is set in order to avoid the controller seeing 'dangling'
LINK TRBs.
However it does reduce the likelihood of dangling LINK TRBs, so might seem to
make the other patch unnecessary.

I've only compile tested it - but it should be fine.

 drivers/usb/host/xhci-ring.c | 49 ++--
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a0b248c..7a4efd2 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2932,7 +2932,8 @@ static void queue_trb(struct xhci_hcd *xhci, struct 
xhci_ring *ring,
  * FIXME allocate segments if the ring is full.
  */
 static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
-   u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
+   u32 ep_state, unsigned int num_trbs, gfp_t mem_flags,
+   bool aligned_xfer)
 {
unsigned int num_trbs_needed;
 
@@ -2980,6 +2981,9 @@ static int prepare_ring(struct xhci_hcd *xhci, struct 
xhci_ring *ep_ring,
 * Simplest solution is to fill the trb before the
 * LINK with nop commands.
 */
+   if (aligned_xfer)
+   /* Caller says buffer is aligned */
+   break;
if (num_trbs == 1 || num_trbs <= usable || usable == 0)
break;
 
@@ -3073,7 +3077,8 @@ static int prepare_transfer(struct xhci_hcd *xhci,
unsigned int num_trbs,
struct urb *urb,
unsigned int td_index,
-   gfp_t mem_flags)
+   gfp_t mem_flags,
+   bool aligned_xfer)
 {
int ret;
struct urb_priv *urb_priv;
@@ -3090,7 +3095,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
 
ret = prepare_ring(xhci, ep_ring,
   le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
-  num_trbs, mem_flags);
+  num_trbs, mem_flags, aligned_xfer);
if (ret)
return ret;
 
@@ -3117,7 +3122,8 @@ static int prepare_transfer(struct xhci_hcd *xhci,
return 0;
 }
 
-static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb 
*urb)
+static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb 
*urb,
+   bool *aligned_xfer)
 {
int num_sgs, num_trbs, running_total, temp, i;
struct scatterlist *sg;
@@ -3125,11 +3131,30 @@ static unsigned int count_sg_trbs_needed(struct 
xhci_hcd *xhci, struct urb *urb)
sg = NULL;
num_sgs = urb->num_mapped_sgs;
temp = urb->transfer_buffer_length;
+   *aligned_xfer = true;
 
num_trbs = 0;
for_each_sg(urb->sg, sg, num_sgs, i) {
unsigned int len = sg_dma_len(sg);
 
+   /*
+* The 1.0 spec says LINK TRB are only allowed at data offsets
+* that are multiples of the transfer burst size.
+* While the burst size might be 16k the effect is probably
+* that the transfer is split. In particular a non-full-length
+* data block might be sent - terminating a bulk transfer.
+*
+* prepare_ring() ensures that the data TRB don't cross a LINK,
+* but for usb disks we see large numbers of fragments of 4k
+* and that exceeds the size of each ring segment.
+*
+* If all the fragments start and end on 1k boundaries tell
+* prepare_ring() not to worry about LINK TRB.
+*/
+   if ((sg_dma_address(sg) | sg_dma_len(sg)) & 1023)
+   /* Fragment not 1k aligned */
+   *aligned_xfer = false;
+
/* Scatter gather list entries may cross 64KB boundaries */
running_total = TRB_MAX_BUFF_SIZE -
(sg_dma_address(sg) & (TRB_MAX_BUFF_SIZE - 1));
@@ -3284,6 +3309,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t 
mem_flags,
bool first_trb;
u64 addr;
bool more_trbs_coming;
+   bool aligned_xfer;
 
struct xhci_generic_trb *start_trb;
int start_cycle;
@@ -3292,14 +3318,14 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, 
gfp_t mem_flags,
i