RE: [PATCH v2 2/7] iommu/vt-d: Multiple descriptors per qi_submit_sync()

2020-04-15 Thread Tian, Kevin
> From: Lu Baolu 
> Sent: Wednesday, April 15, 2020 4:30 PM
> 
> On 2020/4/15 16:18, Tian, Kevin wrote:
> >> From: Lu Baolu
> >> Sent: Wednesday, April 15, 2020 1:26 PM
> >>
> >> Extend qi_submit_sync() function to support multiple descriptors.
> >>
> >> Signed-off-by: Jacob Pan
> >> Signed-off-by: Lu Baolu
> >> ---
> >>   drivers/iommu/dmar.c| 39 +++--
> >>   include/linux/intel-iommu.h |  1 +
> >>   2 files changed, 25 insertions(+), 15 deletions(-)
> >>
> >> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
> >> index bb42177e2369..61d049e91f84 100644
> >> --- a/drivers/iommu/dmar.c
> >> +++ b/drivers/iommu/dmar.c
> >> @@ -1157,12 +1157,11 @@ static inline void reclaim_free_desc(struct
> >> q_inval *qi)
> >>}
> >>   }
> >>
> >> -static int qi_check_fault(struct intel_iommu *iommu, int index)
> >> +static int qi_check_fault(struct intel_iommu *iommu, int index, int
> >> wait_index)
> >>   {
> >>u32 fault;
> >>int head, tail;
> >>struct q_inval *qi = iommu->qi;
> >> -  int wait_index = (index + 1) % QI_LENGTH;
> >>int shift = qi_shift(iommu);
> >>
> >>if (qi->desc_status[wait_index] == QI_ABORT)
> >> @@ -1234,12 +1233,12 @@ static int qi_check_fault(struct intel_iommu
> >> *iommu, int index)
> >>   int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
> >>   unsigned int count, unsigned long options)
> >>   {
> >> -  int rc;
> >>struct q_inval *qi = iommu->qi;
> >> -  int offset, shift, length;
> >>struct qi_desc wait_desc;
> >>int wait_index, index;
> >>unsigned long flags;
> >> +  int offset, shift;
> >> +  int rc, i;
> >>
> >>if (!qi)
> >>return 0;
> >> @@ -1248,32 +1247,41 @@ int qi_submit_sync(struct intel_iommu
> *iommu,
> >> struct qi_desc *desc,
> >>rc = 0;
> >>
> >>raw_spin_lock_irqsave(&qi->q_lock, flags);
> >> -  while (qi->free_cnt < 3) {
> >> +  /*
> >> +   * Check if we have enough empty slots in the queue to submit,
> >> +   * the calculation is based on:
> >> +   * # of desc + 1 wait desc + 1 space between head and tail
> >> +   */
> >> +  while (qi->free_cnt < count + 2) {
> >>raw_spin_unlock_irqrestore(&qi->q_lock, flags);
> >>cpu_relax();
> >>raw_spin_lock_irqsave(&qi->q_lock, flags);
> >>}
> >>
> >>index = qi->free_head;
> >> -  wait_index = (index + 1) % QI_LENGTH;
> >> +  wait_index = (index + count) % QI_LENGTH;
> >>shift = qi_shift(iommu);
> >> -  length = 1 << shift;
> >>
> >> -  qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
> >> +  for (i = 0; i < count; i++) {
> >> +  offset = ((index + i) % QI_LENGTH) << shift;
> >> +  memcpy(qi->desc + offset, &desc[i], 1 << shift);
> >> +  qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
> >> +  }
> > what about doing one memcpy and leaving the loop only for updating
> > qi status?
> >
> 
> One memcpy might cross the table boundary.
> 

Thanks, you are right.
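
The boundary issue above comes from the invalidation queue being a circular
buffer of QI_LENGTH slots: a batch that starts near the end of the ring wraps
back to slot 0, so one contiguous memcpy would run past the end of the buffer.
Below is a minimal standalone sketch (a simplified ring model, not the
driver's q_inval structures; only QI_LENGTH and the 16-byte legacy descriptor
size mirror the driver) contrasting the per-descriptor copy used in the patch
with a wrap-safe two-chunk copy.

#include <stdio.h>
#include <string.h>

#define QI_LENGTH 256                  /* slots in the invalidation queue */
#define DESC_SIZE 16                   /* 128-bit descriptor, legacy mode */

static unsigned char ring[QI_LENGTH * DESC_SIZE];

/* Per-descriptor copy, as in the patch: each slot index is taken modulo
 * QI_LENGTH, so wrap-around is handled naturally. */
static void copy_per_desc(int head, const unsigned char *descs, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		int slot = (head + i) % QI_LENGTH;

		memcpy(ring + slot * DESC_SIZE, descs + i * DESC_SIZE, DESC_SIZE);
	}
}

/* The single-memcpy idea done safely: at most two copies, split where the
 * batch crosses the end of the ring. */
static void copy_two_chunks(int head, const unsigned char *descs, int count)
{
	int first = count;

	if (head + count > QI_LENGTH)
		first = QI_LENGTH - head;      /* descriptors that fit before the wrap */

	memcpy(ring + head * DESC_SIZE, descs, first * DESC_SIZE);
	if (first < count)                     /* remainder wraps to slot 0 */
		memcpy(ring, descs + first * DESC_SIZE,
		       (count - first) * DESC_SIZE);
}

int main(void)
{
	unsigned char batch[4 * DESC_SIZE];

	memset(batch, 0xab, sizeof(batch));

	/* head = 254, count = 4: slots 254, 255, 0, 1 -- exactly the case a
	 * single memcpy of 4 * DESC_SIZE bytes would get wrong. */
	copy_per_desc(254, batch, 4);
	copy_two_chunks(254, batch, 4);
	printf("wrap case lands in slots 254, 255, 0, 1\n");
	return 0;
}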


Re: [PATCH v2 2/7] iommu/vt-d: Multiple descriptors per qi_submit_sync()

2020-04-15 Thread Lu Baolu

On 2020/4/15 16:18, Tian, Kevin wrote:

From: Lu Baolu
Sent: Wednesday, April 15, 2020 1:26 PM

Extend qi_submit_sync() function to support multiple descriptors.

Signed-off-by: Jacob Pan
Signed-off-by: Lu Baolu
---
  drivers/iommu/dmar.c| 39 +++--
  include/linux/intel-iommu.h |  1 +
  2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index bb42177e2369..61d049e91f84 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1157,12 +1157,11 @@ static inline void reclaim_free_desc(struct
q_inval *qi)
}
  }

-static int qi_check_fault(struct intel_iommu *iommu, int index)
+static int qi_check_fault(struct intel_iommu *iommu, int index, int
wait_index)
  {
u32 fault;
int head, tail;
struct q_inval *qi = iommu->qi;
-   int wait_index = (index + 1) % QI_LENGTH;
int shift = qi_shift(iommu);

if (qi->desc_status[wait_index] == QI_ABORT)
@@ -1234,12 +1233,12 @@ static int qi_check_fault(struct intel_iommu
*iommu, int index)
  int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
   unsigned int count, unsigned long options)
  {
-   int rc;
struct q_inval *qi = iommu->qi;
-   int offset, shift, length;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
+   int offset, shift;
+   int rc, i;

if (!qi)
return 0;
@@ -1248,32 +1247,41 @@ int qi_submit_sync(struct intel_iommu *iommu,
struct qi_desc *desc,
rc = 0;

	raw_spin_lock_irqsave(&qi->q_lock, flags);
-   while (qi->free_cnt < 3) {
+   /*
+* Check if we have enough empty slots in the queue to submit,
+* the calculation is based on:
+* # of desc + 1 wait desc + 1 space between head and tail
+*/
+   while (qi->free_cnt < count + 2) {
	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
cpu_relax();
	raw_spin_lock_irqsave(&qi->q_lock, flags);
}

index = qi->free_head;
-   wait_index = (index + 1) % QI_LENGTH;
+   wait_index = (index + count) % QI_LENGTH;
shift = qi_shift(iommu);
-   length = 1 << shift;

-   qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+   for (i = 0; i < count; i++) {
+   offset = ((index + i) % QI_LENGTH) << shift;
+   memcpy(qi->desc + offset, &desc[i], 1 << shift);
+   qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
+   }

what about doing one memcpy and leaving the loop only for updating
qi status?



One memcpy might cross the table boundary.

Best regards,
baolu


RE: [PATCH v2 2/7] iommu/vt-d: Multiple descriptors per qi_submit_sync()

2020-04-15 Thread Tian, Kevin
> From: Lu Baolu 
> Sent: Wednesday, April 15, 2020 1:26 PM
> 
> Extend qi_submit_sync() function to support multiple descriptors.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Lu Baolu 
> ---
>  drivers/iommu/dmar.c| 39 +++--
>  include/linux/intel-iommu.h |  1 +
>  2 files changed, 25 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
> index bb42177e2369..61d049e91f84 100644
> --- a/drivers/iommu/dmar.c
> +++ b/drivers/iommu/dmar.c
> @@ -1157,12 +1157,11 @@ static inline void reclaim_free_desc(struct
> q_inval *qi)
>   }
>  }
> 
> -static int qi_check_fault(struct intel_iommu *iommu, int index)
> +static int qi_check_fault(struct intel_iommu *iommu, int index, int
> wait_index)
>  {
>   u32 fault;
>   int head, tail;
>   struct q_inval *qi = iommu->qi;
> - int wait_index = (index + 1) % QI_LENGTH;
>   int shift = qi_shift(iommu);
> 
>   if (qi->desc_status[wait_index] == QI_ABORT)
> @@ -1234,12 +1233,12 @@ static int qi_check_fault(struct intel_iommu
> *iommu, int index)
>  int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
>  unsigned int count, unsigned long options)
>  {
> - int rc;
>   struct q_inval *qi = iommu->qi;
> - int offset, shift, length;
>   struct qi_desc wait_desc;
>   int wait_index, index;
>   unsigned long flags;
> + int offset, shift;
> + int rc, i;
> 
>   if (!qi)
>   return 0;
> @@ -1248,32 +1247,41 @@ int qi_submit_sync(struct intel_iommu *iommu,
> struct qi_desc *desc,
>   rc = 0;
> 
>   raw_spin_lock_irqsave(&qi->q_lock, flags);
> - while (qi->free_cnt < 3) {
> + /*
> +  * Check if we have enough empty slots in the queue to submit,
> +  * the calculation is based on:
> +  * # of desc + 1 wait desc + 1 space between head and tail
> +  */
> + while (qi->free_cnt < count + 2) {
>   raw_spin_unlock_irqrestore(&qi->q_lock, flags);
>   cpu_relax();
>   raw_spin_lock_irqsave(&qi->q_lock, flags);
>   }
> 
>   index = qi->free_head;
> - wait_index = (index + 1) % QI_LENGTH;
> + wait_index = (index + count) % QI_LENGTH;
>   shift = qi_shift(iommu);
> - length = 1 << shift;
> 
> - qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
> + for (i = 0; i < count; i++) {
> + offset = ((index + i) % QI_LENGTH) << shift;
> + memcpy(qi->desc + offset, &desc[i], 1 << shift);
> + qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
> + }

what about doing one memcpy and leaving the loop only for updating
qi status?

> + qi->desc_status[wait_index] = QI_IN_USE;
> 
> - offset = index << shift;
> - memcpy(qi->desc + offset, desc, length);
>   wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
>   QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
> + if (options & QI_OPT_WAIT_DRAIN)
> + wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
>   wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
>   wait_desc.qw2 = 0;
>   wait_desc.qw3 = 0;
> 
>   offset = wait_index << shift;
> - memcpy(qi->desc + offset, &wait_desc, length);
> + memcpy(qi->desc + offset, &wait_desc, 1 << shift);
> 
> - qi->free_head = (qi->free_head + 2) % QI_LENGTH;
> - qi->free_cnt -= 2;
> + qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
> + qi->free_cnt -= count + 1;
> 
>   /*
>* update the HW tail register indicating the presence of
> @@ -1289,7 +1297,7 @@ int qi_submit_sync(struct intel_iommu *iommu,
> struct qi_desc *desc,
>* a deadlock where the interrupt context can wait
> indefinitely
>* for free slots in the queue.
>*/
> - rc = qi_check_fault(iommu, index);
> + rc = qi_check_fault(iommu, index, wait_index);
>   if (rc)
>   break;
> 
> @@ -1298,7 +1306,8 @@ int qi_submit_sync(struct intel_iommu *iommu,
> struct qi_desc *desc,
>   raw_spin_lock(&qi->q_lock);
>   }
> 
> - qi->desc_status[index] = QI_DONE;
> + for (i = 0; i < count; i++)
> + qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
> 
>   reclaim_free_desc(qi);
>   raw_spin_unlock_irqrestore(&qi->q_lock, flags);
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index ee2d5cdd8339..cca1e5f9aeaa 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -333,6 +333,7 @@ enum {
> 
>  #define QI_IWD_STATUS_DATA(d)  (((u64)d) << 32)
>  #define QI_IWD_STATUS_WRITE  (((u64)1) << 5)
> +#define QI_IWD_PRQ_DRAIN (((u64)1) << 7)
> 
>  #define QI_IOTLB_DID(did)  (((u64)did) << 16)
>  #define QI_IOTLB_DR(dr)  (((u64)dr) << 7)
> --
> 2.17.1
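
A note on the free-slot check in the quoted hunk above: a batch occupies count
descriptor slots plus one wait descriptor, and the ring keeps one slot unused
so that head == tail can unambiguously mean "empty", which is where the spin
until free_cnt >= count + 2 comes from. A toy model of that accounting
(simplified fields, not the driver's struct q_inval):

#include <assert.h>
#include <stdbool.h>

#define QI_LENGTH 256

struct toy_queue {
	int free_head;   /* next slot software will fill */
	int free_cnt;    /* slots not yet handed to hardware */
};

/* Mirrors the spin condition: count descriptors + 1 wait descriptor
 * + 1 slot that is never used, so head == tail still means "empty". */
static bool have_room(const struct toy_queue *qi, int count)
{
	return qi->free_cnt >= count + 2;
}

static void submit(struct toy_queue *qi, int count)
{
	assert(have_room(qi, count));
	/* ... count descriptors and one wait descriptor would be copied into
	 * slots free_head .. free_head + count (modulo QI_LENGTH) ... */
	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
	qi->free_cnt -= count + 1;   /* the reserved gap is never consumed */
}

int main(void)
{
	struct toy_queue qi = { 0, QI_LENGTH };

	submit(&qi, 4);              /* 4 descriptors + 1 wait = 5 slots */
	assert(qi.free_cnt == QI_LENGTH - 5);
	return 0;
}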


[PATCH v2 2/7] iommu/vt-d: Multiple descriptors per qi_submit_sync()

2020-04-14 Thread Lu Baolu
Extend qi_submit_sync() function to support multiple descriptors.

Signed-off-by: Jacob Pan 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dmar.c| 39 +++--
 include/linux/intel-iommu.h |  1 +
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index bb42177e2369..61d049e91f84 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1157,12 +1157,11 @@ static inline void reclaim_free_desc(struct q_inval *qi)
}
 }
 
-static int qi_check_fault(struct intel_iommu *iommu, int index)
+static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
 {
u32 fault;
int head, tail;
struct q_inval *qi = iommu->qi;
-   int wait_index = (index + 1) % QI_LENGTH;
int shift = qi_shift(iommu);
 
if (qi->desc_status[wait_index] == QI_ABORT)
@@ -1234,12 +1233,12 @@ static int qi_check_fault(struct intel_iommu *iommu, 
int index)
 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
   unsigned int count, unsigned long options)
 {
-   int rc;
struct q_inval *qi = iommu->qi;
-   int offset, shift, length;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
+   int offset, shift;
+   int rc, i;
 
if (!qi)
return 0;
@@ -1248,32 +1247,41 @@ int qi_submit_sync(struct intel_iommu *iommu, struct 
qi_desc *desc,
rc = 0;
 
	raw_spin_lock_irqsave(&qi->q_lock, flags);
-   while (qi->free_cnt < 3) {
+   /*
+* Check if we have enough empty slots in the queue to submit,
+* the calculation is based on:
+* # of desc + 1 wait desc + 1 space between head and tail
+*/
+   while (qi->free_cnt < count + 2) {
	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
cpu_relax();
	raw_spin_lock_irqsave(&qi->q_lock, flags);
}
 
index = qi->free_head;
-   wait_index = (index + 1) % QI_LENGTH;
+   wait_index = (index + count) % QI_LENGTH;
shift = qi_shift(iommu);
-   length = 1 << shift;
 
-   qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+   for (i = 0; i < count; i++) {
+   offset = ((index + i) % QI_LENGTH) << shift;
+   memcpy(qi->desc + offset, &desc[i], 1 << shift);
+   qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
+   }
+   qi->desc_status[wait_index] = QI_IN_USE;
 
-   offset = index << shift;
-   memcpy(qi->desc + offset, desc, length);
wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+   if (options & QI_OPT_WAIT_DRAIN)
+   wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
wait_desc.qw2 = 0;
wait_desc.qw3 = 0;
 
offset = wait_index << shift;
-   memcpy(qi->desc + offset, &wait_desc, length);
+   memcpy(qi->desc + offset, &wait_desc, 1 << shift);
 
-   qi->free_head = (qi->free_head + 2) % QI_LENGTH;
-   qi->free_cnt -= 2;
+   qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
+   qi->free_cnt -= count + 1;
 
/*
 * update the HW tail register indicating the presence of
@@ -1289,7 +1297,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct 
qi_desc *desc,
 * a deadlock where the interrupt context can wait indefinitely
 * for free slots in the queue.
 */
-   rc = qi_check_fault(iommu, index);
+   rc = qi_check_fault(iommu, index, wait_index);
if (rc)
break;
 
@@ -1298,7 +1306,8 @@ int qi_submit_sync(struct intel_iommu *iommu, struct 
qi_desc *desc,
	raw_spin_lock(&qi->q_lock);
}
 
-   qi->desc_status[index] = QI_DONE;
+   for (i = 0; i < count; i++)
+   qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
 
reclaim_free_desc(qi);
	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index ee2d5cdd8339..cca1e5f9aeaa 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -333,6 +333,7 @@ enum {
 
 #define QI_IWD_STATUS_DATA(d)  (((u64)d) << 32)
 #define QI_IWD_STATUS_WRITE   (((u64)1) << 5)
+#define QI_IWD_PRQ_DRAIN   (((u64)1) << 7)
 
 #define QI_IOTLB_DID(did)  (((u64)did) << 16)
 #define QI_IOTLB_DR(dr)       (((u64)dr) << 7)
-- 
2.17.1
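
For context, a rough caller-side sketch of the extended interface above
(kernel context assumed, so not a standalone program; the helper name and
descriptor payloads are illustrative only, and QI_OPT_WAIT_DRAIN is
introduced elsewhere in this series):

#include <linux/intel-iommu.h>

/* Queue two invalidation descriptors and wait for both with a single
 * wait descriptor, using the new count/options arguments. */
static void example_batched_flush(struct intel_iommu *iommu)
{
	struct qi_desc desc[2] = {};

	/* First descriptor, e.g. an IOTLB invalidation: qw0..qw3 would be
	 * filled per the VT-d descriptor format (fields elided here). */
	desc[0].qw0 = 0;

	/* Second descriptor, e.g. a device-TLB invalidation (also elided). */
	desc[1].qw0 = 0;

	/* One call copies both descriptors plus one wait descriptor into the
	 * queue and spins until the wait descriptor completes.  Passing
	 * QI_OPT_WAIT_DRAIN in options would additionally set the PRQ drain
	 * bit (QI_IWD_PRQ_DRAIN) in that wait descriptor. */
	qi_submit_sync(iommu, desc, 2, 0);
}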
