Re: [Intel-gfx] [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers

2018-02-13 Thread Rodrigo Vivi
On Tue, Feb 13, 2018 at 09:01:54AM +0000, Chris Wilson wrote:
> When a request is preempted, it is unsubmitted from the HW queue and
> removed from the active list of breadcrumbs. In the process, this
> however triggers the signaler and it may see the clear rbtree with the
> old, and still valid, seqno, or it may match the cleared seqno with the
> now zero rq->global_seqno. This confuses the signaler into action and
> signaling the fence.
> 
> Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the 
> execution queue")
> Signed-off-by: Chris Wilson 
> Cc: Tvrtko Ursulin 
> Cc: Joonas Lahtinen 
> Cc: <stable@vger.kernel.org> # v4.12+
> Reviewed-by: Tvrtko Ursulin 
> Link: 
> https://patchwork.freedesktop.org/patch/msgid/20180206094633.30181-1-ch...@chris-wilson.co.uk
> (cherry picked from commit fd10e2ce9905030d922e179a8047a4d50daffd8e)

applied to fixes. Thanks

> ---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 29 ++---
>  1 file changed, 10 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
> b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index bd40fea16b4f..f54ddda9fdad 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -594,29 +594,16 @@ void intel_engine_remove_wait(struct intel_engine_cs 
> *engine,
>   spin_unlock_irq(&b->rb_lock);
>  }
>  
> -static bool signal_valid(const struct drm_i915_gem_request *request)
> -{
> - return intel_wait_check_request(&request->signaling.wait, request);
> -}
> -
>  static bool signal_complete(const struct drm_i915_gem_request *request)
>  {
>   if (!request)
>   return false;
>  
> - /* If another process served as the bottom-half it may have already
> -  * signalled that this wait is already completed.
> -  */
> - if (intel_wait_complete(&request->signaling.wait))
> - return signal_valid(request);
> -
> - /* Carefully check if the request is complete, giving time for the
> + /*
> +  * Carefully check if the request is complete, giving time for the
>* seqno to be visible or if the GPU hung.
>*/
> - if (__i915_request_irq_complete(request))
> - return true;
> -
> - return false;
> + return __i915_request_irq_complete(request);
>  }
>  
>  static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
> @@ -659,9 +646,13 @@ static int intel_breadcrumbs_signaler(void *arg)
>   request = i915_gem_request_get_rcu(request);
>   rcu_read_unlock();
>   if (signal_complete(request)) {
> - local_bh_disable();
> - dma_fence_signal(&request->fence);
> - local_bh_enable(); /* kick start the tasklets */
> + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +   &request->fence.flags)) {
> + local_bh_disable();
> + dma_fence_signal(&request->fence);
> + GEM_BUG_ON(!i915_gem_request_completed(request));
> + local_bh_enable(); /* kick start the tasklets */
> + }
>  
>   spin_lock_irq(&b->rb_lock);
>  
> -- 
> 2.16.1
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers

2018-02-13 Thread Chris Wilson
When a request is preempted, it is unsubmitted from the HW queue and
removed from the active list of breadcrumbs. In the process, this
however triggers the signaler and it may see the clear rbtree with the
old, and still valid, seqno, or it may match the cleared seqno with the
now zero rq->global_seqno. This confuses the signaler into action and
signaling the fence.

Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the execution 
queue")
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Joonas Lahtinen 
Cc: <stable@vger.kernel.org> # v4.12+
Reviewed-by: Tvrtko Ursulin 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20180206094633.30181-1-ch...@chris-wilson.co.uk
(cherry picked from commit fd10e2ce9905030d922e179a8047a4d50daffd8e)
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 29 ++---
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index bd40fea16b4f..f54ddda9fdad 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -594,29 +594,16 @@ void intel_engine_remove_wait(struct intel_engine_cs 
*engine,
spin_unlock_irq(&b->rb_lock);
 }
 
-static bool signal_valid(const struct drm_i915_gem_request *request)
-{
-   return intel_wait_check_request(&request->signaling.wait, request);
-}
-
 static bool signal_complete(const struct drm_i915_gem_request *request)
 {
if (!request)
return false;
 
-   /* If another process served as the bottom-half it may have already
-* signalled that this wait is already completed.
-*/
-   if (intel_wait_complete(&request->signaling.wait))
-   return signal_valid(request);
-
-   /* Carefully check if the request is complete, giving time for the
+   /*
+* Carefully check if the request is complete, giving time for the
 * seqno to be visible or if the GPU hung.
 */
-   if (__i915_request_irq_complete(request))
-   return true;
-
-   return false;
+   return __i915_request_irq_complete(request);
 }
 
 static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
@@ -659,9 +646,13 @@ static int intel_breadcrumbs_signaler(void *arg)
request = i915_gem_request_get_rcu(request);
rcu_read_unlock();
if (signal_complete(request)) {
-   local_bh_disable();
-   dma_fence_signal(&request->fence);
-   local_bh_enable(); /* kick start the tasklets */
+   if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &request->fence.flags)) {
+   local_bh_disable();
+   dma_fence_signal(&request->fence);
+   GEM_BUG_ON(!i915_gem_request_completed(request));
+   local_bh_enable(); /* kick start the tasklets */
+   }
 
spin_lock_irq(&b->rb_lock);
 
-- 
2.16.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers

2018-02-07 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-02-07 10:40:46)
> 
> On 06/02/2018 09:46, Chris Wilson wrote:
> > When a request is preempted, it is unsubmitted from the HW queue and
> > removed from the active list of breadcrumbs. In the process, this
> > however triggers the signaler and it may see the clear rbtree with the
> > old, and still valid, seqno. This confuses the signaler into action and
> > signaling the fence.
> > 
> > Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the 
> > execution queue")
> > Signed-off-by: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > Cc: Joonas Lahtinen 
> > Cc: <stable@vger.kernel.org> # v4.12+
> > ---
> Looks OK. But I can't say it's straightforward to understand it.

But I hope the GEM_BUG_ON() was nice icing on the cake :)
 
> Reviewed-by: Tvrtko Ursulin 

Thanks, added your explanation to the changelog and pushed.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers

2018-02-07 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-02-07 10:40:46)
> 
> On 06/02/2018 09:46, Chris Wilson wrote:
> > When a request is preempted, it is unsubmitted from the HW queue and
> > removed from the active list of breadcrumbs. In the process, this
> > however triggers the signaler and it may see the clear rbtree with the
> > old, and still valid, seqno. This confuses the signaler into action and
> > signaling the fence.
> > 
> > Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the 
> > execution queue")
> > Signed-off-by: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > Cc: Joonas Lahtinen 
> > Cc: <stable@vger.kernel.org> # v4.12+
> > ---
> >   drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 
> >   1 file changed, 4 insertions(+), 16 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
> > b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > index efbc627a2a25..b955f7d7bd0f 100644
> > --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > @@ -588,29 +588,16 @@ void intel_engine_remove_wait(struct intel_engine_cs 
> > *engine,
> >   spin_unlock_irq(&b->rb_lock);
> >   }
> >   
> > -static bool signal_valid(const struct drm_i915_gem_request *request)
> > -{
> > - return intel_wait_check_request(&request->signaling.wait, request);
> > -}
> > -
> >   static bool signal_complete(const struct drm_i915_gem_request *request)
> >   {
> >   if (!request)
> >   return false;
> >   
> > - /* If another process served as the bottom-half it may have already
> > -  * signalled that this wait is already completed.
> > -  */
> > - if (intel_wait_complete(&request->signaling.wait))
> > - return signal_valid(request);
> 
> Okay so this can return true for unsubmitted requests since rb node will 
> be empty and global_seqno == wait.seqno == 0.

Hmm, ah, signal_valid() operated under the belief that its wait.seqno
was untouched. That makes a bit more sense now. I was having to concoct
some scary data races to try and explain how global_seqno and wait.seqno
had the same non-zero value.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers

2018-02-07 Thread Tvrtko Ursulin


On 06/02/2018 09:46, Chris Wilson wrote:

When a request is preempted, it is unsubmitted from the HW queue and
removed from the active list of breadcrumbs. In the process, this
however triggers the signaler and it may see the clear rbtree with the
old, and still valid, seqno. This confuses the signaler into action and
signaling the fence.

Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the execution 
queue")
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Joonas Lahtinen 
Cc: <stable@vger.kernel.org> # v4.12+
---
  drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 
  1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index efbc627a2a25..b955f7d7bd0f 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -588,29 +588,16 @@ void intel_engine_remove_wait(struct intel_engine_cs 
*engine,
spin_unlock_irq(&b->rb_lock);
  }
  
-static bool signal_valid(const struct drm_i915_gem_request *request)

-{
-   return intel_wait_check_request(&request->signaling.wait, request);
-}
-
  static bool signal_complete(const struct drm_i915_gem_request *request)
  {
if (!request)
return false;
  
-	/* If another process served as the bottom-half it may have already

-* signalled that this wait is already completed.
-*/
-   if (intel_wait_complete(&request->signaling.wait))
-   return signal_valid(request);


Okay so this can return true for unsubmitted requests since rb node will 
be empty and global_seqno == wait.seqno == 0.


I just panic when thinking about races and ordering, since these checks 
used to run unlocked. So even better that they are gone.



-
-   /* Carefully check if the request is complete, giving time for the
+   /*
+* Carefully check if the request is complete, giving time for the
 * seqno to be visible or if the GPU hung.
 */
-   if (__i915_request_irq_complete(request))
-   return true;
-
-   return false;
+   return __i915_request_irq_complete(request);
  }
  
  static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)

@@ -712,6 +699,7 @@ static int intel_breadcrumbs_signaler(void *arg)
  &request->fence.flags)) {
local_bh_disable();
dma_fence_signal(&request->fence);
+   GEM_BUG_ON(!i915_gem_request_completed(request));
local_bh_enable(); /* kick start the tasklets */
}
  



Looks OK. But I can't say it's straightforward to understand it.

Reviewed-by: Tvrtko Ursulin 

Regards,

Tvrtko


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers

2018-02-06 Thread Chris Wilson
Quoting Chris Wilson (2018-02-06 09:46:33)
> When a request is preempted, it is unsubmitted from the HW queue and
> removed from the active list of breadcrumbs. In the process, this
> however triggers the signaler and it may see the clear rbtree with the
> old, and still valid, seqno. This confuses the signaler into action and
> signaling the fence.
> 
> Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the 
> execution queue")
> Signed-off-by: Chris Wilson 
> Cc: Tvrtko Ursulin 
> Cc: Joonas Lahtinen 
> Cc: <stable@vger.kernel.org> # v4.12+

Any takers for this brown paper bug?

> ---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 
>  1 file changed, 4 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
> b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index efbc627a2a25..b955f7d7bd0f 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -588,29 +588,16 @@ void intel_engine_remove_wait(struct intel_engine_cs 
> *engine,
> spin_unlock_irq(&b->rb_lock);
>  }
>  
> -static bool signal_valid(const struct drm_i915_gem_request *request)
> -{
> -   return intel_wait_check_request(&request->signaling.wait, request);
> -}
> -
>  static bool signal_complete(const struct drm_i915_gem_request *request)
>  {
> if (!request)
> return false;
>  
> -   /* If another process served as the bottom-half it may have already
> -* signalled that this wait is already completed.
> -*/
> -   if (intel_wait_complete(&request->signaling.wait))
> -   return signal_valid(request);
> -
> -   /* Carefully check if the request is complete, giving time for the
> +   /*
> +* Carefully check if the request is complete, giving time for the
>  * seqno to be visible or if the GPU hung.
>  */
> -   if (__i915_request_irq_complete(request))
> -   return true;
> -
> -   return false;
> +   return __i915_request_irq_complete(request);
>  }
>  
>  static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
> @@ -712,6 +699,7 @@ static int intel_breadcrumbs_signaler(void *arg)
>   &request->fence.flags)) {
> local_bh_disable();
> dma_fence_signal(&request->fence);
> +   GEM_BUG_ON(!i915_gem_request_completed(request));
> local_bh_enable(); /* kick start the tasklets */
> }
>  
> -- 
> 2.15.1
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers

2018-02-06 Thread Chris Wilson
Quoting Chris Wilson (2018-02-06 09:46:33)
> When a request is preempted, it is unsubmitted from the HW queue and
> removed from the active list of breadcrumbs. In the process, this
> however triggers the signaler and it may see the clear rbtree with the
> old, and still valid, seqno. This confuses the signaler into action and
> signaling the fence.
> 
> Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the 
> execution queue")
Testcase: igt/gem_exec_whisper/*-priority # reproducibility ~10%
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/breadcrumbs: Ignore unsubmitted signalers

2018-02-06 Thread Chris Wilson
When a request is preempted, it is unsubmitted from the HW queue and
removed from the active list of breadcrumbs. In the process, this
however triggers the signaler and it may see the clear rbtree with the
old, and still valid, seqno. This confuses the signaler into action and
signaling the fence.

Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the execution 
queue")
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Joonas Lahtinen 
Cc: <stable@vger.kernel.org> # v4.12+
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index efbc627a2a25..b955f7d7bd0f 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -588,29 +588,16 @@ void intel_engine_remove_wait(struct intel_engine_cs 
*engine,
spin_unlock_irq(&b->rb_lock);
 }
 
-static bool signal_valid(const struct drm_i915_gem_request *request)
-{
-   return intel_wait_check_request(&request->signaling.wait, request);
-}
-
 static bool signal_complete(const struct drm_i915_gem_request *request)
 {
if (!request)
return false;
 
-   /* If another process served as the bottom-half it may have already
-* signalled that this wait is already completed.
-*/
-   if (intel_wait_complete(&request->signaling.wait))
-   return signal_valid(request);
-
-   /* Carefully check if the request is complete, giving time for the
+   /*
+* Carefully check if the request is complete, giving time for the
 * seqno to be visible or if the GPU hung.
 */
-   if (__i915_request_irq_complete(request))
-   return true;
-
-   return false;
+   return __i915_request_irq_complete(request);
 }
 
 static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
@@ -712,6 +699,7 @@ static int intel_breadcrumbs_signaler(void *arg)
  &request->fence.flags)) {
local_bh_disable();
dma_fence_signal(&request->fence);
+   GEM_BUG_ON(!i915_gem_request_completed(request));
local_bh_enable(); /* kick start the tasklets */
}
 
-- 
2.15.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx