If acked by airlied, please consider for longterm inclusion,
it got shipped with 2.6.36.

attached the trivially backported patch for Debian 2.6.32 (has .33 drm),
tested on a RV620, please consider that one for 2.6.33 longterm.

thank you

-- 
maks


----- Forwarded message from Linux Kernel Mailing List 
<[email protected]> -----

Date: Thu, 7 Oct 2010 21:59:37 GMT
From: Linux Kernel Mailing List <[email protected]>
To: [email protected]
Subject: drm/ttm: Fix two race conditions + fix busy codepaths

Gitweb:     http://git.kernel.org/linus/1df6a2ebd75067aefbdf07482bf8e3d0584e04ee
Commit:     1df6a2ebd75067aefbdf07482bf8e3d0584e04ee
Parent:     e1d9694cae722d00a94fb58f901aa69c9c324a16
Author:     Thomas Hellstrom <[email protected]>
AuthorDate: Thu Sep 30 12:36:45 2010 +0200
Committer:  Dave Airlie <[email protected]>
CommitDate: Wed Oct 6 09:04:43 2010 +1000

    drm/ttm: Fix two race conditions + fix busy codepaths
    
    This fixes a race pointed out by Dave Airlie where we don't take a buffer
    object about to be destroyed off the LRU lists properly. It also fixes a 
rare
    case where a buffer object could be destroyed in the middle of an
    accelerated eviction.
    
    The patch also adds a utility function that can be used to prematurely
    release GPU memory space usage of an object waiting to be destroyed.
    For example during eviction or swapout.
    
    The above mentioned commit didn't queue the buffer on the delayed destroy
    list under some rare circumstances. It also didn't completely honor the
    remove_all parameter.
    
    Fixes:
    https://bugzilla.redhat.com/show_bug.cgi?id=615505
    http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=591061
    
    Signed-off-by: Thomas Hellstrom <[email protected]>
    Signed-off-by: Dave Airlie <[email protected]>
---
 drivers/gpu/drm/ttm/ttm_bo.c |   83 ++++++++++++++++++++++++++++++++++++------
 include/drm/ttm/ttm_bo_api.h |    4 ++-
 2 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index cb4cf7e..db809e0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -442,6 +442,43 @@ out_err:
 }
 
 /**
+ * Call bo::reserved and with the lru lock held.
+ * Will release GPU memory type usage on destruction.
+ * This is the place to put in driver specific hooks.
+ * Will release the bo::reserved lock and the
+ * lru lock on exit.
+ */
+
+static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
+{
+       struct ttm_bo_global *glob = bo->glob;
+
+       if (bo->ttm) {
+
+               /**
+                * Release the lru_lock, since we don't want to have
+                * an atomic requirement on ttm_tt[unbind|destroy].
+                */
+
+               spin_unlock(&glob->lru_lock);
+               ttm_tt_unbind(bo->ttm);
+               ttm_tt_destroy(bo->ttm);
+               bo->ttm = NULL;
+               spin_lock(&glob->lru_lock);
+       }
+
+       if (bo->mem.mm_node) {
+               drm_mm_put_block(bo->mem.mm_node);
+               bo->mem.mm_node = NULL;
+       }
+
+       atomic_set(&bo->reserved, 0);
+       wake_up_all(&bo->event_queue);
+       spin_unlock(&glob->lru_lock);
+}
+
+
+/**
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, and already on delayed list, do nothing.
  * If not idle, and not on delayed list, put on delayed list,
@@ -456,6 +493,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object 
*bo, bool remove_all)
        int ret;
 
        spin_lock(&bo->lock);
+retry:
        (void) ttm_bo_wait(bo, false, false, !remove_all);
 
        if (!bo->sync_obj) {
@@ -464,31 +502,52 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object 
*bo, bool remove_all)
                spin_unlock(&bo->lock);
 
                spin_lock(&glob->lru_lock);
-               put_count = ttm_bo_del_from_lru(bo);
+               ret = ttm_bo_reserve_locked(bo, false, !remove_all, false, 0);
+
+               /**
+                * Someone else has the object reserved. Bail and retry.
+                */
 
-               ret = ttm_bo_reserve_locked(bo, false, false, false, 0);
-               BUG_ON(ret);
-               if (bo->ttm)
-                       ttm_tt_unbind(bo->ttm);
+               if (unlikely(ret == -EBUSY)) {
+                       spin_unlock(&glob->lru_lock);
+                       spin_lock(&bo->lock);
+                       goto requeue;
+               }
+
+               /**
+                * We can re-check for sync object without taking
+                * the bo::lock since setting the sync object requires
+                * also bo::reserved. A busy object at this point may
+                * be caused by another thread starting an accelerated
+                * eviction.
+                */
+
+               if (unlikely(bo->sync_obj)) {
+                       atomic_set(&bo->reserved, 0);
+                       wake_up_all(&bo->event_queue);
+                       spin_unlock(&glob->lru_lock);
+                       spin_lock(&bo->lock);
+                       if (remove_all)
+                               goto retry;
+                       else
+                               goto requeue;
+               }
+
+               put_count = ttm_bo_del_from_lru(bo);
 
                if (!list_empty(&bo->ddestroy)) {
                        list_del_init(&bo->ddestroy);
                        ++put_count;
                }
-               if (bo->mem.mm_node) {
-                       drm_mm_put_block(bo->mem.mm_node);
-                       bo->mem.mm_node = NULL;
-               }
-               spin_unlock(&glob->lru_lock);
 
-               atomic_set(&bo->reserved, 0);
+               ttm_bo_cleanup_memtype_use(bo);
 
                while (put_count--)
                        kref_put(&bo->list_kref, ttm_bo_ref_bug);
 
                return 0;
        }
-
+requeue:
        spin_lock(&glob->lru_lock);
        if (list_empty(&bo->ddestroy)) {
                void *sync_obj = bo->sync_obj;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 267a86c..2040e6c 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -246,9 +246,11 @@ struct ttm_buffer_object {
 
        atomic_t reserved;
 
-
        /**
         * Members protected by the bo::lock
+        * In addition, setting sync_obj to anything else
+        * than NULL requires bo::reserved to be held. This allows for
+        * checking NULL while reserved but not holding bo::lock.
         */
 
        void *sync_obj_arg;
--
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

----- End forwarded message -----
From: Thomas Hellstrom <[email protected]>
Date: Thu, 30 Sep 2010 12:36:45 +0200
Subject: [PATCH] drm/ttm: Fix two race conditions + fix busy codepaths

commit 1df6a2ebd75067aefbdf07482bf8e3d0584e04ee upstream.

This fixes a race pointed out by Dave Airlie where we don't take a buffer
object about to be destroyed off the LRU lists properly. It also fixes a rare
case where a buffer object could be destroyed in the middle of an
accelerated eviction.

The patch also adds a utility function that can be used to prematurely
release GPU memory space usage of an object waiting to be destroyed.
For example during eviction or swapout.

The above mentioned commit didn't queue the buffer on the delayed destroy
list under some rare circumstances. It also didn't completely honor the
remove_all parameter.

Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=615505
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=591061

Signed-off-by: Thomas Hellstrom <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
[ Backported to 2.6.33 -maks ]
---
 drivers/gpu/drm/ttm/ttm_bo.c |   83 ++++++++++++++++++++++++++++++++++++------
 include/drm/ttm/ttm_bo_api.h |    4 ++-
 2 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index cb4cf7e..db809e0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -442,6 +442,43 @@ out_err:
 }
 
 /**
+ * Call bo::reserved and with the lru lock held.
+ * Will release GPU memory type usage on destruction.
+ * This is the place to put in driver specific hooks.
+ * Will release the bo::reserved lock and the
+ * lru lock on exit.
+ */
+
+static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_global *glob = bo->glob;
+
+	if (bo->ttm) {
+
+		/**
+		 * Release the lru_lock, since we don't want to have
+		 * an atomic requirement on ttm_tt[unbind|destroy].
+		 */
+
+		spin_unlock(&glob->lru_lock);
+		ttm_tt_unbind(bo->ttm);
+		ttm_tt_destroy(bo->ttm);
+		bo->ttm = NULL;
+		spin_lock(&glob->lru_lock);
+	}
+
+	if (bo->mem.mm_node) {
+		drm_mm_put_block(bo->mem.mm_node);
+		bo->mem.mm_node = NULL;
+	}
+
+	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
+	spin_unlock(&glob->lru_lock);
+}
+
+
+/**
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, and already on delayed list, do nothing.
  * If not idle, and not on delayed list, put on delayed list,
@@ -456,6 +493,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all)
 	int ret;
 
 	spin_lock(&bo->lock);
+retry:
 	(void) ttm_bo_wait(bo, false, false, !remove_all);
 
 	if (!bo->sync_obj) {
@@ -464,32 +502,52 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all)
 		spin_unlock(&bo->lock);
 
 		spin_lock(&glob->lru_lock);
-		put_count = ttm_bo_del_from_lru(bo);
+		ret = ttm_bo_reserve_locked(bo, false, !remove_all, false, 0);
+
+		/**
+		 * Someone else has the object reserved. Bail and retry.
+		 */
 
-		ret = ttm_bo_reserve_locked(bo, false, false, false, 0);
-		BUG_ON(ret);
-		if (bo->ttm)
-			ttm_tt_unbind(bo->ttm);
+		if (unlikely(ret == -EBUSY)) {
+			spin_unlock(&glob->lru_lock);
+			spin_lock(&bo->lock);
+			goto requeue;
+		}
+
+		/**
+		 * We can re-check for sync object without taking
+		 * the bo::lock since setting the sync object requires
+		 * also bo::reserved. A busy object at this point may
+		 * be caused by another thread starting an accelerated
+		 * eviction.
+		 */
+
+		if (unlikely(bo->sync_obj)) {
+			atomic_set(&bo->reserved, 0);
+			wake_up_all(&bo->event_queue);
+			spin_unlock(&glob->lru_lock);
+			spin_lock(&bo->lock);
+			if (remove_all)
+				goto retry;
+			else
+				goto requeue;
+		}
+
+		put_count = ttm_bo_del_from_lru(bo);
 
 		if (!list_empty(&bo->ddestroy)) {
 			list_del_init(&bo->ddestroy);
 			++put_count;
 		}
-		if (bo->mem.mm_node) {
-			bo->mem.mm_node->private = NULL;
-			drm_mm_put_block(bo->mem.mm_node);
-			bo->mem.mm_node = NULL;
-		}
-		spin_unlock(&glob->lru_lock);
 
-		atomic_set(&bo->reserved, 0);
+		ttm_bo_cleanup_memtype_use(bo);
 
 		while (put_count--)
 			kref_put(&bo->list_kref, ttm_bo_ref_bug);
 
 		return 0;
 	}
-
+requeue:
 	spin_lock(&glob->lru_lock);
 	if (list_empty(&bo->ddestroy)) {
 		void *sync_obj = bo->sync_obj;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 267a86c..2040e6c 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -246,9 +246,11 @@ struct ttm_buffer_object {
 
 	atomic_t reserved;
 
-
 	/**
 	 * Members protected by the bo::lock
+	 * In addition, setting sync_obj to anything else
+	 * than NULL requires bo::reserved to be held. This allows for
+	 * checking NULL while reserved but not holding bo::lock.
 	 */
 
 	void *sync_obj_arg;
-- 
1.7.2.3

_______________________________________________
stable mailing list
[email protected]
http://linux.kernel.org/mailman/listinfo/stable

Reply via email to