Hey,
On 6/4/26 21:44, Simona Vetter wrote:
> On-list because the cat is out of the bag and we're clearly not good
> enough to figure this out in private. The story thus far:
>
> 5e28b7b94408 ("drm: Set old handle to NULL before prime swap in
> change_handle") tried to fix a race condition between the gem_close and
> gem_change_handle ioctls, but got a few things wrong:
>
> - There's a confusion with the local variable handle, which is actually
> the new handle, and so the two-stage trick was actually applied to the
> wrong idr slot. 7164d78559b0 ("drm/gem: fix race between
> change_handle and handle_delete") tried to fix that by adding yet
> another code block, but forgot to add the error handling. Which meant
> we now have two paths, both kinda wrong.
>
> - dc366607c41c ("drm: Replace old pointer to new idr") tried to apply
> another fix, but inconsistently, again because of the handle confusion
> - this would be the right fix (kinda, somewhat, it's a mess) if we'd
> do the two-stage approach for the new handle. Except that wasn't the
> intent of the original fix.
>
> We also didn't have an igt merged for the original ioctl, which is a big
> no-go. This was attempted to address off-list in the original bugfix,
> and amd QA people claimed the bug was fixed now. Very clearly that's not
> the case. Here's my attempt to sort this out:
>
> - Rename the local variable to new_handle, the old aliasing with
> args->handle is just too dangerously confusing.
>
> - Merge the gem obj lookup with the two-stage idr_replace so that we
> avoid getting ourselves confused there.
>
> - This means we don't have a surplus temporary reference anymore, only
> an inherited from the idr. A concurrent gem_close on the new_handle
> could steal that. Fix that with the same two-stage approach
> create_tail uses. This is a bit overkill as documented in the comment,
> but I also don't trust my ability to understand this all correctly, so
> go with the established pattern we have from other ioctls instead for
> maximum paranoia.
>
> - Adjust error paths. I've tried to make the error and success paths
> common, because they are identical except for which handle is removed
> and on which we call idr_replace to (re)install the object again. But
> that made things messier to read, so I've left it at the more verbose
> version, which unfortunately hides the symmetry in the entire code
> flow a bit.
>
> - While at it, also replace the 7 space indent with 1 tab.
>
> And finally, because I flat out don't trust my abilities here at all
> anymore:
>
> - Disable the ioctl until we have the igt situation and everything else
> sorted out on-list and with full consensus.
>
Can you push the revert first, and then worry about fixing change_handle
parts of the ioctl properly later, so that part can be merged ASAP?
---
> v2:
>
> Sashiko noticed that I didn't handle the error path for idr_replace
> correctly, it must be checked with IS_ERR_OR_NULL like in
> gem_handle_delete. So yeah, definitely should just the existing paths
> 1:1 because this is endless amounts of tricky.
>
> Also add the Fixes: line for the original ioctl, I forgot that too.
>
> Reported-by: DARKNAVY (@DarkNavyOrg) <[email protected]>
> Signed-off-by: Simona Vetter <[email protected]>
> Fixes: dc366607c41c ("drm: Replace old pointer to new idr")
> Cc: [email protected]
> Cc: [email protected]
> Cc: Edward Adam Davis <[email protected]>
> Cc: Dave Airlie <[email protected]>
> Cc: Maarten Lankhorst <[email protected]>
> Cc: Maxime Ripard <[email protected]>
> Cc: Thomas Zimmermann <[email protected]>
> Fixes: 5e28b7b94408 ("drm: Set old handle to NULL before prime swap in
> change_handle")
> Cc: David Francis <[email protected]>
> Cc: Puttimet Thammasaeng <[email protected]>
> Cc: Christian Koenig <[email protected]>
> Fixes: 7164d78559b0 ("drm/gem: fix race between change_handle and
> handle_delete")
> Cc: Zhenghang Xiao <[email protected]>
> Fixes: 5e28b7b94408 ("drm: Set old handle to NULL before prime swap in
> change_handle")
> ---
> drivers/gpu/drm/drm_gem.c | 62 +++++++++++++------------------------
> drivers/gpu/drm/drm_ioctl.c | 2 +-
> 2 files changed, 23 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index e12cdf91f4dc..f49f1724eda5 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -1019,8 +1019,8 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev,
> void *data,
> struct drm_file *file_priv)
> {
> struct drm_gem_change_handle *args = data;
> - struct drm_gem_object *obj, *idrobj;
> - int handle, ret;
> + struct drm_gem_object *obj;
> + int new_handle, ret;
>
> if (!drm_core_check_feature(dev, DRIVER_GEM))
> return -EOPNOTSUPP;
> @@ -1028,52 +1028,36 @@ int drm_gem_change_handle_ioctl(struct drm_device
> *dev, void *data,
> /* idr_alloc() limitation. */
> if (args->new_handle > INT_MAX)
> return -EINVAL;
> - handle = args->new_handle;
> -
> - obj = drm_gem_object_lookup(file_priv, args->handle);
> - if (!obj)
> - return -ENOENT;
> + new_handle = args->new_handle;
>
> - if (args->handle == handle) {
> - ret = 0;
> - goto out;
> - }
> + if (args->handle == new_handle)
> + return 0;
>
> mutex_lock(&file_priv->prime.lock);
> -
> spin_lock(&file_priv->table_lock);
> -
> - /* When create_tail allocs an obj idr, it needs to first alloc as
> NULL,
> - * then later replace with the correct object. This is not necessary
> - * here, because the only operations that could race are drm_prime
> - * bookkeeping, and we hold the prime lock.
> - */
> - ret = idr_alloc(&file_priv->object_idr, obj, handle, handle + 1,
> + ret = idr_alloc(&file_priv->object_idr, NULL, new_handle, new_handle +
> 1,
> GFP_NOWAIT);
>
> - if (ret < 0) {
> - spin_unlock(&file_priv->table_lock);
> - goto out_unlock;
> - }
> -
> - idrobj = idr_replace(&file_priv->object_idr, NULL, handle);
> - if (idrobj != obj) {
> - idr_replace(&file_priv->object_idr, idrobj, handle);
> - idr_remove(&file_priv->object_idr, args->new_handle);
> - spin_unlock(&file_priv->table_lock);
> - ret = -ENOENT;
> - goto out_unlock;
> - }
> -
> - idr_replace(&file_priv->object_idr, NULL, args->handle);
> + if (ret < 0) {
> + spin_unlock(&file_priv->table_lock);
> + goto out_unlock;
> + }
> +
> + obj = idr_replace(&file_priv->object_idr, NULL, args->handle);
> + if (IS_ERR_OR_NULL(obj)) {
> + idr_remove(&file_priv->object_idr, new_handle);
> + spin_unlock(&file_priv->table_lock);
> + ret = -ENOENT;
> + goto out_unlock;
> + }
> spin_unlock(&file_priv->table_lock);
>
> if (obj->dma_buf) {
> ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf,
> - handle);
> + new_handle);
> if (ret < 0) {
> spin_lock(&file_priv->table_lock);
> - idr_remove(&file_priv->object_idr, handle);
> + idr_remove(&file_priv->object_idr, new_handle);
> idr_replace(&file_priv->object_idr, obj, args->handle);
> spin_unlock(&file_priv->table_lock);
> goto out_unlock;
> @@ -1086,14 +1070,12 @@ int drm_gem_change_handle_ioctl(struct drm_device
> *dev, void *data,
>
> spin_lock(&file_priv->table_lock);
> idr_remove(&file_priv->object_idr, args->handle);
> - idrobj = idr_replace(&file_priv->object_idr, obj, handle);
> + obj = idr_replace(&file_priv->object_idr, obj, new_handle);
> spin_unlock(&file_priv->table_lock);
> - WARN_ON(idrobj != NULL);
> + WARN_ON(obj != NULL);
>
> out_unlock:
> mutex_unlock(&file_priv->prime.lock);
> -out:
> - drm_gem_object_put(obj);
>
> return ret;
> }
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index ff193155129e..937fc1e2c017 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -660,7 +660,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
> DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl,
> DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH),
> DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
> - DRM_IOCTL_DEF(DRM_IOCTL_GEM_CHANGE_HANDLE, drm_gem_change_handle_ioctl,
> DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF(DRM_IOCTL_GEM_CHANGE_HANDLE, drm_invalid_op,
> DRM_RENDER_ALLOW),
>
> DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, 0),
>