On Tue, 13 Feb 2024, Umesh Nerlige Ramappa <[email protected]> 
wrote:
> Once a user opens an fd for a perf event, if the driver undergoes a
> function level reset (FLR), the resources are not cleaned up as
> expected. For this discussion FLR is defined as a PCI unbind followed by
> a bind. perf_pmu_unregister() would cleanup everything, but when the user
> closes the perf fd, perf_release is executed and we encounter null
> pointer dereferences and/or list corruption in that path which require a
> reboot to recover.
>
> The only approach that worked to resolve this was to close the file
> associated with the event such that the relevant cleanup happens w.r.t.
> the open file. To do so, use the event->owner task and find the file
> relevant to the event and close it. This relies on the
> file->private_data matching the event object.
>
> Note:
> - Closing the event file is a delayed work that gets queued to system_wq.
> The close is seen to happen when kernel returns to user space following
> the unbind.
>
> - perf framework will access the pmu object after the last event has
> been destroyed. The drm device is refcounted in the init and destroy
> hooks, so this causes a use after free if we are releasing the drm
> device reference after unbind has been called. To work around this, we
> take an extra reference in the unbind path and release it using a
> delayed work in the destroy patch. The delayed work is queued to
> system_wq.
>
> Ref: 
> https://lore.kernel.org/lkml/[email protected]/T/#me72abfa2771e6fc94b167ce47efdbf391cc313ab
>
> Opens:
> - Synchronization may be needed between i915_pmu_unregister and
> i915_pmu_event_destroy to avoid any races.
>
> - If unbind and bind happen from the same process the event fd is closed
> after bind completes. This means that the cleanup would not happen
> until bind completes. In this case, i915 loads fine, but pmu
> registration fails with an error that the sysfs entries are already
> present. There is no solution feasible here. Since this is not a fatal
> error (reloading i915 works fine) and the usual case is to have bind and
> unbind in separate processes, there is no intention to solve this.
>
> Other solutions/aspects tried:
> - Call perf_event_disable() followed by perf_event_release_kernel() in
> the unbind path to clean up the events. This still causes issues when
> user closes the fd since perf_event_release_kernel() is called again and
> fails requiring reboot.
>
> - Close all event fds in unbind and wait for the close to complete by
> checking if list is empty. This wait does not work since the files
> are actually closed when unbind returns to user space.
>
> Testing:
> - New IGT tests have been added for this and are run with KASAN and
>   kmemleak enabled.
>
> Signed-off-by: Umesh Nerlige Ramappa <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_pmu.c | 96 ++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_pmu.h | 15 ++++++
>  2 files changed, 110 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 4d2a289f848a..2f365c7f5db7 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -4,6 +4,8 @@
>   * Copyright © 2017-2018 Intel Corporation
>   */
>  
> +#include <linux/fdtable.h>
> +#include <linux/fs.h>
>  #include <linux/pm_runtime.h>
>  
>  #include "gt/intel_engine.h"
> @@ -573,9 +575,21 @@ static void i915_pmu_event_destroy(struct perf_event 
> *event)
>  {
>       struct i915_pmu *pmu = event_to_pmu(event);
>       struct drm_i915_private *i915 = pmu_to_i915(pmu);
> +     struct i915_event *e = event->pmu_private;
>  
>       drm_WARN_ON(&i915->drm, event->parent);
>  
> +     if (e) {
> +             event->pmu_private = NULL;
> +             list_del(&e->link);
> +             kfree(e);
> +     }
> +
> +     if (i915->pmu.closed && list_empty(&i915->pmu.initialized_events)) {
> +             pmu_teardown(&i915->pmu);
> +             mod_delayed_work(system_wq, &i915->pmu.work, 50);
> +     }
> +
>       drm_dev_put(&i915->drm);
>  }
>  
> @@ -684,6 +698,14 @@ static int i915_pmu_event_init(struct perf_event *event)
>               return ret;
>  
>       if (!event->parent) {
> +             struct i915_event *e = kzalloc(sizeof(*e), GFP_KERNEL);
> +
> +             if (!e)
> +                     return -ENOMEM;
> +
> +             e->event = event;
> +             list_add(&e->link, &pmu->initialized_events);
> +             event->pmu_private = e;
>               drm_dev_get(&i915->drm);
>               event->destroy = i915_pmu_event_destroy;
>       }
> @@ -1256,6 +1278,14 @@ void i915_pmu_exit(void)
>               cpuhp_remove_multi_state(cpuhp_slot);
>  }
>  
> +static void i915_pmu_release(struct work_struct *work)
> +{
> +     struct i915_pmu *pmu = container_of(work, typeof(*pmu), work.work);
> +     struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
> +
> +     drm_dev_put(&i915->drm);
> +}
> +
>  void i915_pmu_register(struct drm_i915_private *i915)
>  {
>       struct i915_pmu *pmu = &i915->pmu;
> @@ -1313,6 +1343,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
>       pmu->base.read          = i915_pmu_event_read;
>       pmu->base.event_idx     = i915_pmu_event_event_idx;
>  
> +     INIT_LIST_HEAD(&pmu->initialized_events);
> +     INIT_DELAYED_WORK(&pmu->work, i915_pmu_release);
> +
>       ret = perf_pmu_register(&pmu->base, pmu->name, -1);
>       if (ret)
>               goto err_groups;
> @@ -1337,6 +1370,64 @@ void i915_pmu_register(struct drm_i915_private *i915)
>       drm_notice(&i915->drm, "Failed to register PMU!\n");
>  }
>  
> +/* Ref: close_fd() */
> +static unsigned int __open_files(struct fdtable *fdt)
> +{
> +     unsigned int size = fdt->max_fds;
> +     unsigned int i;
> +
> +     for (i = size / BITS_PER_LONG; i > 0; ) {
> +             if (fdt->open_fds[--i])
> +                     break;
> +     }
> +     return (i + 1) * BITS_PER_LONG;
> +}
> +
> +static void close_event_file(struct perf_event *event)
> +{
> +     unsigned int max_open_fds, fd;
> +     struct files_struct *files;
> +     struct task_struct *task;
> +     struct fdtable *fdt;
> +
> +     task = event->owner;
> +     if (!task)
> +             return;
> +
> +     files = task->files;
> +     if (!files)
> +             return;
> +
> +     spin_lock(&files->file_lock);
> +     fdt = files_fdtable(files);
> +     max_open_fds = __open_files(fdt);
> +     for (fd = 0; fd < max_open_fds; fd++) {
> +             struct file *file = fdt->fd[fd];
> +
> +             if (!file || file->private_data != event)
> +                     continue;
> +
> +             rcu_assign_pointer(fdt->fd[fd], NULL);
> +             __clear_bit(fd, fdt->open_fds);
> +             __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
> +             if (fd < files->next_fd)
> +                     files->next_fd = fd;
> +             filp_close(file, files);
> +             break;
> +     }
> +     spin_unlock(&files->file_lock);
> +}
> +
> +static void cleanup_events(struct i915_pmu *pmu)
> +{
> +     struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
> +     struct i915_event *e, *tmp;
> +
> +     drm_dev_get(&i915->drm);
> +     list_for_each_entry_safe(e, tmp, &pmu->initialized_events, link)
> +             close_event_file(e->event);
> +}
> +
>  void i915_pmu_unregister(struct drm_i915_private *i915)
>  {
>       struct i915_pmu *pmu = &i915->pmu;
> @@ -1354,5 +1445,8 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
>  
>       hrtimer_cancel(&pmu->timer);
>  
> -     pmu_teardown(pmu);
> +     if (list_empty(&pmu->initialized_events))
> +             pmu_teardown(pmu);
> +     else
> +             cleanup_events(pmu);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 41af038c3738..6f62e820f34d 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -55,6 +55,11 @@ struct i915_pmu_sample {
>       u64 cur;
>  };
>  
> +struct i915_event {
> +     struct perf_event *event;
> +     struct list_head link;
> +};
> +

Nobody needs this outside of i915_pmu.c.

>  struct i915_pmu {
>       /**
>        * @cpuhp: Struct used for CPU hotplug handling.
> @@ -152,6 +157,16 @@ struct i915_pmu {
>        * @pmu_attr: Memory block holding device attributes.
>        */
>       void *pmu_attr;
> +
> +     /**
> +      * @initialized_events: List of initialized events
> +      */
> +     struct list_head initialized_events;
> +
> +     /**
> +      * @work: worker to delay release of drm device reference 
> +      */
> +     struct delayed_work work;
>  };
>  
>  #ifdef CONFIG_PERF_EVENTS

-- 
Jani Nikula, Intel

Reply via email to