On 13/11/2025 10:39, Boris Brezillon wrote:
> Move the lock/flush_mem operations around the gpuvm_sm_[un]map() calls
> so we can implement true atomic page updates, where any GPU access to
> the locked range has to wait for the page table updates to land before
> proceeding.
> 
> This is needed for vkQueueBindSparse(), so we can replace the dummy
> page mapped over the entire object with actual BO-backed pages in an
> atomic way. It is also useful for avoiding "AS_ACTIVE bit stuck"
> failures in the sm_[un]map() path, which lead to gpuvm state
> inconsistencies.
> 
> v2:
> - Adjust to match the two new preliminary patches
> 
> Signed-off-by: Boris Brezillon <[email protected]>

Looks fine to me.

Reviewed-by: Steven Price <[email protected]>
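
To restate the flow for anyone skimming: each VM bind operation now
brackets the gpuvm_sm_[un]map() call with the new lock/unlock helpers.
A minimal sketch, assuming only the helpers introduced by this patch
(panthor_vm_lock_region()/panthor_vm_unlock_region()); the hypothetical
do_sm_map_or_unmap() stands in for the actual gpuvm calls:

    /* Hedged sketch, not driver code; a simplified stand-in for
     * panthor_vm_exec_op().
     */
    static int bind_op_sketch(struct panthor_vm *vm, u64 start, u64 size)
    {
            int ret;

            /* Stall GPU accesses to [start, start + size) on this AS. */
            ret = panthor_vm_lock_region(vm, start, size);
            if (ret)
                    return ret;

            /* Update the page tables while the region is locked. */
            ret = do_sm_map_or_unmap(vm, start, size); /* hypothetical */

            /* Flush caches and unlock so the GPU observes the new
             * mappings atomically (a failure here schedules a reset).
             */
            panthor_vm_unlock_region(vm);

            return ret;
    }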

> ---
>  drivers/gpu/drm/panthor/panthor_mmu.c | 184 +++++++++++++-------------
>  1 file changed, 95 insertions(+), 89 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index f109c1588186..21389137a324 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -389,6 +389,15 @@ struct panthor_vm {
>        * flagged as faulty as a result.
>        */
>       bool unhandled_fault;
> +
> +     /** @locked_region: Information about the currently locked region. */
> +     struct {
> +             /** @locked_region.start: Start of the locked region. */
> +             u64 start;
> +
> +             /** @locked_region.size: Size of the locked region. */
> +             u64 size;
> +     } locked_region;
>  };
>  
>  /**
> @@ -566,76 +575,9 @@ static u64 pack_region_range(u64 region_start, u64 size)
>       return region_width | region_start;
>  }
>  
> -static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
> -                                   u64 iova, u64 size, u32 op)
> -{
> -     const u32 l2_flush_op = CACHE_CLEAN | CACHE_INV;
> -     u32 lsc_flush_op;
> -     int ret;
> -
> -     lockdep_assert_held(&ptdev->mmu->as.slots_lock);
> -
> -     switch (op) {
> -     case AS_COMMAND_FLUSH_MEM:
> -             lsc_flush_op = CACHE_CLEAN | CACHE_INV;
> -             break;
> -     case AS_COMMAND_FLUSH_PT:
> -             lsc_flush_op = 0;
> -             break;
> -     default:
> -             drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op);
> -             return -EINVAL;
> -     }
> -
> -     if (as_nr < 0)
> -             return 0;
> -
> -     /*
> -      * If the AS number is greater than zero, then we can be sure
> -      * the device is up and running, so we don't need to explicitly
> -      * power it up
> -      */
> -
> -     /* Lock the region that needs to be updated */
> -     gpu_write64(ptdev, AS_LOCKADDR(as_nr), pack_region_range(iova, size));
> -     ret = as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_LOCK);
> -     if (ret)
> -             return ret;
> -
> -     ret = panthor_gpu_flush_caches(ptdev, l2_flush_op, lsc_flush_op, 0);
> -     if (ret)
> -             return ret;
> -
> -     /*
> -      * Explicitly unlock the region as the AS is not unlocked automatically
> -      * at the end of the GPU_CONTROL cache flush command, unlike
> -      * AS_COMMAND_FLUSH_MEM or AS_COMMAND_FLUSH_PT.
> -      */
> -     return as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_UNLOCK);
> -}
> -
> -static int mmu_hw_do_operation(struct panthor_vm *vm,
> -                            u64 iova, u64 size, u32 op)
> -{
> -     struct panthor_device *ptdev = vm->ptdev;
> -     int ret;
> -
> -     mutex_lock(&ptdev->mmu->as.slots_lock);
> -     ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op);
> -     mutex_unlock(&ptdev->mmu->as.slots_lock);
> -
> -     return ret;
> -}
> -
>  static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr,
>                                u64 transtab, u64 transcfg, u64 memattr)
>  {
> -     int ret;
> -
> -     ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
> -     if (ret)
> -             return ret;
> -
>       gpu_write64(ptdev, AS_TRANSTAB(as_nr), transtab);
>       gpu_write64(ptdev, AS_MEMATTR(as_nr), memattr);
>       gpu_write64(ptdev, AS_TRANSCFG(as_nr), transcfg);
> @@ -647,7 +589,9 @@ static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr)
>  {
>       int ret;
>  
> -     ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
> +     /* Flush+invalidate RW caches, invalidate RO ones. */
> +     ret = panthor_gpu_flush_caches(ptdev, CACHE_CLEAN | CACHE_INV,
> +                                    CACHE_CLEAN | CACHE_INV, CACHE_INV);
>       if (ret)
>               return ret;
>  
> @@ -729,6 +673,10 @@ int panthor_vm_active(struct panthor_vm *vm)
>       if (refcount_inc_not_zero(&vm->as.active_cnt))
>               goto out_dev_exit;
>  
> +     /* Make sure we don't race with lock/unlock_region() calls
> +      * happening around VM bind operations.
> +      */
> +     mutex_lock(&vm->op_lock);
>       mutex_lock(&ptdev->mmu->as.slots_lock);
>  
>       if (refcount_inc_not_zero(&vm->as.active_cnt))
> @@ -796,6 +744,10 @@ int panthor_vm_active(struct panthor_vm *vm)
>               gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
>       }
>  
> +     /* The VM update is guarded by ::op_lock, which we take at the beginning
> +      * of this function, so we don't expect any locked region here.
> +      */
> +     drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0);
>       ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
>  
>  out_make_active:
> @@ -806,6 +758,7 @@ int panthor_vm_active(struct panthor_vm *vm)
>  
>  out_unlock:
>       mutex_unlock(&ptdev->mmu->as.slots_lock);
> +     mutex_unlock(&vm->op_lock);
>  
>  out_dev_exit:
>       drm_dev_exit(cookie);
> @@ -889,30 +842,15 @@ static size_t get_pgsize(u64 addr, size_t size, size_t *count)
>       return SZ_2M;
>  }
>  
> -static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size)
> -{
> -     struct panthor_device *ptdev = vm->ptdev;
> -     int ret = 0, cookie;
> -
> -     if (vm->as.id < 0)
> -             return 0;
> -
> -     /* If the device is unplugged, we just silently skip the flush. */
> -     if (!drm_dev_enter(&ptdev->base, &cookie))
> -             return 0;
> -
> -     ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT);
> -
> -     drm_dev_exit(cookie);
> -     return ret;
> -}
> -
>  static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
>  {
>       struct panthor_device *ptdev = vm->ptdev;
>       struct io_pgtable_ops *ops = vm->pgtbl_ops;
>       u64 offset = 0;
>  
> +     drm_WARN_ON(&ptdev->base,
> +                 (iova < vm->locked_region.start) ||
> +                 (iova + size > vm->locked_region.start + vm->locked_region.size));
>       drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
>  
>       while (offset < size) {
> @@ -926,13 +864,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
>                               iova + offset + unmapped_sz,
>                               iova + offset + pgsize * pgcount,
>                               iova, iova + size);
> -                     panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
>                       return  -EINVAL;
>               }
>               offset += unmapped_sz;
>       }
>  
> -     return panthor_vm_flush_range(vm, iova, size);
> +     return 0;
>  }
>  
>  static int
> @@ -949,6 +886,10 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
>       if (!size)
>               return 0;
>  
> +     drm_WARN_ON(&ptdev->base,
> +                 (iova < vm->locked_region.start) ||
> +                 (iova + size > vm->locked_region.start + vm->locked_region.size));
> +
>       for_each_sgtable_dma_sg(sgt, sgl, count) {
>               dma_addr_t paddr = sg_dma_address(sgl);
>               size_t len = sg_dma_len(sgl);
> @@ -996,7 +937,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
>               offset = 0;
>       }
>  
> -     return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
> +     return 0;
>  }
>  
>  static int flags_to_prot(u32 flags)
> @@ -1679,6 +1620,61 @@ static const char *access_type_name(struct panthor_device *ptdev,
>       }
>  }
>  
> +static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size)
> +{
> +     struct panthor_device *ptdev = vm->ptdev;
> +     int ret = 0;
> +
> +     mutex_lock(&ptdev->mmu->as.slots_lock);
> +     drm_WARN_ON(&ptdev->base, vm->locked_region.start || vm->locked_region.size);
> +     if (vm->as.id >= 0 && size) {
> +             /* Lock the region that needs to be updated */
> +             gpu_write64(ptdev, AS_LOCKADDR(vm->as.id), pack_region_range(start, size));
> +
> +             /* If the lock succeeded, update the locked_region info. */
> +             ret = as_send_cmd_and_wait(ptdev, vm->as.id, AS_COMMAND_LOCK);
> +     }
> +
> +     if (!ret) {
> +             vm->locked_region.start = start;
> +             vm->locked_region.size = size;
> +     }
> +     mutex_unlock(&ptdev->mmu->as.slots_lock);
> +
> +     return ret;
> +}
> +
> +static void panthor_vm_unlock_region(struct panthor_vm *vm)
> +{
> +     struct panthor_device *ptdev = vm->ptdev;
> +
> +     mutex_lock(&ptdev->mmu->as.slots_lock);
> +     if (vm->as.id >= 0) {
> +             int ret;
> +
> +             /* flush+invalidate RW caches and invalidate RO ones.
> +              * TODO: See if we can use FLUSH_PA_RANGE when the physical
> +              * range is narrow enough and the HW supports it.
> +              */
> +             ret = panthor_gpu_flush_caches(ptdev, CACHE_CLEAN | CACHE_INV,
> +                                            CACHE_CLEAN | CACHE_INV,
> +                                            CACHE_INV);
> +
> +             /* Unlock the region if the flush is effective. */
> +             if (!ret)
> +                     ret = as_send_cmd_and_wait(ptdev, vm->as.id, AS_COMMAND_UNLOCK);
> +
> +             /* If we fail to flush or unlock the region, schedule a GPU reset
> +              * to unblock the situation.
> +              */
> +             if (ret)
> +                     panthor_device_schedule_reset(ptdev);
> +     }
> +     vm->locked_region.start = 0;
> +     vm->locked_region.size = 0;
> +     mutex_unlock(&ptdev->mmu->as.slots_lock);
> +}
> +
>  static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
>  {
>       bool has_unhandled_faults = false;
> @@ -1883,6 +1879,7 @@ static void panthor_vm_free(struct drm_gpuvm *gpuvm)
>       drm_sched_entity_destroy(&vm->entity);
>       drm_sched_fini(&vm->sched);
>  
> +     mutex_lock(&vm->op_lock);
>       mutex_lock(&ptdev->mmu->as.slots_lock);
>       if (vm->as.id >= 0) {
>               int cookie;
> @@ -1897,6 +1894,7 @@ static void panthor_vm_free(struct drm_gpuvm *gpuvm)
>               list_del(&vm->as.lru_node);
>       }
>       mutex_unlock(&ptdev->mmu->as.slots_lock);
> +     mutex_unlock(&vm->op_lock);
>  
>       free_io_pgtable_ops(vm->pgtbl_ops);
>  
> @@ -2206,6 +2204,11 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
>  
>       mutex_lock(&vm->op_lock);
>       vm->op_ctx = op;
> +
> +     ret = panthor_vm_lock_region(vm, op->va.addr, op->va.range);
> +     if (ret)
> +             goto out;
> +
>       switch (op_type) {
>       case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: {
>               const struct drm_gpuvm_map_req map_req = {
> @@ -2233,6 +2236,9 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
>               break;
>       }
>  
> +     panthor_vm_unlock_region(vm);
> +
> +out:
>       if (ret && flag_vm_unusable_on_failure)
>               vm->unusable = true;
>  
