On Mon, Apr 21, 2025 at 12:09:15PM +0530, Ghimiray, Himal Prasad wrote:
>
>
> On 17-04-2025 09:43, Matthew Brost wrote:
> > Mixing GPU and CPU atomics does not work unless a strict migration
> > policy is enforced: memory targeted by GPU atomics must be in device
> > memory. Enforce a must-be-in-VRAM policy with a retry loop of 2
> > attempts; if the retry loop fails, abort the fault.
> >
> > v2:
> >  - Only retry migration on atomics
> >  - Drop always migrate modparam
> >
> > Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimi...@intel.com>
> > Signed-off-by: Matthew Brost <matthew.br...@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_module.c |  3 --
> >  drivers/gpu/drm/xe/xe_module.h |  1 -
> >  drivers/gpu/drm/xe/xe_svm.c    | 57 ++++++++++++++++++++++++++--------
> >  drivers/gpu/drm/xe/xe_svm.h    |  5 ---
> >  4 files changed, 44 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> > index 05c7d0ae6d83..1c4dfafbcd0b 100644
> > --- a/drivers/gpu/drm/xe/xe_module.c
> > +++ b/drivers/gpu/drm/xe/xe_module.c
> > @@ -33,9 +33,6 @@ struct xe_modparam xe_modparam = {
> >  module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600);
> >  MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2");
> >  
> > -module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444);
> > -MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault");
> > -
> >  module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
> >  MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
> >
> > diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
> > index 84339e509c80..5a3bfea8b7b4 100644
> > --- a/drivers/gpu/drm/xe/xe_module.h
> > +++ b/drivers/gpu/drm/xe/xe_module.h
> > @@ -12,7 +12,6 @@
> >  struct xe_modparam {
> >  	bool force_execlist;
> >  	bool probe_display;
> > -	bool always_migrate_to_vram;
> >  	u32 force_vram_bar_size;
> >  	int guc_log_level;
> >  	char *guc_firmware_path;
> >
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> > index 56b18a293bbc..1cc41ce7b684 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -726,6 +726,35 @@ static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
> >  }
> >  #endif
> >  
> > +static bool supports_4K_migration(struct xe_device *xe)
> > +{
> > +	if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
> > +		return false;
> > +
> > +	return true;
> > +}
> > +
> > +static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
> > +					       struct xe_vma *vma)
> > +{
> > +	struct xe_vm *vm = range_to_vm(&range->base);
> > +	u64 range_size = xe_svm_range_size(range);
> > +
> > +	if (!range->base.flags.migrate_devmem)
> > +		return false;
> > +
> > +	if (xe_svm_range_in_vram(range)) {
> > +		drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
> > +		return false;
> > +	}
> > +
> > +	if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
> > +		drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
> > +		return false;
> > +	}
> > +
> > +	return true;
> > +}
> > +
> >  /**
> >   * xe_svm_handle_pagefault() - SVM handle page fault
> > @@ -750,12 +779,14 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> >  			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> >  		.check_pages_threshold = IS_DGFX(vm->xe) &&
> >  			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
> > +		.vram_only = atomic,
>
> atomic && is_dgfx.
Yes, indeed.

Matt

> >  	};
> >  	struct xe_svm_range *range;
> >  	struct drm_gpusvm_range *r;
> >  	struct drm_exec exec;
> >  	struct dma_fence *fence;
> >  	struct xe_tile *tile = gt_to_tile(gt);
> > +	int migrate_try_count = atomic ? 3 : 1;
> >  	ktime_t end = 0;
> >  	int err;
> >
> > @@ -782,18 +813,21 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> >  	range_debug(range, "PAGE FAULT");
> >  
> > -	/* XXX: Add migration policy, for now migrate range once */
> > -	if (!range->skip_migrate && range->base.flags.migrate_devmem &&
> > -	    xe_svm_range_size(range) >= SZ_64K) {
> > -		range->skip_migrate = true;
> > -
> > +	if (--migrate_try_count >= 0 &&
> > +	    xe_svm_range_needs_migrate_to_vram(range, vma)) {
> >  		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
> >  		if (err) {
> > -			drm_dbg(&vm->xe->drm,
> > -				"VRAM allocation failed, falling back to "
> > -				"retrying fault, asid=%u, errno=%pe\n",
> > -				vm->usm.asid, ERR_PTR(err));
> > -			goto retry;
> > +			if (migrate_try_count || !ctx.vram_only) {
> > +				drm_dbg(&vm->xe->drm,
> > +					"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
> > +					vm->usm.asid, ERR_PTR(err));
> > +				goto retry;
> > +			} else {
> > +				drm_err(&vm->xe->drm,
> > +					"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
> > +					vm->usm.asid, ERR_PTR(err));
> > +				return err;
> > +			}
> >  		}
> >  	}
> >
> > @@ -843,9 +877,6 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> >  	}
> >  	drm_exec_fini(&exec);
> >  
> > -	if (xe_modparam.always_migrate_to_vram)
> > -		range->skip_migrate = false;
> > -
> >  	dma_fence_wait(fence, false);
> >  	dma_fence_put(fence);
> >
> > diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
> > index 3d441eb1f7ea..0e1f376a7471 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.h
> > +++ b/drivers/gpu/drm/xe/xe_svm.h
> > @@ -39,11 +39,6 @@ struct xe_svm_range {
> >  	 * range. Protected by GPU SVM notifier lock.
> >  	 */
> >  	u8 tile_invalidated;
> > -	/**
> > -	 * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler
> > -	 * locking.
> > -	 */
> > -	u8 skip_migrate :1;
> >  };
> >  
> >  /**
>
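[For readers following the thread: the point agreed above is that vram_only should also be gated on the device being discrete, since integrated parts have no VRAM to migrate to. A minimal sketch of what the adjusted ctx setup in xe_svm_handle_pagefault() might look like, reusing the IS_DGFX() check already present in the same initializer; this is an illustration of the suggested change, not the actual next revision:]

        struct drm_gpusvm_ctx ctx = {
                /* ... other fields as in the hunk quoted above ... */
                .check_pages_threshold = IS_DGFX(vm->xe) &&
                        IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
                /*
                 * A strict must-be-in-VRAM policy only makes sense on
                 * discrete devices; the atomic flag alone must not force
                 * vram_only on integrated parts, which have no VRAM.
                 */
                .vram_only = atomic && IS_DGFX(vm->xe),
        };

[With vram_only set, migrate_try_count = 3 for atomic faults gives the initial migration attempt plus two retries; once the count is exhausted the handler logs drm_err() and returns the error instead of retrying the fault indefinitely.]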