On Mon, May 31, 2021 at 02:38:34PM +0200, Matthias Pressfreund wrote: > After applying the diffs you sent on 2021-05-17 and 2021-05-27, I > booted the new kernel which completed until the login prompt. On > the way I got this: > > ... > scsibus2 at vscsi0: 256 targets > softraid0 at root > scsibus3 at softraid0: 256 targets > root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b > > i915_ggtt_pin called with NULL vma > WARNING !list_empty(&dev->mode_config.connector_list) failed at > /usr/src/sys/dev/pci/drm/drm_mode_config.c:487 > [drm] *ERROR* connector DP-2 leaked! > drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device > initialization failed (-22) > WARNING ({ typeof(vblank->enabled) __tmp = *(volatile typeof(vblank->enabled) > *)&(vblank->enabled); membar_datadep_consumer(); __tmp; }) && > drm_core_check_feature(dev, DRIVER_MODESET) failed at > /usr/src/sys/dev/pci/drm/drm_vblank.c:440 > Automatic boot in progress: starting file system checks. > /dev/sd0a (5e1040cb2dc494f4.a): file system is clean; not checking > ... > > > Then I rebooted a few times without problems. Then, this happened: > > ... 
> scsibus2 at vscsi0: 256 targets > softraid0 at root > scsibus3 at softraid0: 256 targets > root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b > uvm_fault(0xffffffff8218aa20, 0xb9, 0, 1) -> e > kernel: page fault trap, code=0 > Stopped at i915_ggtt_pin+0x31: movq 0xb8(%rdi),%r12 > ddb{0}> trace > i915_ggtt_pin(1,10000,20) at i915_ggtt_pin+0x31 > gen6_ppgtt_pin(ffff80000080f000) at gen6_ppgtt_pin+0x7c > __intel_context_do_pin(fffffd817adb6d80) at __intel_context_do_pin+0xca > intel_engines_init(ffff800000104c38) at intel_engines_init+0x4b5 > intel_gt_init(ffff800000104c38) at intel_gt_init+0x130 > i915_gem_init(ffff800000100000) at i915_gem_init+0xa3 > i915_driver_probe(ffff800000100000,ffffffff8207c330) at > i915_driver_probe+0x7ed > > inteldrm_attachhook(ffff800000100000) at inteldrm_attachhook+0x43 > config_process_deferred_mountroot() at config_process_deferred_mountroot+0x6b
I believe something is incorrectly overwriting the struct that holds the vma pointer. Can you drop the previous patches and try this? Index: sys/dev/pci/drm/i915/i915_vma.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_vma.c,v retrieving revision 1.5 diff -u -p -r1.5 i915_vma.c --- sys/dev/pci/drm/i915/i915_vma.c 11 Oct 2020 05:45:33 -0000 1.5 +++ sys/dev/pci/drm/i915/i915_vma.c 1 Jun 2021 13:16:37 -0000 @@ -484,7 +484,8 @@ void __iomem *i915_vma_pin_iomap(struct err = agp_map_subregion(dev_priv->agph, vma->node.start, vma->node.size, &vma->bsh); if (err) { - err = -err; + printf("%s agp_map_subregion failed\n", __func__); + err = -ENOMEM; goto err; } ptr = bus_space_vaddr(dev_priv->bst, vma->bsh); @@ -851,6 +852,13 @@ static int vma_get_pages(struct i915_vma if (mutex_lock_interruptible(&vma->pages_mutex)) return -EINTR; + if (((vaddr_t)vma->obj) < VM_MIN_KERNEL_ADDRESS) { + printf("%s bad obj ptr %p\n", __func__, vma->obj); + vma->obj = NULL; + err = -EINVAL; + goto unlock; + } + if (!atomic_read(&vma->pages_count)) { if (vma->obj) { err = i915_gem_object_pin_pages(vma->obj); @@ -1029,8 +1037,14 @@ static void flush_idle_contexts(struct i int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) { - struct i915_address_space *vm = vma->vm; + struct i915_address_space *vm; int err; + + if (((vaddr_t)vma) < VM_MIN_KERNEL_ADDRESS) { + printf("%s called with %p vma\n", __func__, vma); + return -EINVAL; + } + vm = vma->vm; GEM_BUG_ON(!i915_vma_is_ggtt(vma)); Index: sys/dev/pci/drm/i915/gt/gen6_ppgtt.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/gen6_ppgtt.c,v retrieving revision 1.1 diff -u -p -r1.1 gen6_ppgtt.c --- sys/dev/pci/drm/i915/gt/gen6_ppgtt.c 8 Jun 2020 04:48:13 -0000 1.1 +++ sys/dev/pci/drm/i915/gt/gen6_ppgtt.c 1 Jun 2021 13:18:33 -0000 @@ -354,8 +354,10 @@ static struct i915_vma *pd_vma_create(st GEM_BUG_ON(size > 
ggtt->vm.total); vma = i915_vma_alloc(); - if (!vma) + if (!vma) { + printf("%s i915_vma_alloc() returned NULL\n", __func__); return ERR_PTR(-ENOMEM); + } i915_active_init(&vma->active, NULL, NULL); @@ -401,8 +403,13 @@ int gen6_ppgtt_pin(struct i915_ppgtt *ba * size. We allocate at the top of the GTT to avoid fragmentation. */ err = 0; - if (!atomic_read(&ppgtt->pin_count)) + if (!atomic_read(&ppgtt->pin_count)) { + printf("gen6 pad1 0 %x\n", ppgtt->pad1[0]); + printf("pad1 4095 %x\n", ppgtt->pad1[4095]); + printf("pad2 0 %x\n", ppgtt->pad2[0]); + printf("pad2 4095 %x\n", ppgtt->pad2[4095]); err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH); + } if (!err) atomic_inc(&ppgtt->pin_count); mutex_unlock(&ppgtt->pin_mutex); Index: sys/dev/pci/drm/i915/gt/gen6_ppgtt.h =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/gen6_ppgtt.h,v retrieving revision 1.1 diff -u -p -r1.1 gen6_ppgtt.h --- sys/dev/pci/drm/i915/gt/gen6_ppgtt.h 8 Jun 2020 04:48:13 -0000 1.1 +++ sys/dev/pci/drm/i915/gt/gen6_ppgtt.h 1 Jun 2021 12:59:09 -0000 @@ -12,7 +12,9 @@ struct gen6_ppgtt { struct i915_ppgtt base; struct rwlock flush; + unsigned int pad1[PAGE_SIZE]; struct i915_vma *vma; + unsigned int pad2[PAGE_SIZE]; gen6_pte_t __iomem *pd_addr; atomic_t pin_count; Index: sys/dev/pci/drm/i915/gt/intel_ggtt.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_ggtt.c,v retrieving revision 1.1 diff -u -p -r1.1 intel_ggtt.c --- sys/dev/pci/drm/i915/gt/intel_ggtt.c 8 Jun 2020 04:48:13 -0000 1.1 +++ sys/dev/pci/drm/i915/gt/intel_ggtt.c 1 Jun 2021 10:44:43 -0000 @@ -699,8 +699,11 @@ int i915_init_ggtt(struct drm_i915_priva if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { ret = init_aliasing_ppgtt(&i915->ggtt); - if (ret) + if (ret) { + printf("\n%s init_aliasing_ppgtt() returned %d\n", + __func__, ret); cleanup_init_ggtt(&i915->ggtt); + } } return 0;