On Mon, May 31, 2021 at 02:38:34PM +0200, Matthias Pressfreund wrote:
> After applying the diffs you sent on 2021-05-17 and 2021-05-27, I
> booted the new kernel which completed until the login prompt. On
> the way I got this:
>
> ...
> scsibus2 at vscsi0: 256 targets
> softraid0 at root
> scsibus3 at softraid0: 256 targets
> root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b
>
> i915_ggtt_pin called with NULL vma
> WARNING !list_empty(&dev->mode_config.connector_list) failed at /usr/src/sys/dev/pci/drm/drm_mode_config.c:487
> [drm] *ERROR* connector DP-2 leaked!
> drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device
> initialization failed (-22)
> WARNING ({ typeof(vblank->enabled) __tmp = *(volatile typeof(vblank->enabled) *)&(vblank->enabled); membar_datadep_consumer(); __tmp; }) && drm_core_check_feature(dev, DRIVER_MODESET) failed at /usr/src/sys/dev/pci/drm/drm_vblank.c:440
> Automatic boot in progress: starting file system checks.
> /dev/sd0a (5e1040cb2dc494f4.a): file system is clean; not checking
> ...
>
>
> Then I rebooted a few times without problems. Then, this happened:
>
> ...
> scsibus2 at vscsi0: 256 targets
> softraid0 at root
> scsibus3 at softraid0: 256 targets
> root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b
> uvm_fault(0xffffffff8218aa20, 0xb9, 0, 1) -> e
> kernel: page fault trap, code=0
> Stopped at i915_ggtt_pin+0x31: movq 0xb8(%rdi),%r12
> ddb{0}> trace
> i915_ggtt_pin(1,10000,20) at i915_ggtt_pin+0x31
> gen6_ppgtt_pin(ffff80000080f000) at gen6_ppgtt_pin+0x7c
> __intel_context_do_pin(fffffd817adb6d80) at __intel_context_do_pin+0xca
> intel_engines_init(ffff800000104c38) at intel_engines_init+0x4b5
> intel_gt_init(ffff800000104c38) at intel_gt_init+0x130
> i915_gem_init(ffff800000100000) at i915_gem_init+0xa3
> i915_driver_probe(ffff800000100000,ffffffff8207c330) at i915_driver_probe+0x7ed
>
> inteldrm_attachhook(ffff800000100000) at inteldrm_attachhook+0x43
> config_process_deferred_mountroot() at config_process_deferred_mountroot+0x6b
I believe something is erroneously overwriting the struct that holds the vma pointer.
Can you drop the previous patches and try this?
Index: sys/dev/pci/drm/i915/i915_vma.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_vma.c,v
retrieving revision 1.5
diff -u -p -r1.5 i915_vma.c
--- sys/dev/pci/drm/i915/i915_vma.c 11 Oct 2020 05:45:33 -0000 1.5
+++ sys/dev/pci/drm/i915/i915_vma.c 1 Jun 2021 13:16:37 -0000
@@ -484,7 +484,8 @@ void __iomem *i915_vma_pin_iomap(struct
err = agp_map_subregion(dev_priv->agph, vma->node.start,
vma->node.size, &vma->bsh);
if (err) {
- err = -err;
+ printf("%s agp_map_subregion failed\n", __func__);
+ err = -ENOMEM;
goto err;
}
ptr = bus_space_vaddr(dev_priv->bst, vma->bsh);
@@ -851,6 +852,13 @@ static int vma_get_pages(struct i915_vma
if (mutex_lock_interruptible(&vma->pages_mutex))
return -EINTR;
+ if (((vaddr_t)vma->obj) < VM_MIN_KERNEL_ADDRESS) {
+ printf("%s bad obj ptr %p\n", __func__, vma->obj);
+ vma->obj = NULL;
+ err = -EINVAL;
+ goto unlock;
+ }
+
if (!atomic_read(&vma->pages_count)) {
if (vma->obj) {
err = i915_gem_object_pin_pages(vma->obj);
@@ -1029,8 +1037,14 @@ static void flush_idle_contexts(struct i
int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
{
- struct i915_address_space *vm = vma->vm;
+ struct i915_address_space *vm;
int err;
+
+ if (((vaddr_t)vma) < VM_MIN_KERNEL_ADDRESS) {
+ printf("%s called with %p vma\n", __func__, vma);
+ return -EINVAL;
+ }
+ vm = vma->vm;
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
Index: sys/dev/pci/drm/i915/gt/gen6_ppgtt.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/gen6_ppgtt.c,v
retrieving revision 1.1
diff -u -p -r1.1 gen6_ppgtt.c
--- sys/dev/pci/drm/i915/gt/gen6_ppgtt.c 8 Jun 2020 04:48:13 -0000 1.1
+++ sys/dev/pci/drm/i915/gt/gen6_ppgtt.c 1 Jun 2021 13:18:33 -0000
@@ -354,8 +354,10 @@ static struct i915_vma *pd_vma_create(st
GEM_BUG_ON(size > ggtt->vm.total);
vma = i915_vma_alloc();
- if (!vma)
+ if (!vma) {
+ printf("%s i915_vma_alloc() returned NULL\n", __func__);
return ERR_PTR(-ENOMEM);
+ }
i915_active_init(&vma->active, NULL, NULL);
@@ -401,8 +403,13 @@ int gen6_ppgtt_pin(struct i915_ppgtt *ba
* size. We allocate at the top of the GTT to avoid fragmentation.
*/
err = 0;
- if (!atomic_read(&ppgtt->pin_count))
+ if (!atomic_read(&ppgtt->pin_count)) {
+ printf("gen6 pad1 0 %x\n", ppgtt->pad1[0]);
+ printf("pad1 4095 %x\n", ppgtt->pad1[4095]);
+ printf("pad2 0 %x\n", ppgtt->pad2[0]);
+ printf("pad2 4095 %x\n", ppgtt->pad2[4095]);
err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+ }
if (!err)
atomic_inc(&ppgtt->pin_count);
mutex_unlock(&ppgtt->pin_mutex);
Index: sys/dev/pci/drm/i915/gt/gen6_ppgtt.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/gen6_ppgtt.h,v
retrieving revision 1.1
diff -u -p -r1.1 gen6_ppgtt.h
--- sys/dev/pci/drm/i915/gt/gen6_ppgtt.h 8 Jun 2020 04:48:13 -0000 1.1
+++ sys/dev/pci/drm/i915/gt/gen6_ppgtt.h 1 Jun 2021 12:59:09 -0000
@@ -12,7 +12,9 @@ struct gen6_ppgtt {
struct i915_ppgtt base;
struct rwlock flush;
+ unsigned int pad1[PAGE_SIZE];
struct i915_vma *vma;
+ unsigned int pad2[PAGE_SIZE];
gen6_pte_t __iomem *pd_addr;
atomic_t pin_count;
Index: sys/dev/pci/drm/i915/gt/intel_ggtt.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_ggtt.c,v
retrieving revision 1.1
diff -u -p -r1.1 intel_ggtt.c
--- sys/dev/pci/drm/i915/gt/intel_ggtt.c 8 Jun 2020 04:48:13 -0000 1.1
+++ sys/dev/pci/drm/i915/gt/intel_ggtt.c 1 Jun 2021 10:44:43 -0000
@@ -699,8 +699,11 @@ int i915_init_ggtt(struct drm_i915_priva
if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
ret = init_aliasing_ppgtt(&i915->ggtt);
- if (ret)
+ if (ret) {
+ printf("\n%s init_aliasing_ppgtt() returned %d\n",
+ __func__, ret);
cleanup_init_ggtt(&i915->ggtt);
+ }
}
return 0;