On Wed, May 26, 2021 at 11:50:36AM +1000, Jonathan Gray wrote:
> On Wed, May 26, 2021 at 02:51:53AM +0200, Peter N. M. Hansteen wrote:
> > Somewhat encouraged by the last few weeks' adventure with ASUS
> > laptops (thanks, kettenis@!) I decided to try out an incrementally
> > higher range model, the Zenbook S.
> > 
> > The latest amd64 snapshot installer seemed to work fine until
> > it encountered the main storage in the unit, which the firmware
> > sees as
> > 
> > PCIE bus:0 Dev:E Func:0
> >  Device type:  NVMe SSD (953.8GB)
> >  Model name:   INTEL SSDOEKNV 010T8
> > 
> > But shows up in the installer kernel dmesg as only
> > 
> > sd0 at scsibus0 targ 1 lun 0: (Generic, STORAGE DEVICE, 0233), removable.
> 
> that appears to be something else on usb?
> 
> 'scsibus0 at umass0: 2 targets, initiator 0'
> from https://www.bsdly.net/~peter/20210525_233623.jpg
> 
> can you provide a dmesg and pcidump -v output?
> 
> I didn't see an image that made it clear which cpu / inteldrm device
> this has.
> 
> > 
> > Installing to a removable USB device was a bit hit and miss, after a
> > bit of fiddling with boot -c and disable acpicpu* and disable iwx* in
> > response to the errors I got I did manage to get it to display its
> > full 3300x2200 in X, but the file system layout on the 16GB device I
> > used initially did not have space in /home to store the entire
> > output of sendbug -P from the device.
> > 
> > The following shows the various panics and traces as best I could
> > capture them:
> > 
> > https://www.bsdly.net/~peter/20210525_223507.jpg
> > https://www.bsdly.net/~peter/20210525_223511.jpg
> > https://www.bsdly.net/~peter/20210525_233623.jpg
> > https://www.bsdly.net/~peter/20210525_235112.jpg
> 
> kernel: protection fault trap, code=0
> Stopped at pool_do_put+0xc9: movq 0x8(%rcx),%rcx
> 
> > https://www.bsdly.net/~peter/20210525_235137.jpg
> 
> kernel: protection fault trap, code=0
> pool_do_put()+0xc9 movq 0x8(%rcx),%rcx
> pool_put()+0x71
> process_zap()+0x14f
> dowait4()
> sys_wait4()
> syscall()
> 
> > https://www.bsdly.net/~peter/20210525_235449.jpg
> 
> panic: pool_do_get: procpl free list modified: ...
> db_enter()+0x10 popq %rbp
> panic()+0x12a
> pool_do_get()+0x321
> pool_get()+0x93
> fork1()+0x1b6
> syscall()+0x3b9
> 
> > https://www.bsdly.net/~peter/20210525_235502.jpg
> 
> as above with ps showing *init
> 
> > https://www.bsdly.net/~peter/20210525_235717.jpg
> 
> panic: pool_do_get: drmobj free list modified: ...
> panic: kernel diagnostic assertion "!_kernel_lock_held()" failed: file "/usr/src/sys/uvm/uvm_map.c", line 2707
> db_enter()+0x10 popq %rbp
> panic()+0x12a
> __assert()+0x2b
> uvm_map_teardown()+0x23e
> uvmspace_free()+0x5a
> reaper()+0x14c
> 
> > https://www.bsdly.net/~peter/20210525_235739.jpg
> 
> as above with some ps output
> 
> > https://www.bsdly.net/~peter/20210525_235747.jpg
> 
> ps output
> 
> > https://www.bsdly.net/~peter/20210525_235753.jpg
> 
> ps output
> 
> > https://www.bsdly.net/~peter/20210525_235903.jpg
> 
> kernel: protection fault trap, code=0
> pool_do_put()+0xc9 movq 0x8(%rcx),%rcx
> pool_put()+0x71
> process_zap()+0x14f
> dowait4()+0x303
> sys_wait4()+0x5a
> syscall()+0x3b9
> 
> > https://www.bsdly.net/~peter/20210525_235914.jpg
> 
> above with ps output
> 
> > https://www.bsdly.net/~peter/20210526_015841.jpg
> 
> uvm_fault()
> kernel: page fault trap, code=0
> Stopped at i915_gem_object_pin_map+0x2a: testb $0x3,0(%rax)
> 
> > https://www.bsdly.net/~peter/20210526_015851.jpg
> 
> uvm_fault()
> kernel: page fault trap, code=0
> Stopped at i915_gem_object_pin_map+0x2a: testb $0x3,0(%rax)
> i915_gem_object_pin_map()+0x2a
> __execlists_context_alloc()+0x187
> __intel_context_do_pin()+0x15a
> i915_gem_do_execbuffer()
> i915_gem_execbuffer2_ioctl()+0x1cb
> drm_do_ioctl()+0x28c
> 
> > https://www.bsdly.net/~peter/20210526_015901.jpg
> 
> ps output *Xorg
> 
> > https://www.bsdly.net/~peter/20210526_015906.jpg
> 
> ps output
> 
> > https://www.bsdly.net/~peter/20210526_024111.jpg
> 
> uvm_fault()
> kernel: page fault trap, code=0
> Stopped at i915_gem_object_pin_map+0x2a: testb $0x3,0(%rax)
> i915_gem_object_pin_map()+0x2a
> __execlists_context_alloc()+0x187
> __intel_context_do_pin()+0x15a
> i915_gem_do_execbuffer()+0x2820
> i915_gem_execbuffer2_ioctl()+0x1cb
> drm_do_ioctl()+0x28c

some backported drm commits related to vma/pinning

drm/i915/gt: Prevent use of engine->wa_ctx after error
drm/i915: Fix mismatch between misplaced vma check and vma insert
drm/i915: Hold onto an explicit ref to i915_vma_work.pinned
drm/i915: Use the active reference on the vma while capturing

diff --git sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
index 971ed84f371..993c2b22f9f 100644
--- sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
+++ sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
@@ -365,7 +365,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
                return true;
 
        if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
-           (vma->node.start + vma->node.size - 1) >> 32)
+           (vma->node.start + vma->node.size + 4095) >> 32)
                return true;
 
        if (flags & __EXEC_OBJECT_NEEDS_MAP &&
diff --git sys/dev/pci/drm/i915/gt/intel_lrc.c sys/dev/pci/drm/i915/gt/intel_lrc.c
index ac8eade748b..9bdb964d14f 100644
--- sys/dev/pci/drm/i915/gt/intel_lrc.c
+++ sys/dev/pci/drm/i915/gt/intel_lrc.c
@@ -3462,6 +3462,9 @@ err:
 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
 {
        i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+
+       /* Called on error unwind, clear all flags to prevent further use */
+       memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx));
 }
 
 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
diff --git sys/dev/pci/drm/i915/i915_gpu_error.c sys/dev/pci/drm/i915/i915_gpu_error.c
index 9d02829f8df..72e25f3d014 100644
--- sys/dev/pci/drm/i915/i915_gpu_error.c
+++ sys/dev/pci/drm/i915/i915_gpu_error.c
@@ -1346,7 +1346,7 @@ capture_vma(struct intel_engine_capture_vma *next,
        }
 
        strlcpy(c->name, name, sizeof(c->name));
-       c->vma = i915_vma_get(vma);
+       c->vma = vma; /* reference held while active */
 
        c->next = next;
        return c;
@@ -1456,7 +1456,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
                                                 compress));
 
                i915_active_release(&vma->active);
-               i915_vma_put(vma);
 
                capture = this->next;
                kfree(this);
diff --git sys/dev/pci/drm/i915/i915_vma.c sys/dev/pci/drm/i915/i915_vma.c
index 2bf2292ae31..8aca774266c 100644
--- sys/dev/pci/drm/i915/i915_vma.c
+++ sys/dev/pci/drm/i915/i915_vma.c
@@ -331,8 +331,10 @@ static void __vma_release(struct dma_fence_work *work)
 {
        struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
 
-       if (vw->pinned)
+       if (vw->pinned) {
                __i915_gem_object_unpin_pages(vw->pinned);
+               i915_gem_object_put(vw->pinned);
+       }
 }
 
 static const struct dma_fence_work_ops bind_ops = {
@@ -448,7 +450,7 @@ int i915_vma_bind(struct i915_vma *vma,
 
                if (vma->obj) {
                        __i915_gem_object_pin_pages(vma->obj);
-                       work->pinned = vma->obj;
+                       work->pinned = i915_gem_object_get(vma->obj);
                }
        } else {
                ret = vma->ops->bind_vma(vma, cache_level, bind_flags);

Reply via email to