On Wed, May 26, 2021 at 11:50:36AM +1000, Jonathan Gray wrote: > On Wed, May 26, 2021 at 02:51:53AM +0200, Peter N. M. Hansteen wrote: > > Somewhat encouraged by the last few weeks' adventure with ASUS > > laptops (thanks, kettenis@!) I decided to try out an incrementally > > higher range model, the Zenbook S. > > > > The latest amd64 snapshot installer seemed to work fine until > > it encountered the main storage in the unit, which the firmware > > sees as > > > > PCIE bus:0 Dev:E Func:0 > > Device type: NVMe SSD (953.8GB) > > Model name: INTEL SSDOEKNV 010T8 > > > > But shows up in the installer kernel dmesg as only > > > > sd0 at scsibus0 targ 1 lun 0: (Generic, STORAGE DEVICE, 0233), removable. > > that appears to be something else on usb? > > 'scsibus0 at umass0: 2 targets, initiator 0' > from https://www.bsdly.net/~peter/20210525_233623.jpg > > can you provide a dmesg and pcidump -v output? > > I didn't see an image that made it clear which cpu / inteldrm device > this has. > > > > > Installing to a removable USB device was a bit hit and miss, after a > > bit of fiddling with boot -c and disable acpicpu* and disable iwx* in > > response to the errors I got I did manage to get it to display its > > full 3300x2200 in X, but the file system layout on the 16GB device I > > used initially did not have space in /home to store the entire > > output of sendbug -P from the device. 
> > > > The following shows the various panics and traces as best I could > > capture them: > > > > https://www.bsdly.net/~peter/20210525_223507.jpg > > https://www.bsdly.net/~peter/20210525_223511.jpg > > https://www.bsdly.net/~peter/20210525_233623.jpg > > https://www.bsdly.net/~peter/20210525_235112.jpg > > kernel: protection fault trap, code=0 > Stopped at pool_do_put+0xc9: movq 0x8(%rcx),%rcx > > > https://www.bsdly.net/~peter/20210525_235137.jpg > > kernel: protection fault trap, code=0 > pool_do_put()+0xc9 movq 0x8(%rcx),%rcx > pool_put()+0x71 > process_zap()+0x14f > dowait4() > sys_wait4() > syscall() > > > https://www.bsdly.net/~peter/20210525_235449.jpg > > panic: pool_do_get: procpl free list modified: ... > db_enter()+0x10 popq %rbp > panic()+0x12a > pool_do_get()+0x321 > pool_get()+0x93 > fork1()+0x1b6 > syscall()+0x3b9 > > > https://www.bsdly.net/~peter/20210525_235502.jpg > > as above with ps showing *init > > > https://www.bsdly.net/~peter/20210525_235717.jpg > > panic: pool_do_get: drmobj free list modified: ... 
> panic: kernel diagnostic assertion "!_kernel_lock_held()" failed: file > "/usr/src/sys/uvm/uvm_map.c", line 2707 > db_enter()+0x10 popq %rbp > panic()+0x12a > __assert()+0x2b > uvm_map_teardown()+0x23e > uvmspace_free()+0x5a > reaper()+0x14c > > > https://www.bsdly.net/~peter/20210525_235739.jpg > > as above with some ps output > > > https://www.bsdly.net/~peter/20210525_235747.jpg > > ps output > > > https://www.bsdly.net/~peter/20210525_235753.jpg > > ps output > > > https://www.bsdly.net/~peter/20210525_235903.jpg > > kernel: protection fault trap, code=0 > pool_do_put()+0xc9 movq 0x8(%rcx),%rcx > pool_put()+0x71 > process_zap()+0x14f > dowait4()+0x303 > sys_wait4()+0x5a > syscall()+0x3b9 > > > https://www.bsdly.net/~peter/20210525_235914.jpg > > above with ps output > > > https://www.bsdly.net/~peter/20210526_015841.jpg > > uvm_fault() > kernel: page fault trap, code=0 > Stopped at i915_gem_object_pin_map+0x2a: testb $0x3,0(%rax) > > > https://www.bsdly.net/~peter/20210526_015851.jpg > > uvm_fault() > kernel: page fault trap, code=0 > Stopped at i915_gem_object_pin_map+0x2a: testb $0x3,0(%rax) > i915_gem_object_pin_map()+0x2a > __execlists_context_alloc()+0x187 > __intel_context_do_pin()+0x15a > i915_gem_do_execbuffer() > i915_gem_execbuffer2_ioctl()+0x1cb > drm_do_ioctl()+0x28c > > > https://www.bsdly.net/~peter/20210526_015901.jpg > > ps output *Xorg > > > https://www.bsdly.net/~peter/20210526_015906.jpg > > ps output > > > https://www.bsdly.net/~peter/20210526_024111.jpg > > uvm_fault() > kernel: page fault trap, code=0 > Stopped at i915_gem_object_pin_map+0x2a: testb $0x3,0(%rax) > i915_gem_object_pin_map()+0x2a > __execlists_context_alloc()+0x187 > __intel_context_do_pin()+0x15a > i915_gem_do_execbuffer()+0x2820 > i915_gem_execbuffer2_ioctl()+0x1cb > drm_do_ioctl()+0x28c
some backported drm commits related to vma/pinning drm/i915/gt: Prevent use of engine->wa_ctx after error drm/i915: Fix mismatch between misplaced vma check and vma insert drm/i915: Hold onto an explicit ref to i915_vma_work.pinned drm/i915: Use the active reference on the vma while capturing diff --git sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c index 971ed84f371..993c2b22f9f 100644 --- sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c +++ sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c @@ -365,7 +365,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, return true; if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && - (vma->node.start + vma->node.size - 1) >> 32) + (vma->node.start + vma->node.size + 4095) >> 32) return true; if (flags & __EXEC_OBJECT_NEEDS_MAP && diff --git sys/dev/pci/drm/i915/gt/intel_lrc.c sys/dev/pci/drm/i915/gt/intel_lrc.c index ac8eade748b..9bdb964d14f 100644 --- sys/dev/pci/drm/i915/gt/intel_lrc.c +++ sys/dev/pci/drm/i915/gt/intel_lrc.c @@ -3462,6 +3462,9 @@ err: static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); + + /* Called on error unwind, clear all flags to prevent further use */ + memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx)); } typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); diff --git sys/dev/pci/drm/i915/i915_gpu_error.c sys/dev/pci/drm/i915/i915_gpu_error.c index 9d02829f8df..72e25f3d014 100644 --- sys/dev/pci/drm/i915/i915_gpu_error.c +++ sys/dev/pci/drm/i915/i915_gpu_error.c @@ -1346,7 +1346,7 @@ capture_vma(struct intel_engine_capture_vma *next, } strlcpy(c->name, name, sizeof(c->name)); - c->vma = i915_vma_get(vma); + c->vma = vma; /* reference held while active */ c->next = next; return c; @@ -1456,7 +1456,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, compress)); i915_active_release(&vma->active); - i915_vma_put(vma); capture = this->next; 
kfree(this); diff --git sys/dev/pci/drm/i915/i915_vma.c sys/dev/pci/drm/i915/i915_vma.c index 2bf2292ae31..8aca774266c 100644 --- sys/dev/pci/drm/i915/i915_vma.c +++ sys/dev/pci/drm/i915/i915_vma.c @@ -331,8 +331,10 @@ static void __vma_release(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - if (vw->pinned) + if (vw->pinned) { __i915_gem_object_unpin_pages(vw->pinned); + i915_gem_object_put(vw->pinned); + } } static const struct dma_fence_work_ops bind_ops = { @@ -448,7 +450,7 @@ int i915_vma_bind(struct i915_vma *vma, if (vma->obj) { __i915_gem_object_pin_pages(vma->obj); - work->pinned = vma->obj; + work->pinned = i915_gem_object_get(vma->obj); } } else { ret = vma->ops->bind_vma(vma, cache_level, bind_flags);