On Wed, 13 Nov 2019, Bruce Chang <yu.bruce.ch...@intel.com> wrote:
> Below is the call trace when this issue is hit:
>
> <3> [113.316247] BUG: sleeping function called from invalid context at mm/page_alloc.c:4653
> <3> [113.318190] in_atomic(): 1, irqs_disabled(): 0, pid: 678, name: debugfs_test
> <4> [113.319900] no locks held by debugfs_test/678.
> <3> [113.321002] Preemption disabled at:
> <4> [113.321130] [<ffffffffa02506d4>] i915_error_object_create+0x494/0x610 [i915]
> <4> [113.327259] Call Trace:
> <4> [113.327871] dump_stack+0x67/0x9b
> <4> [113.328683] ___might_sleep+0x167/0x250
> <4> [113.329618] __alloc_pages_nodemask+0x26b/0x1110
> <4> [113.330731] ? ___slab_alloc.constprop.34+0x21c/0x380
> <4> [113.331943] ? ___slab_alloc.constprop.34+0x21c/0x380
> <4> [113.333169] ? __slab_alloc.isra.28.constprop.33+0x4d/0x70
> <4> [113.334614] pool_alloc.constprop.19+0x14/0x60 [i915]
> <4> [113.335951] compress_page+0x7c/0x100 [i915]
> <4> [113.337110] i915_error_object_create+0x4bd/0x610 [i915]
> <4> [113.338515] i915_capture_gpu_state+0x384/0x1680 [i915]
> <4> [113.339771] ? __lock_acquire+0x4ac/0x1e90
> <4> [113.340785] ? _raw_spin_lock_irqsave_nested+0x1/0x50
> <4> [113.342127] i915_gpu_info_open+0x44/0x70 [i915]
> <4> [113.343243] full_proxy_open+0x139/0x1b0
> <4> [113.344196] ? open_proxy_open+0xc0/0xc0
> <4> [113.345149] do_dentry_open+0x1ce/0x3a0
> <4> [113.346084] path_openat+0x4c9/0xac0
> <4> [113.346967] do_filp_open+0x96/0x110
> <4> [113.347848] ? __alloc_fd+0xe0/0x1f0
> <4> [113.348736] ? do_sys_open+0x1b8/0x250
> <4> [113.349647] do_sys_open+0x1b8/0x250
> <4> [113.350526] do_syscall_64+0x55/0x1c0
> <4> [113.351418] entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> io_mapping_map_atomic_wc()/kmap_atomic() put the kernel into atomic context,
> but compress_page() then calls pool_alloc() with the GFP_KERNEL flag, which
> may sleep. Sleeping is not allowed in atomic context, which is why this bug
> is triggered.
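>
> To illustrate, the offending pattern reduces to something like the sketch
> below. The helper copy_one_page() is made up for illustration and is not
> part of the driver; the point is the GFP_KERNEL allocation inside the
> kmap_atomic()/kunmap_atomic() window:
>
>         #include <linux/highmem.h>
>         #include <linux/slab.h>
>         #include <linux/string.h>
>
>         /* Hypothetical helper showing the sleep-in-atomic pattern. */
>         static int copy_one_page(struct page *page)
>         {
>                 void *src, *dst;
>                 int ret = 0;
>
>                 src = kmap_atomic(page);                /* preemption disabled from here on */
>                 dst = kmalloc(PAGE_SIZE, GFP_KERNEL);   /* may sleep -> fires the BUG above */
>                 if (dst)
>                         memcpy(dst, src, PAGE_SIZE);
>                 else
>                         ret = -ENOMEM;
>                 kunmap_atomic(src);                     /* preemption enabled again */
>
>                 kfree(dst);
>                 return ret;
>         }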
>
> There are two ways to fix this issue:
>       1) do not enter atomic context in the first place, i.e. use the
>               non-atomic variants io_mapping_map_wc()/kmap(), or
>       2) make compress_page() safe to run in atomic context.
>
> Running the slow compression in atomic context is not a good idea, so
> option 1) is preferred, and that is what this patch implements (sketched
> below).
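>
> For comparison with the sketch above, the same hypothetical copy_one_page()
> helper is fine once the non-atomic mapping is used, because no atomic
> context is entered around the allocation:
>
>         /* Hypothetical helper, now using the non-atomic mapping. */
>         static int copy_one_page(struct page *page)
>         {
>                 void *src, *dst;
>                 int ret = 0;
>
>                 src = kmap(page);                       /* may sleep itself; process context only */
>                 dst = kmalloc(PAGE_SIZE, GFP_KERNEL);   /* sleeping is allowed here */
>                 if (dst)
>                         memcpy(dst, src, PAGE_SIZE);
>                 else
>                         ret = -ENOMEM;
>                 kunmap(page);                           /* kunmap() takes the page, not the address */
>
>                 kfree(dst);
>                 return ret;
>         }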
>
> Signed-off-by: Bruce Chang <yu.bruce.ch...@intel.com>
> Reviewed-by: Brian Welty <brian.we...@intel.com>
> Fixes: 895d8ebeaa924 ("drm/i915: error capture with no ggtt slot")
> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 1f2f266f26af..7118ecb7f144 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1007,67 +1007,67 @@ i915_error_object_create(struct drm_i915_private *i915,
>       compress->wc = i915_gem_object_is_lmem(vma->obj) ||
>                      drm_mm_node_allocated(&ggtt->error_capture);
>  
>       ret = -EINVAL;
>       if (drm_mm_node_allocated(&ggtt->error_capture)) {
>               void __iomem *s;
>               dma_addr_t dma;
>  
>               for_each_sgt_daddr(dma, iter, vma->pages) {
>                       ggtt->vm.insert_page(&ggtt->vm, dma, slot,
>                                            I915_CACHE_NONE, 0);
>  
>                       s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
>                       ret = compress_page(compress, (void  __force *)s, dst);
>                       io_mapping_unmap(s);
>                       if (ret)
>                               break;
>               }
>       } else if (i915_gem_object_is_lmem(vma->obj)) {
>               struct intel_memory_region *mem = vma->obj->mm.region;
>               dma_addr_t dma;
>  
>               for_each_sgt_daddr(dma, iter, vma->pages) {
>                       void __iomem *s;
>  
> -                     s = io_mapping_map_atomic_wc(&mem->iomap, dma);
> +                     s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
>                       ret = compress_page(compress, (void __force *)s, dst);
> -                     io_mapping_unmap_atomic(s);
> +                     io_mapping_unmap(s);
>                       if (ret)
>                               break;
>               }
>       } else {
>               struct page *page;
>  
>               for_each_sgt_page(page, iter, vma->pages) {
>                       void *s;
>  
>                       drm_clflush_pages(&page, 1);
>  
> -                     s = kmap_atomic(page);
> +                     s = kmap(page);
>                       ret = compress_page(compress, s, dst);
> -                     kunmap_atomic(s);
> +                     kunmap(page);
>  
>                       drm_clflush_pages(&page, 1);
>  
>                       if (ret)
>                               break;
>               }
>       }
>  
>       if (ret || compress_flush(compress, dst)) {
>               while (dst->page_count--)
>                       pool_free(&compress->pool, dst->pages[dst->page_count]);
>               kfree(dst);
>               dst = NULL;
>       }
>       compress_finish(compress);
>  
>       return dst;
>  }
>  
>  /*
>   * Generate a semi-unique error code. The code is not meant to have meaning, The
>   * code's only purpose is to try to prevent false duplicated bug reports by
>   * grossly estimating a GPU error state.
>   *
>   * TODO Ideally, hashing the batchbuffer would be a very nice way to determine

For future reference, please just use the default patch context size.

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Graphics Center