Re: [Intel-gfx] [PATCH 2/4] drm/i915/gtt: Recursive ppgtt clear for gen8

2019-07-12 Thread Abdiel Janulgue


On 12/07/2019 14.27, Chris Wilson wrote:
> With an explicit level, we can refactor the separate clear functions
> as a simple recursive function. The additional knowledge of the level
> allows us to spot when we can free an entire subtree at once.
> 
> Signed-off-by: Chris Wilson 
> ---

Reviewed-by: Abdiel Janulgue 

>  drivers/gpu/drm/i915/Kconfig.debug  |  15 +++
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 154 
>  2 files changed, 105 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug 
> b/drivers/gpu/drm/i915/Kconfig.debug
> index 8d922bb4d953..ed8c787058a5 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -94,6 +94,21 @@ config DRM_I915_TRACE_GEM
>  
> If in doubt, say "N".
>  
> +config DRM_I915_TRACE_GTT
> + bool "Insert extra ftrace output from the GTT internals"
> + depends on DRM_I915_DEBUG_GEM
> + select TRACING
> + default n
> + help
> +   Enable additional and verbose debugging output that will spam
> +   ordinary tests, but may be vital for post-mortem debugging when
> +   used with /proc/sys/kernel/ftrace_dump_on_oops
> +
> +   Recommended for driver developers only.
> +
> +   If in doubt, say "N".
> +
> +
>  config DRM_I915_SW_FENCE_DEBUG_OBJECTS
>  bool "Enable additional driver debugging for fence objects"
>  depends on DRM_I915
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 305c65c08a6a..7b2f3188d435 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -46,6 +46,12 @@
>  
>  #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
> +#define DBG(...) trace_printk(__VA_ARGS__)
> +#else
> +#define DBG(...)
> +#endif
> +
>  /**
>   * DOC: Global GTT views
>   *
> @@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd,
>  {
>   bool free = false;
>  
> + if (atomic_add_unless(&pt->used, -1, 1))
> + return false;
> +
>   spin_lock(&pd->lock);
>   if (atomic_dec_and_test(&pt->used)) {
>   clear_pd_entry(pd, idx, scratch);
> @@ -927,86 +936,101 @@ static void gen8_ppgtt_cleanup(struct 
> i915_address_space *vm)
>   free_scratch(vm);
>  }
>  
> -/* Removes entries from a single page table, releasing it if it's empty.
> - * Caller can use the return value to update higher-level entries.
> - */
> -static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
> - struct i915_page_table *pt,
> - u64 start, u64 length)
> +static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
> +   struct i915_page_directory * const pd,
> +   u64 start, const u64 end, int lvl)
>  {
> - const unsigned int num_entries = gen8_pte_count(start, length);
> - gen8_pte_t *vaddr;
> + const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
> + unsigned int idx, len;
>  
> - vaddr = kmap_atomic_px(pt);
> - memset64(vaddr + gen8_pte_index(start),
> -  vm->scratch[0].encode,
> -  num_entries);
> - kunmap_atomic(vaddr);
> + len = gen8_pd_range(start, end, lvl--, &idx);
> + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
> + __func__, vm, lvl + 1, start, end,
> + idx, len, atomic_read(px_used(pd)));
> + GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
>  
> - GEM_BUG_ON(num_entries > atomic_read(&pt->used));
> + do {
> + struct i915_page_table *pt = pd->entry[idx];
> +
> + if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
> + gen8_pd_contains(start, end, lvl)) {
> + DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } 
> removing pd\n",
> + __func__, vm, lvl + 1, idx, start, end);
> + clear_pd_entry(pd, idx, scratch);
> + __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
> + start += (u64)I915_PDES << gen8_pd_shift(lvl);
> + continue;
> + }
>  
> - atomic_sub(num_entries, &pt->used);
> -}
> + if (lvl) {
> + start = __gen8_ppgtt_clear(vm, as_pd(pt),
> +start, end, lvl);
> + } else {
> + unsigned int count;
> + u64 *vaddr;
>  
> -static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
> - struct i915_page_directory *pd,
> - u64 start, u64 length)
> -{
> - struct i915_page_table *pt;
> - u32 pde;
> + count = gen8_pt_count(start, end);
> + DBG("%s(%p):{ lvl:%d, start

[Intel-gfx] [PATCH 2/4] drm/i915/gtt: Recursive ppgtt clear for gen8

2019-07-12 Thread Chris Wilson
With an explicit level, we can refactor the separate clear functions
as a simple recursive function. The additional knowledge of the level
allows us to spot when we can free an entire subtree at once.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Kconfig.debug  |  15 +++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 154 
 2 files changed, 105 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug 
b/drivers/gpu/drm/i915/Kconfig.debug
index 8d922bb4d953..ed8c787058a5 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -94,6 +94,21 @@ config DRM_I915_TRACE_GEM
 
  If in doubt, say "N".
 
+config DRM_I915_TRACE_GTT
+   bool "Insert extra ftrace output from the GTT internals"
+   depends on DRM_I915_DEBUG_GEM
+   select TRACING
+   default n
+   help
+ Enable additional and verbose debugging output that will spam
+ ordinary tests, but may be vital for post-mortem debugging when
+ used with /proc/sys/kernel/ftrace_dump_on_oops
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+
 config DRM_I915_SW_FENCE_DEBUG_OBJECTS
 bool "Enable additional driver debugging for fence objects"
 depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 305c65c08a6a..7b2f3188d435 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -46,6 +46,12 @@
 
 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#define DBG(...) trace_printk(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
 /**
  * DOC: Global GTT views
  *
@@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd,
 {
bool free = false;
 
+   if (atomic_add_unless(&pt->used, -1, 1))
+   return false;
+
spin_lock(&pd->lock);
if (atomic_dec_and_test(&pt->used)) {
clear_pd_entry(pd, idx, scratch);
@@ -927,86 +936,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
free_scratch(vm);
 }
 
-/* Removes entries from a single page table, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries.
- */
-static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
-   struct i915_page_table *pt,
-   u64 start, u64 length)
+static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
+ struct i915_page_directory * const pd,
+ u64 start, const u64 end, int lvl)
 {
-   const unsigned int num_entries = gen8_pte_count(start, length);
-   gen8_pte_t *vaddr;
+   const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+   unsigned int idx, len;
 
-   vaddr = kmap_atomic_px(pt);
-   memset64(vaddr + gen8_pte_index(start),
-vm->scratch[0].encode,
-num_entries);
-   kunmap_atomic(vaddr);
+   len = gen8_pd_range(start, end, lvl--, &idx);
+   DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
+   __func__, vm, lvl + 1, start, end,
+   idx, len, atomic_read(px_used(pd)));
+   GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
 
-   GEM_BUG_ON(num_entries > atomic_read(&pt->used));
+   do {
+   struct i915_page_table *pt = pd->entry[idx];
+
+   if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
+   gen8_pd_contains(start, end, lvl)) {
+   DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } 
removing pd\n",
+   __func__, vm, lvl + 1, idx, start, end);
+   clear_pd_entry(pd, idx, scratch);
+   __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
+   start += (u64)I915_PDES << gen8_pd_shift(lvl);
+   continue;
+   }
 
-   atomic_sub(num_entries, &pt->used);
-}
+   if (lvl) {
+   start = __gen8_ppgtt_clear(vm, as_pd(pt),
+  start, end, lvl);
+   } else {
+   unsigned int count;
+   u64 *vaddr;
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
-   struct i915_page_directory *pd,
-   u64 start, u64 length)
-{
-   struct i915_page_table *pt;
-   u32 pde;
+   count = gen8_pt_count(start, end);
+   DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, 
len:%d, used:%d} removing pte\n",
+   __func__, vm, lvl, start, end,
+   gen8_pd_index(start, 0), count,
+   at