[Intel-gfx] [CI 3/3] drm/i915/gtt: Free unused lower-level page tables

2016-10-14 Thread Michał Winiarski
Since "Dynamic page table allocations" were introduced, our page tables
can grow (being dynamically allocated) with address space range usage.
Unfortunately, their lifetime is bound to vm. This is not a huge problem
when we're not using softpin - drm_mm is creating an upper bound on used
range by causing addresses for our VMAs to eventually be reused.

With softpin, long lived contexts can drain the system out of memory
even with a single "small" object. For example:

bo = bo_alloc(size);
while (true) {
        offset += size;
        exec(bo, offset);
}

Will cause us to create new allocations until all memory in the system
is used for tracking GPU pages (even though almost all PTEs in this vm
are pointing to scratch).
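
In userspace terms, the loop above is a softpinned execbuf whose offset keeps
advancing: with GEN8 paging each page table maps 2 MiB (512 * 8-byte PTEs, each
covering a 4 KiB page), so every 2 MiB of new offset faults in another 4 KiB
page-table page - roughly 2 MiB of page-table memory per GiB of address space
walked, and nothing frees it until the vm dies. A hedged sketch of such a loop
follows; the exec_object2 fields and EXEC_OBJECT_PINNED are real uapi, but
submit_batch() is a made-up wrapper around DRM_IOCTL_I915_GEM_EXECBUFFER2, not
code from this series:

#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: submits a trivial batch referencing a single object
 * via DRM_IOCTL_I915_GEM_EXECBUFFER2. Not part of this patch series. */
int submit_batch(int fd, struct drm_i915_gem_exec_object2 *obj);

static void softpin_drain(int fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_exec_object2 obj;
	uint64_t offset = 0;

	for (;;) {
		memset(&obj, 0, sizeof(obj));
		obj.handle = handle;
		obj.offset = offset;		/* softpin: place the bo exactly here */
		obj.flags = EXEC_OBJECT_PINNED;

		submit_batch(fd, &obj);

		/* Each new 2 MiB-aligned region touched here allocates a fresh
		 * page table in the ppgtt; before this patch it was only freed
		 * together with the whole vm. */
		offset += size;
	}
}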

Let's free unused page tables in clear_range to prevent this - if no
entries are used, we can safely free the table and return this information
to the caller (so that the higher-level entry can be pointed at scratch).

v2: Document return value and free semantics (Joonas)
v3: No newlines in vars block (Joonas)
v4: Drop redundant local 'reduce' variable
v5: Handle CI fail with enable_ppgtt=2

Cc: Michel Thierry 
Cc: Mika Kuoppala 
Reviewed-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
Signed-off-by: Michał Winiarski 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 78 +
 1 file changed, 70 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c284d8d..f4c80bc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -704,13 +704,14 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
 }
 
-static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
+/* Removes entries from a single page table, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries */
+static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
struct i915_page_table *pt,
uint64_t start,
uint64_t length)
 {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
unsigned int pte_start = gen8_pte_index(start);
unsigned int num_entries = gen8_pte_count(start, length);
uint64_t pte;
@@ -719,63 +720,124 @@ static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 I915_CACHE_LLC);
 
if (WARN_ON(!px_page(pt)))
-   return;
+   return false;
 
bitmap_clear(pt->used_ptes, pte_start, num_entries);
 
+   if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
+   free_pt(vm->dev, pt);
+   return true;
+   }
+
pt_vaddr = kmap_px(pt);
 
for (pte = pte_start; pte < num_entries; pte++)
pt_vaddr[pte] = scratch_pte;
 
kunmap_px(ppgtt, pt_vaddr);
+
+   return false;
 }
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+/* Removes entries from a single page dir, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
struct i915_page_directory *pd,
uint64_t start,
uint64_t length)
 {
+   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_table *pt;
uint64_t pde;
+   gen8_pde_t *pde_vaddr;
+   gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
+I915_CACHE_LLC);
 
gen8_for_each_pde(pt, pd, start, length, pde) {
if (WARN_ON(!pd->page_table[pde]))
break;
 
-   gen8_ppgtt_clear_pt(vm, pt, start, length);
+   if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
+   __clear_bit(pde, pd->used_pdes);
+   pde_vaddr = kmap_px(pd);
+   pde_vaddr[pde] = scratch_pde;
+   kunmap_px(ppgtt, pde_vaddr);
+   }
+   }
+
+   if (bitmap_empty(pd->used_pdes, I915_PDES)) {
+   free_pd(vm->dev, pd);
+   return true;
}
+
+   return false;
 }
 
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+/* Removes entries from a single page dir pointer, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 struct i915_page_directory_pointer *pdp,
 uint64_t start,
 uint64_t length)
 {
+   struct 
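
(The archived diff is cut off here, in the middle of gen8_ppgtt_clear_pdp().)
Based on the v5 changelog entry ("Handle CI fail with enable_ppgtt=2") and
Chris Wilson's review below, the pdp-level clear presumably mirrors the
pde-level loop but only rewrites the PDPE when full 48-bit PPGTT is in use:
on !48b the pdp has no backing page, and i915_page_dir_dma_addr() already
returns the scratch address for cleared entries. A hedged reconstruction, not
the archived code (pdpe_vaddr and scratch_pdpe are locals analogous to the
pde-level ones):

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (WARN_ON(!pdp->page_directory[pdpe]))
			break;

		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
			__clear_bit(pdpe, pdp->used_pdpes);
			if (USES_FULL_48BIT_PPGTT(vm->dev)) {
				/* Only a 48b pdp is backed by a page we can
				 * map; on !48b, clearing the used bit is
				 * enough. */
				pdpe_vaddr = kmap_px(pdp);
				pdpe_vaddr[pdpe] = scratch_pdpe;
				kunmap_px(ppgtt, pdpe_vaddr);
			}
		}
	}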

Re: [Intel-gfx] [CI 3/3] drm/i915/gtt: Free unused lower-level page tables

2016-10-12 Thread Chris Wilson
On Wed, Oct 12, 2016 at 06:47:38PM +0200, Michał Winiarski wrote:
> +static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>   struct i915_page_directory *pd,
>   uint64_t start,
>   uint64_t length)
>  {
> + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>   struct i915_page_table *pt;
>   uint64_t pde;
> + gen8_pde_t *pde_vaddr;
> + gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
> +  I915_CACHE_LLC);
> + bool reduce;
>  
>   gen8_for_each_pde(pt, pd, start, length, pde) {
>   if (WARN_ON(!pd->page_table[pde]))
>   break;
>  
> - gen8_ppgtt_clear_pt(vm, pt, start, length);
> + reduce = gen8_ppgtt_clear_pt(vm, pt, start, length);
> +
> + if (reduce) {
> + __clear_bit(pde, pd->used_pdes);
> + pde_vaddr = kmap_px(pd);
> + pde_vaddr[pde] = scratch_pde;
> + kunmap_px(ppgtt, pde_vaddr);

On !48b (bsw), the pd is only partially set up; it is not backed by a
page, and i915_page_dir_dma_addr() converts the empty bit into the
scratch address instead.
Obnoxiously this requires another if (USES_FULL_48BIT_PPGTT(0)) {}
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [CI 3/3] drm/i915/gtt: Free unused lower-level page tables

2016-10-12 Thread Michał Winiarski
Since "Dynamic page table allocations" were introduced, our page tables
can grow (being dynamically allocated) with address space range usage.
Unfortunately, their lifetime is bound to vm. This is not a huge problem
when we're not using softpin - drm_mm is creating an upper bound on used
range by causing addresses for our VMAs to eventually be reused.

With softpin, long lived contexts can drain the system out of memory
even with a single "small" object. For example:

bo = bo_alloc(size);
while (true) {
        offset += size;
        exec(bo, offset);
}

Will cause us to create new allocations until all memory in the system
is used for tracking GPU pages (even though almost all PTEs in this vm
are pointing to scratch).

Let's free unused page tables in clear_range to prevent this - if no
entries are used, we can safely free the table and return this information
to the caller (so that the higher-level entry can be pointed at scratch).

v2: Document return value and free semantics (Joonas)
v3: No newlines in vars block (Joonas)

Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Michel Thierry 
Cc: Mika Kuoppala 
Signed-off-by: Michał Winiarski 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 84 +
 1 file changed, 76 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c284d8d..e733657 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -704,13 +704,14 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
 }
 
-static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
+/* Removes entries from a single page table, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries */
+static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
struct i915_page_table *pt,
uint64_t start,
uint64_t length)
 {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
unsigned int pte_start = gen8_pte_index(start);
unsigned int num_entries = gen8_pte_count(start, length);
uint64_t pte;
@@ -719,63 +720,130 @@ static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 I915_CACHE_LLC);
 
if (WARN_ON(!px_page(pt)))
-   return;
+   return false;
 
bitmap_clear(pt->used_ptes, pte_start, num_entries);
 
+   if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
+   free_pt(vm->dev, pt);
+   return true;
+   }
+
pt_vaddr = kmap_px(pt);
 
for (pte = pte_start; pte < num_entries; pte++)
pt_vaddr[pte] = scratch_pte;
 
kunmap_px(ppgtt, pt_vaddr);
+
+   return false;
 }
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+/* Removes entries from a single page dir, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
struct i915_page_directory *pd,
uint64_t start,
uint64_t length)
 {
+   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_table *pt;
uint64_t pde;
+   gen8_pde_t *pde_vaddr;
+   gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
+I915_CACHE_LLC);
+   bool reduce;
 
gen8_for_each_pde(pt, pd, start, length, pde) {
if (WARN_ON(!pd->page_table[pde]))
break;
 
-   gen8_ppgtt_clear_pt(vm, pt, start, length);
+   reduce = gen8_ppgtt_clear_pt(vm, pt, start, length);
+
+   if (reduce) {
+   __clear_bit(pde, pd->used_pdes);
+   pde_vaddr = kmap_px(pd);
+   pde_vaddr[pde] = scratch_pde;
+   kunmap_px(ppgtt, pde_vaddr);
+   }
+   }
+
+   if (bitmap_empty(pd->used_pdes, I915_PDES)) {
+   free_pd(vm->dev, pd);
+   return true;
}
+
+   return false;
 }
 
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+/* Removes entries from a single page dir pointer, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 struct i915_page_directory_pointer *pdp,
 uint64_t start,
 uint64_t length)
 {
+   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 

[Intel-gfx] [CI 3/3] drm/i915/gtt: Free unused lower-level page tables

2016-10-11 Thread Michał Winiarski
Since "Dynamic page table allocations" were introduced, our page tables
can grow (being dynamically allocated) with address space range usage.
Unfortunately, their lifetime is bound to vm. This is not a huge problem
when we're not using softpin - drm_mm is creating an upper bound on used
range by causing addresses for our VMAs to eventually be reused.

With softpin, long lived contexts can drain the system out of memory
even with a single "small" object. For example:

bo = bo_alloc(size);
while (true) {
        offset += size;
        exec(bo, offset);
}

Will cause us to create new allocations until all memory in the system
is used for tracking GPU pages (even though almost all PTEs in this vm
are pointing to scratch).

Let's free unused page tables in clear_range to prevent this - if no
entries are used, we can safely free the table and return this information
to the caller (so that the higher-level entry can be pointed at scratch).

v2: Document return value and free semantics (Joonas)
v3: No newlines in vars block (Joonas)
v4: Drop redundant local 'reduce' variable

Cc: Michel Thierry 
Cc: Mika Kuoppala 
Reviewed-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
Signed-off-by: Michał Winiarski 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 75 +
 1 file changed, 67 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index adabf58..c58ae96 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -704,13 +704,14 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
 }
 
-static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
+/* Removes entries from a single page table, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries */
+static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
struct i915_page_table *pt,
uint64_t start,
uint64_t length)
 {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
unsigned int pte_start = gen8_pte_index(start);
unsigned int num_entries = gen8_pte_count(start, length);
uint64_t pte;
@@ -719,63 +720,121 @@ static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 I915_CACHE_LLC);
 
if (WARN_ON(!px_page(pt)))
-   return;
+   return false;
 
bitmap_clear(pt->used_ptes, pte_start, num_entries);
 
+   if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
+   free_pt(vm->dev, pt);
+   return true;
+   }
+
pt_vaddr = kmap_px(pt);
 
for (pte = pte_start; pte < num_entries; pte++)
pt_vaddr[pte] = scratch_pte;
 
kunmap_px(ppgtt, pt_vaddr);
+
+   return false;
 }
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+/* Removes entries from a single page dir, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
struct i915_page_directory *pd,
uint64_t start,
uint64_t length)
 {
+   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_table *pt;
uint64_t pde;
+   gen8_pde_t *pde_vaddr;
+   gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
+I915_CACHE_LLC);
 
gen8_for_each_pde(pt, pd, start, length, pde) {
if (WARN_ON(!pd->page_table[pde]))
break;
 
-   gen8_ppgtt_clear_pt(vm, pt, start, length);
+   if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
+   __clear_bit(pde, pd->used_pdes);
+   pde_vaddr = kmap_px(pd);
+   pde_vaddr[pde] = scratch_pde;
+   kunmap_px(ppgtt, pde_vaddr);
+   }
}
+
+   if (bitmap_empty(pd->used_pdes, I915_PDES)) {
+   free_pd(vm->dev, pd);
+   return true;
+   }
+
+   return false;
 }
 
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+/* Removes entries from a single page dir pointer, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 struct i915_page_directory_pointer *pdp,
 uint64_t start,
 uint64_t length)
 {
+   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);