On each call to gen8_alloc_va_range_3lvl we're allocating temporary
bitmaps needed for error handling. Unfortunately, when we increase
address space size (48b ppgtt) we do additional (512 - 4) calls to
kcalloc, increasing latency between exec and actual start of execution
on the GPU. Let's just do a single kcalloc, we can also drop the size
from free_gen8_temp_bitmaps since it's no longer used.

v2: Use GFP_TEMPORARY to make the allocations reclaimable.
v3: Drop the 2D array, just allocate a single block.

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
Cc: Michel Thierry <michel.thie...@intel.com>
Signed-off-by: Michał Winiarski <michal.winiar...@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 45 +++++++++++++------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4a76807..f810762 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1126,13 +1126,8 @@ unwind_out:
 }
 
 static void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
-                      uint32_t pdpes)
+free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
 {
-       int i;
-
-       for (i = 0; i < pdpes; i++)
-               kfree(new_pts[i]);
        kfree(new_pts);
        kfree(new_pds);
 }
@@ -1142,29 +1137,20 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned 
long **new_pts,
  */
 static
 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-                                        unsigned long ***new_pts,
+                                        unsigned long **new_pts,
                                         uint32_t pdpes)
 {
-       int i;
        unsigned long *pds;
-       unsigned long **pts;
+       unsigned long *pts;
 
-       pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL);
+       pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), 
GFP_TEMPORARY);
        if (!pds)
                return -ENOMEM;
 
-       pts = kcalloc(pdpes, sizeof(unsigned long *), GFP_KERNEL);
-       if (!pts) {
-               kfree(pds);
-               return -ENOMEM;
-       }
-
-       for (i = 0; i < pdpes; i++) {
-               pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
-                                sizeof(unsigned long), GFP_KERNEL);
-               if (!pts[i])
-                       goto err_out;
-       }
+       pts = kcalloc(pdpes * BITS_TO_LONGS(I915_PDES),
+                       sizeof(unsigned long), GFP_TEMPORARY);
+       if (!pts)
+               goto err_out;
 
        *new_pds = pds;
        *new_pts = pts;
@@ -1172,7 +1158,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long 
**new_pds,
        return 0;
 
 err_out:
-       free_gen8_temp_bitmaps(pds, pts, pdpes);
+       free_gen8_temp_bitmaps(pds, pts);
        return -ENOMEM;
 }
 
@@ -1193,7 +1179,7 @@ static int gen8_alloc_va_range_3lvl(struct 
i915_address_space *vm,
 {
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
-       unsigned long *new_page_dirs, **new_page_tables;
+       unsigned long *new_page_dirs, *new_page_tables;
        struct drm_device *dev = vm->dev;
        struct i915_page_directory *pd;
        const uint64_t orig_start = start;
@@ -1220,14 +1206,14 @@ static int gen8_alloc_va_range_3lvl(struct 
i915_address_space *vm,
        ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
                                                new_page_dirs);
        if (ret) {
-               free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
+               free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
                return ret;
        }
 
        /* For every page directory referenced, allocate page tables */
        gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
                ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
-                                               new_page_tables[pdpe]);
+                                               new_page_tables + pdpe * 
BITS_TO_LONGS(I915_PDES));
                if (ret)
                        goto err_out;
        }
@@ -1278,20 +1264,21 @@ static int gen8_alloc_va_range_3lvl(struct 
i915_address_space *vm,
                gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
        }
 
-       free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
+       free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
        mark_tlbs_dirty(ppgtt);
        return 0;
 
 err_out:
        while (pdpe--) {
-               for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
+               for_each_set_bit(temp, new_page_tables + pdpe *
+                               BITS_TO_LONGS(I915_PDES), I915_PDES)
                        free_pt(dev, 
pdp->page_directory[pdpe]->page_table[temp]);
        }
 
        for_each_set_bit(pdpe, new_page_dirs, pdpes)
                free_pd(dev, pdp->page_directory[pdpe]);
 
-       free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
+       free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
        mark_tlbs_dirty(ppgtt);
        return ret;
 }
-- 
2.4.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to