These page table helpers make the code much cleaner. There is some
room to use the arch/x86 header files. The reason I've opted not to is
in several cases, the definitions are dictated by the CONFIG_ options
which do not always indicate the restrictions in the GPU.

Signed-off-by: Ben Widawsky <[email protected]>
---

I have this patch queued up for the next round of /stuff/ I am working on. If
you want to pull it into this series, it's fine by me. As I deal with the code
more, it does become more obvious what looks good, and what does not.

---

 drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index aa3ef7f..43d9129 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -30,8 +30,6 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-#define GEN6_PPGTT_PD_ENTRIES 512
-#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
 typedef uint64_t gen8_gtt_pte_t;
 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 
@@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN6_PTE_ADDR_ENCODE(addr)     GEN6_GTT_ADDR_ENCODE(addr)
 #define HSW_PTE_ADDR_ENCODE(addr)      HSW_GTT_ADDR_ENCODE(addr)
 
+/* GEN6 PPGTT resembles a 2 level page table:
+ * 31:22 | 21:12 |  11:0
+ *  PDE  |  PTE  | offset
+ */
+#define GEN6_PDE_SHIFT                 22
+#define GEN6_PPGTT_PD_ENTRIES          512
+#define GEN6_PDE_MASK                  (GEN6_PPGTT_PD_ENTRIES-1)
+#define GEN6_PTE_SHIFT                 12
+#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+#define GEN6_PTE_MASK                  (GEN6_PPGTT_PT_ENTRIES-1)
+
+static inline uint32_t gen6_pte_index(uint32_t address)
+{
+       return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK;
+}
+
+static inline uint32_t gen6_pde_index(uint32_t address)
+{
+       return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK;
+}
+
 /* Cacheability Control is a 4-bit value. The low three bits are stored in *
  * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
  */
@@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define HSW_WT_ELLC_LLC_AGE0           HSW_CACHEABILITY_CONTROL(0x6)
 #define HSW_WT_ELLC_LLC_AGE3           HSW_CACHEABILITY_CONTROL(0x7)
 
+#define PPAT_UNCACHED_INDEX            (_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE_INDEX          0 /* WB LLC */
+#define PPAT_CACHED_INDEX              _PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC_INDEX                _PAGE_PCD /* WT eLLC */
+
 #define GEN8_PTES_PER_PAGE             (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
 #define GEN8_PDES_PER_PAGE             (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
 
@@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
  * PDPE  |  PDE  |  PTE  | offset
  * The difference as compared to normal x86 3 level page table is the PDPEs are
  * programmed via register.
+ *
+ * The x86 pagetable code is flexible in its ability to handle varying page
+ * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
+ * instead replicate the interesting functionality.
  */
 #define GEN8_PDPE_SHIFT                        30
 #define GEN8_PDPE_MASK                 0x3
@@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN8_PTE_SHIFT                 12
 #define GEN8_PTE_MASK                  0x1ff
 
-#define PPAT_UNCACHED_INDEX            (_PAGE_PWT | _PAGE_PCD)
-#define PPAT_CACHED_PDE_INDEX          0 /* WB LLC */
-#define PPAT_CACHED_INDEX              _PAGE_PAT /* WB LLCeLLC */
-#define PPAT_DISPLAY_ELLC_INDEX                _PAGE_PCD /* WT eLLC */
+static inline uint32_t gen8_pte_index(uint64_t address)
+{
+       return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
+}
+
+static inline uint32_t gen8_pde_index(uint64_t address)
+{
+       return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
+}
+
+static inline uint32_t gen8_pdpe_index(uint64_t address)
+{
+       return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+}
+
+static inline uint32_t gen8_pml4e_index(uint64_t address)
+{
+       BUG();
+}
+
+/* Useful for 3 level page table functions */
+static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length)
+{
+       return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT;
+}
 
 static void ppgtt_bind_vma(struct i915_vma *vma,
                           enum i915_cache_level cache_level,
@@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
        gen8_gtt_pte_t *pt_vaddr, scratch_pte;
-       unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-       unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-       unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+       unsigned pdpe = gen8_pdpe_index(start);
+       unsigned pde = gen8_pde_index(start);
+       unsigned pte = gen8_pte_index(start);
        unsigned num_entries = (length - start) >> PAGE_SHIFT;
        unsigned last_pte, i;
 
@@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
        gen8_gtt_pte_t *pt_vaddr;
-       unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-       unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-       unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+       unsigned pdpe = gen8_pdpe_index(start);
+       unsigned pde = gen8_pde_index(start);
+       unsigned pte = gen8_pte_index(start);
        struct sg_page_iter sg_iter;
 
        pt_vaddr = NULL;
@@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, 
struct seq_file *m)
                seq_printf(m, "\tPDE: %x\n", pd_entry);
 
                pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
-               for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
+               for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) {
                        unsigned long va =
-                               (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
+                               (pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) +
                                (pte * PAGE_SIZE);
                        int i;
                        bool found = false;
@@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct 
i915_address_space *vm,
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
        gen6_gtt_pte_t *pt_vaddr, scratch_pte;
-       unsigned first_entry = start >> PAGE_SHIFT;
+       unsigned pde = gen6_pde_index(start);
        unsigned num_entries = (length - start) >> PAGE_SHIFT;
-       unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
-       unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+       unsigned pte = gen6_pte_index(start);
        unsigned last_pte, i;
 
        scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
 
        while (num_entries) {
-               last_pte = first_pte + num_entries;
-               if (last_pte > I915_PPGTT_PT_ENTRIES)
-                       last_pte = I915_PPGTT_PT_ENTRIES;
+               last_pte = pte + num_entries;
+               if (last_pte > GEN6_PPGTT_PT_ENTRIES)
+                       last_pte = GEN6_PPGTT_PT_ENTRIES;
 
-               pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+               pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 
-               for (i = first_pte; i < last_pte; i++)
+               for (i = pte; i < last_pte; i++)
                        pt_vaddr[i] = scratch_pte;
 
                kunmap_atomic(pt_vaddr);
 
-               num_entries -= last_pte - first_pte;
-               first_pte = 0;
-               act_pt++;
+               num_entries -= last_pte - pte;
+               pte = 0;
+               pde++;
        }
 }
 
@@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct 
i915_address_space *vm,
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
        gen6_gtt_pte_t *pt_vaddr;
-       unsigned first_entry = start >> PAGE_SHIFT;
-       unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
-       unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+       unsigned pde = gen6_pde_index(start);
+       unsigned pte = gen6_pte_index(start);
        struct sg_page_iter sg_iter;
 
        pt_vaddr = NULL;
        for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
                if (pt_vaddr == NULL)
-                       pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+                       pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 
-               pt_vaddr[act_pte] =
+               pt_vaddr[pte] =
                        vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
                                       cache_level, true);
-               if (++act_pte == I915_PPGTT_PT_ENTRIES) {
+               if (++pte == GEN6_PPGTT_PT_ENTRIES) {
                        kunmap_atomic(pt_vaddr);
                        pt_vaddr = NULL;
-                       act_pt++;
-                       act_pte = 0;
+                       pde++;
+                       pte = 0;
                }
        }
        if (pt_vaddr)
@@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
        ppgtt->base.cleanup = gen6_ppgtt_cleanup;
        ppgtt->base.scratch = dev_priv->gtt.base.scratch;
        ppgtt->base.start = 0;
-       ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * 
PAGE_SIZE;
+       ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * 
PAGE_SIZE;
        ppgtt->debug_dump = gen6_dump_ppgtt;
 
        ppgtt->pd_offset =
-- 
1.9.0

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to