Quoting Matthew Auld (2017-07-25 20:21:23)
> Support inserting 1G gtt pages into the 48b PPGTT.
>
> v2: sanity check sg->length against page_size
>
> Signed-off-by: Matthew Auld <[email protected]>
> Cc: Joonas Lahtinen <[email protected]>
> Cc: Chris Wilson <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 73 +++++++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +
>  2 files changed, 71 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 385cd85f47bb..acd0c0d1ba8d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -945,6 +945,66 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
> cache_level);
> }
>
> +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
> +					    struct i915_page_directory_pointer **pdps,
> +					    struct sgt_dma *iter,
> +					    enum i915_cache_level cache_level)
> +{
> +	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
> +	u64 start = vma->node.start;
> +
> +	do {
> +		struct gen8_insert_pte idx = gen8_insert_pte(start);
> +		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
> +		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
> +		struct i915_page_table *pt = pd->page_table[idx.pde];
> +		dma_addr_t rem = iter->max - iter->dma;
> +		unsigned int page_size;
> +		gen8_pte_t encode = pte_encode;
> +		gen8_pte_t *vaddr;
> +		u16 index, max;
> +
> +		if (unlikely(vma->page_sizes.sg & I915_GTT_PAGE_SIZE_1G) &&
> +		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_1G) &&
> +		    rem >= I915_GTT_PAGE_SIZE_1G && !(idx.pte | idx.pde)) {
> +			vaddr = kmap_atomic_px(pdp);
> +			index = idx.pdpe;
> +			max = GEN8_PML4ES_PER_PML4;
> +			page_size = I915_GTT_PAGE_SIZE_1G;
> +			encode |= GEN8_PDPE_PS_1G;
> +		} else {
> +			vaddr = kmap_atomic_px(pt);
> +			index = idx.pte;
> +			max = GEN8_PTES;
> +			page_size = I915_GTT_PAGE_SIZE;
> +		}
> +
> +		do {
> +			GEM_BUG_ON(iter->sg->length < page_size);
> +			vaddr[index++] = encode | iter->dma;
> +
> +			start += page_size;
> +			iter->dma += page_size;
> +			if (iter->dma >= iter->max) {
> +				iter->sg = __sg_next(iter->sg);
> +				if (!iter->sg)
> +					break;
> +
> +				iter->dma = sg_dma_address(iter->sg);
> +				iter->max = iter->dma + iter->sg->length;
> +
> +				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
> +					break;
> +			}
> +			rem = iter->max - iter->dma;
> +
> +		} while (rem >= page_size && index < max);
> +
> +		kunmap_atomic(vaddr);
> +
> +	} while (iter->sg);
> +}
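[For reference, the 1G path above hinges on gen8_insert_pte() chopping the 48b offset into the usual 9-bit indices per level, so a 1G PDPE is only usable when the pde and pte indices are both zero (i.e. the GTT offset sits on a 1G boundary) and the dma chunk is itself 1G aligned with at least 1G left in the current sg entry. A rough standalone sketch of that eligibility check -- the struct/helper names below are made up for illustration, not the i915 ones, and the vma->page_sizes.sg flag test is left out:

#include <stdbool.h>
#include <stdint.h>

/* 48b PPGTT offset layout: [47:39] pml4e, [38:30] pdpe, [29:21] pde,
 * [20:12] pte, [11:0] page offset.
 */
struct insert_pte_sketch {
	uint16_t pml4e, pdpe, pde, pte;
};

static struct insert_pte_sketch decompose_48b(uint64_t start)
{
	return (struct insert_pte_sketch) {
		.pml4e = (start >> 39) & 0x1ff,
		.pdpe  = (start >> 30) & 0x1ff,
		.pde   = (start >> 21) & 0x1ff,
		.pte   = (start >> 12) & 0x1ff,
	};
}

/* Mirrors the 1G condition in the patch: offset on a 1G boundary (pde and
 * pte indices both zero), dma 1G aligned, and at least 1G remaining in the
 * current sg chunk.
 */
static bool can_use_1G_pdpe(uint64_t start, uint64_t dma, uint64_t rem)
{
	struct insert_pte_sketch idx = decompose_48b(start);

	return !(idx.pte | idx.pde) &&
	       !(dma & ((1ull << 30) - 1)) &&
	       rem >= (1ull << 30);
}
]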
Staring at the final result, I think most importantly we need to break up the index/max/page_size, encode, and vaddr groups with whitespace:
static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
					   struct i915_page_directory_pointer **pdps,
					   struct sgt_dma *iter,
					   enum i915_cache_level cache_level)
{
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
	u64 start = vma->node.start;

	do {
		struct gen8_insert_pte idx = gen8_insert_pte(start);
		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
		struct i915_page_table *pt = pd->page_table[idx.pde];
		dma_addr_t rem = iter->max - iter->dma;
		unsigned int page_size;
		bool maybe_64K = false;
		gen8_pte_t encode = pte_encode;
		gen8_pte_t *vaddr;
		u16 index, max;

		if (!(idx.pte | idx.pde) &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_1G) &&
		    rem >= I915_GTT_PAGE_SIZE_1G) {
			index = idx.pdpe;
			max = GEN8_PML4ES_PER_PML4;
			page_size = I915_GTT_PAGE_SIZE_1G;

			encode |= GEN8_PDPE_PS_1G;

			vaddr = kmap_atomic_px(pdp);
		} else if (!idx.pte &&
			   IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
			   rem >= I915_GTT_PAGE_SIZE_2M) {
			index = idx.pde;
			max = I915_PDES;
			page_size = I915_GTT_PAGE_SIZE_2M;

			encode |= GEN8_PDE_PS_2M;

			vaddr = kmap_atomic_px(pd);
		} else {
			index = idx.pte;
			max = GEN8_PTES;
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (rem >= (max - index) * 4<<10 ||
			     IS_ALIGNED(rem, 64 << 10)))
				maybe_64K = true;

			vaddr = kmap_atomic_px(pt);
		}

		do {
			GEM_BUG_ON(iter->sg->length < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + iter->sg->length;
				rem = iter->max - iter->dma;

				if (maybe_64K && index < max &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (rem >= (max - index) * 4<<10 ||
				       IS_ALIGNED(rem, 64 << 10))))
					maybe_64K = false;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		kunmap_atomic(vaddr);

		/* Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled whole page-table with 64K entries, or filled part of
		 * it and have reached the end of the sg table and we have
		 * enough padding.
		 *
		 * XXX We need 64k scratch to allow index < max
		 */
		if (maybe_64K && index == max) {
			vaddr = kmap_atomic_px(pd);
			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
			kunmap_atomic(vaddr);

			page_size = I915_GTT_PAGE_SIZE_64K;
		}

		vma->page_sizes.gtt |= page_size;
	} while (iter->sg);
}
Please excuse the raw numbers; I was just staring at the maybe_64K problem, trying to find a neater/more consistent way of expressing it.
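If it helps, the 64K eligibility test could be wrapped in a helper with named sizes instead of the 4<<10 / 64<<10 literals. Purely a sketch -- nothing below is in the patch, and the constants just stand in for the 4K/64K page sizes:

#include <stdbool.h>
#include <stdint.h>

#define SZ_4K	(1ull << 12)
#define SZ_64K	(1ull << 16)

/* True when the rest of the 2M block can be expressed as 64K entries:
 * either the remaining dma covers every 4K slot left in this page-table,
 * or it ends on a 64K boundary so the tail can be padded out (with 64K
 * scratch, per the XXX above).
 */
static bool can_mark_64K(uint64_t dma, uint64_t rem,
			 unsigned int index, unsigned int max)
{
	if (dma & (SZ_64K - 1))
		return false;

	return rem >= (max - index) * SZ_4K ||
	       !(rem & (SZ_64K - 1));
}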
-Chris