From: Dave Airlie <[email protected]>

When NVK enabled large pages, userspace tests started seeing fault
reports at a valid address.
There was a case where an address moving from a 64k page to 4k pages
could expose a race between unmapping the 4k page, mapping the 64k
page, and unreffing the 4k pages. Unreffing the 4k pages would cause
the dual page table handling to always set the LPTE entry to SPARSE
or INVALID, but if a valid LPTE had been mapped in the meantime, it
would get trashed.

Keep track of when a valid LPTE has been referenced, and don't reset
it in that case.

This increases the tracking to 32 bits per PTE, because it turns out
that if unref gets delayed, a lot of these references can be
outstanding, and this can cause strange behaviours.

Cc: [email protected]
Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14610
Signed-off-by: Dave Airlie <[email protected]>
--
v2: move from 8-bit to 32-bit tracker,
    fix some more flag changes.
v3: missed one BIG_PTE unset
v4: start reference counting LPTEs
---
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 80 ++++++++++++-------
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 14 +++-
 2 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index f95c58b67633..c2dfaa4b89cf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -53,7 +53,7 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
 		}
 	}
 
-	if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
+	if (!(pgt = kzalloc(sizeof(*pgt) + (sizeof(pgt->pte[0]) * lpte), GFP_KERNEL)))
 		return NULL;
 	pgt->page = page ? page->shift : 0;
 	pgt->sparse = sparse;
@@ -208,7 +208,7 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	 */
 	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 		const u32 pten = min(sptn - spti, ptes);
-		pgt->pte[lpti] -= pten;
+		pgt->pte[lpti].spte_count -= pten;
 		ptes -= pten;
 	}
 
@@ -218,9 +218,9 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 		/* Skip over any LPTEs that still have valid SPTEs. */
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
+		if (pgt->pte[pteb].spte_count) {
 			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
+				if (!pgt->pte[ptei].spte_count)
 					break;
 			}
 			continue;
 		}
@@ -232,24 +232,27 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 *
 		 * Determine how many LPTEs need to transition state.
 		 */
-		pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+		pgt->pte[ptei].spte_valid = false;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-			if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
+			if (pgt->pte[ptei].spte_count)
 				break;
-			pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+			pgt->pte[ptei].spte_valid = false;
 		}
 
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+		if (pgt->pte[pteb].sparse) {
 			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
 			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
-		} else
-		if (pair->func->invalid) {
-			/* If the MMU supports it, restore the LPTE to the
-			 * INVALID state to tell the MMU there is no point
-			 * trying to fetch the corresponding SPTEs.
-			 */
-			TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
-			pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+		} else if (!pgt->pte[pteb].lpte_valid) {
+			if (pair->func->invalid) {
+				/* If the MMU supports it, restore the LPTE to the
+				 * INVALID state to tell the MMU there is no point
+				 * trying to fetch the corresponding SPTEs.
+				 */
+				TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
+				pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+			}
+		} else {
+			TRA(it, "LPTE %05x: V %d PTEs", pteb, ptes);
 		}
 	}
 }
@@ -280,6 +283,13 @@ nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
 	if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
 		nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);
 
+	if (desc->type == LPT && (pgt->refs[0] || pgt->refs[1])) {
+		for (u32 lpti = ptei; ptes; lpti++) {
+			pgt->pte[lpti].lpte_count--;
+			ptes--;
+		}
+	}
+
 	/* PT no longer needed? Destroy it. */
 	if (!pgt->refs[type]) {
 		it->lvl++;
@@ -307,7 +317,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	 */
 	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 		const u32 pten = min(sptn - spti, ptes);
-		pgt->pte[lpti] += pten;
+		pgt->pte[lpti].spte_count += pten;
 		ptes -= pten;
 	}
 
@@ -317,9 +327,9 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 		/* Skip over any LPTEs that already have valid SPTEs. */
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
+		if (pgt->pte[pteb].spte_valid) {
 			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
+				if (!pgt->pte[ptei].spte_valid)
 					break;
 			}
 			continue;
 		}
@@ -331,14 +341,16 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 *
 		 * Determine how many LPTEs need to transition state.
 		 */
-		pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+		pgt->pte[ptei].spte_valid = true;
+		pgt->pte[ptei].lpte_valid = false;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-			if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
+			if (pgt->pte[ptei].spte_valid)
 				break;
-			pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+			pgt->pte[ptei].spte_valid = true;
+			pgt->pte[ptei].lpte_valid = false;
 		}
 
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+		if (pgt->pte[pteb].sparse) {
 			const u32 spti = pteb * sptn;
 			const u32 sptc = ptes * sptn;
 			/* The entire LPTE is marked as sparse, we need
@@ -374,6 +386,15 @@ nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
 	if (desc->type == SPT)
 		nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);
 
+	if (desc->type == LPT) {
+		for (u32 lpti = ptei; ptes; lpti++) {
+			pgt->pte[lpti].spte_valid = false;
+			pgt->pte[lpti].lpte_valid = true;
+			pgt->pte[lpti].lpte_count++;
+			ptes--;
+		}
+	}
+
 	return true;
 }
 
@@ -386,7 +407,8 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
 			pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
 	} else
 	if (desc->type == LPT) {
-		memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
+		struct pt_tracker sparse = { .sparse = 1 };
+		memset32((u32 *)&pgt->pte[ptei], *(u32 *)&sparse, ptes);
 	}
 }
 
@@ -398,7 +420,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte
 		memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
 	else
 	if (it->desc->type == LPT)
-		memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
+		memset32((u32 *)&pt->pte[ptei], 0x00, ptes);
 	return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes);
 }
 
@@ -445,9 +467,9 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 		 * the SPTEs on some GPUs.
 		 */
 		for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
-			bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+			bool spte = !!pgt->pte[ptei].spte_count;
 			for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
-				bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+				bool next = !!pgt->pte[ptei].spte_count;
 				if (spte != next)
 					break;
 			}
@@ -457,11 +479,11 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 					desc->func->sparse(vmm, pt, pteb, ptes);
 				else
 					desc->func->invalid(vmm, pt, pteb, ptes);
-				memset(&pgt->pte[pteb], 0x00, ptes);
+				memset32((u32 *)&pgt->pte[pteb], 0x00, ptes);
 			} else {
 				desc->func->unmap(vmm, pt, pteb, ptes);
 				while (ptes--)
-					pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
+					pgt->pte[pteb++].spte_valid = true;
 			}
 		}
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 4586a425dbe4..8c4531a70a3a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -4,6 +4,15 @@
 #include <core/memory.h>
 enum nvkm_memory_target;
 
+struct pt_tracker {
+	u32 sparse:1;
+	u32 spte_valid:1;
+	u32 lpte_valid:1;
+	u32 lpte_count:13;
+	u32 spte_count:16;
+};
+
+
 struct nvkm_vmm_pt {
 	/* Some GPUs have a mapping level with a dual page tables to
 	 * support large and small pages in the same address-range.
@@ -44,10 +53,7 @@ struct nvkm_vmm_pt {
 	 *
 	 * This information is used to manage LPTE state transitions.
 	 */
-#define NVKM_VMM_PTE_SPARSE 0x80
-#define NVKM_VMM_PTE_VALID 0x40
-#define NVKM_VMM_PTE_SPTES 0x3f
-	u8 pte[];
+	struct pt_tracker pte[];
 };
 
 typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *,
-- 
2.52.0
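
Not part of the patch, but a note for anyone reviewing the memset32()
conversions: they treat each tracker entry as a raw u32, which is only
safe because the five bitfields (1 + 1 + 1 + 13 + 16) pack into exactly
one 32-bit word. Below is a minimal standalone sketch of that layout
assumption, using userspace stand-ins (uint32_t for the kernel's u32,
memcpy in place of the kernel's memset32() and pointer punning); the
field comments are my reading of the patch, not kernel documentation.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirror of the patch's struct pt_tracker, with userspace types. */
struct pt_tracker {
	uint32_t sparse:1;	/* LPTE currently in the SPARSE state */
	uint32_t spte_valid:1;	/* LPTE covered by valid SPTEs */
	uint32_t lpte_valid:1;	/* a valid large-page mapping was referenced */
	uint32_t lpte_count:13;	/* outstanding large-page references */
	uint32_t spte_count:16;	/* number of valid SPTEs underneath */
};

/* The (u32 *) casts in vmm.c assume this; if the struct ever grew past
 * one word, the memset32() calls would no longer cover whole entries.
 */
_Static_assert(sizeof(struct pt_tracker) == sizeof(uint32_t),
	       "tracker must pack into exactly one 32-bit word");

int main(void)
{
	/* Emulate nvkm_vmm_sparse_ptes(): bulk-initialise a run of
	 * trackers to SPARSE by stamping the same 32-bit pattern.
	 */
	struct pt_tracker pte[8], sparse = { .sparse = 1 };
	uint32_t word;
	unsigned i;

	memcpy(&word, &sparse, sizeof(word));
	for (i = 0; i < 8; i++)
		memcpy(&pte[i], &word, sizeof(word));

	printf("sparse=%u lpte_valid=%u spte_count=%u\n",
	       (unsigned)pte[3].sparse, (unsigned)pte[3].lpte_valid,
	       (unsigned)pte[3].spte_count);
	return 0;
}

In the kernel itself, a BUILD_BUG_ON(sizeof(struct pt_tracker) !=
sizeof(u32)) next to the first memset32() user might be worth
considering to make the assumption explicit.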
