This patch uses the IOMMU to aggregate (potentially discontiguous) small pages into larger big pages and maps them into the GMMU. With the page shifts used here (spg_shift = 12, lpg_shift = 17), 32 scattered 4 KiB system pages are remapped through the IOMMU into one contiguous 128 KiB IOVA range, which the GMMU can then map as a single big page.
Signed-off-by: Vince Hsu <[email protected]>
---
 drm/nouveau/nvkm/engine/device/gk104.c |   2 +-
 drm/nouveau/nvkm/subdev/mmu/Kbuild     |   1 +
 drm/nouveau/nvkm/subdev/mmu/gk20a.c    | 253 +++++++++++++++++++++++++++++++++
 3 files changed, 255 insertions(+), 1 deletion(-)
 create mode 100644 drm/nouveau/nvkm/subdev/mmu/gk20a.c

diff --git a/drm/nouveau/nvkm/engine/device/gk104.c b/drm/nouveau/nvkm/engine/device/gk104.c
index 6a9483f65d83..9ea48ba31c0d 100644
--- a/drm/nouveau/nvkm/engine/device/gk104.c
+++ b/drm/nouveau/nvkm/engine/device/gk104.c
@@ -172,7 +172,7 @@ gk104_identify(struct nvkm_device *device)
 		device->oclass[NVDEV_SUBDEV_LTC    ] =  gk104_ltc_oclass;
 		device->oclass[NVDEV_SUBDEV_IBUS   ] = &gk20a_ibus_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] =  gk20a_instmem_oclass;
-		device->oclass[NVDEV_SUBDEV_MMU    ] = &gf100_mmu_oclass;
+		device->oclass[NVDEV_SUBDEV_MMU    ] = &gk20a_mmu_oclass;
 		device->oclass[NVDEV_SUBDEV_BAR    ] = &gk20a_bar_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] =  gf110_dmaeng_oclass;
 		device->oclass[NVDEV_ENGINE_FIFO   ] =  gk20a_fifo_oclass;
diff --git a/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drm/nouveau/nvkm/subdev/mmu/Kbuild
index 012c9db687b2..141302a8e933 100644
--- a/drm/nouveau/nvkm/subdev/mmu/Kbuild
+++ b/drm/nouveau/nvkm/subdev/mmu/Kbuild
@@ -4,3 +4,4 @@ nvkm-y += nvkm/subdev/mmu/nv41.o
 nvkm-y += nvkm/subdev/mmu/nv44.o
 nvkm-y += nvkm/subdev/mmu/nv50.o
 nvkm-y += nvkm/subdev/mmu/gf100.o
+nvkm-y += nvkm/subdev/mmu/gk20a.o
diff --git a/drm/nouveau/nvkm/subdev/mmu/gk20a.c b/drm/nouveau/nvkm/subdev/mmu/gk20a.c
new file mode 100644
index 000000000000..b444b73e208d
--- /dev/null
+++ b/drm/nouveau/nvkm/subdev/mmu/gk20a.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <subdev/fb.h>
+#include <subdev/ltc.h>
+#include <subdev/mmu.h>
+
+#ifdef __KERNEL__
+#include <linux/iommu.h>
+#include <nouveau_platform.h>
+#endif
+
+#include "gf100.h"
+
+struct gk20a_mmu_priv {
+	struct nvkm_mmu base;
+};
+
+struct gk20a_mmu_iommu_mapping {
+	struct nvkm_mm_node *node;
+	u64 iova;
+};
+
+extern const u8 gf100_pte_storage_type_map[256];
+
+static void
+gk20a_vm_map(struct nvkm_vma *vma, struct nvkm_gpuobj *pgt,
+	     struct nvkm_mem *mem, u32 pte, u64 list)
+{
+	u32 target = (vma->access & NV_MEM_ACCESS_NOSNOOP) ? 7 : 5;
+	u64 phys;
+
+	pte <<= 3;
+	phys = gf100_vm_addr(vma, list, mem->memtype, target);
+
+	if (mem->tag) {
+		struct nvkm_ltc *ltc = nvkm_ltc(vma->vm->mmu);
+		u32 tag = mem->tag->offset;
+		phys |= (u64)tag << (32 + 12);
+		ltc->tags_clear(ltc, tag, 1);
+	}
+
+	nv_wo32(pgt, pte + 0, lower_32_bits(phys));
+	nv_wo32(pgt, pte + 4, upper_32_bits(phys));
+}
+
+static void
+gk20a_vm_map_iommu(struct nvkm_vma *vma, struct nvkm_gpuobj *pgt,
+		   struct nvkm_mem *mem, u32 pte, dma_addr_t *list,
+		   void **priv)
+{
+	struct nvkm_vm *vm = vma->vm;
+	struct nvkm_mmu *mmu = vm->mmu;
+	struct nvkm_mm_node *node;
+	struct nouveau_platform_device *plat;
+	struct gk20a_mmu_iommu_mapping *p;
+	int npages = 1 << (mmu->lpg_shift - mmu->spg_shift);
+	int i, ret;
+	u64 addr;
+
+	plat = nv_device_to_platform(nv_device(&mmu->base));
+
+	*priv = kzalloc(sizeof(struct gk20a_mmu_iommu_mapping), GFP_KERNEL);
+	if (!*priv)
+		return;
+
+	mutex_lock(&plat->gpu->iommu.mutex);
+	ret = nvkm_mm_head(plat->gpu->iommu.mm,
+			   0,
+			   1,
+			   npages,
+			   npages,
+			   (1 << mmu->lpg_shift) >> 12,
+			   &node);
+	mutex_unlock(&plat->gpu->iommu.mutex);
+	if (ret)
+		return;
+
+	for (i = 0; i < npages; i++, list++) {
+		ret = iommu_map(plat->gpu->iommu.domain,
+				(node->offset + i) << PAGE_SHIFT,
+				*list,
+				PAGE_SIZE,
+				IOMMU_READ | IOMMU_WRITE);
+
+		if (ret < 0)
+			return;
+
+		nv_trace(mmu, "IOMMU: IOVA=0x%016llx-> IOMMU -> PA=%016llx\n",
+			 (u64)(node->offset + i) << PAGE_SHIFT, (u64)(*list));
+	}
+
+	addr = (u64)node->offset << PAGE_SHIFT;
+	addr |= BIT_ULL(plat->gpu->iommu.phys_addr_bit);
+
+	gk20a_vm_map(vma, pgt, mem, pte, addr);
+
+	p = *priv;
+	p->node = node;
+	p->iova = node->offset << PAGE_SHIFT;
+}
+
+static void
+gk20a_vm_map_sg_iommu(struct nvkm_vma *vma, struct nvkm_gpuobj *pgt,
+		      struct nvkm_mem *mem, u32 pte, struct sg_page_iter *iter,
+		      void **priv)
+{
+	struct nvkm_vm *vm = vma->vm;
+	struct nvkm_mmu *mmu = vm->mmu;
+	struct nvkm_mm_node *node;
+	struct nouveau_platform_device *plat;
+	struct gk20a_mmu_iommu_mapping *p;
+	int npages = 1 << (mmu->lpg_shift - mmu->spg_shift);
+	int i, ret;
+	u64 addr;
+
+	plat = nv_device_to_platform(nv_device(&mmu->base));
+
+	*priv = kzalloc(sizeof(struct gk20a_mmu_iommu_mapping), GFP_KERNEL);
+	if (!*priv)
+		return;
+
+	mutex_lock(&plat->gpu->iommu.mutex);
+	ret = nvkm_mm_head(plat->gpu->iommu.mm,
+			   0,
+			   1,
+			   npages,
+			   npages,
+			   (1 << mmu->lpg_shift) >> 12,
+			   &node);
+	mutex_unlock(&plat->gpu->iommu.mutex);
+	if (ret)
+		return;
+
+	for (i = 0; i < npages; i++) {
+		dma_addr_t phys = sg_page_iter_dma_address(iter);
+
+		ret = iommu_map(plat->gpu->iommu.domain,
+				(node->offset + i) << PAGE_SHIFT,
+				phys,
+				PAGE_SIZE,
+				IOMMU_READ | IOMMU_WRITE);
+
+		if (ret < 0)
+			return;
+
+		nv_trace(mmu, "IOMMU: IOVA=0x%016llx-> IOMMU -> PA=%016llx\n",
+			 (u64)(node->offset + i) << PAGE_SHIFT, (u64)phys);
+
+		if ((i < npages - 1) && !__sg_page_iter_next(iter)) {
+			nv_error(mmu, "failed to iterate sg table\n");
+			return;
+		}
+	}
+
+	addr = (u64)node->offset << PAGE_SHIFT;
+	addr |= BIT_ULL(plat->gpu->iommu.phys_addr_bit);
+
+	gk20a_vm_map(vma, pgt, mem, pte, addr);
+
+	p = *priv;
+	p->node = node;
+	p->iova = node->offset << PAGE_SHIFT;
+}
+
+static void
+gk20a_vm_unmap_iommu(struct nvkm_vma *vma, void *priv)
+{
+	struct nvkm_vm *vm = vma->vm;
+	struct nvkm_mmu *mmu = vm->mmu;
+	struct nouveau_platform_device *plat;
+	struct gk20a_mmu_iommu_mapping *p = priv;
+	int ret;
+
+	plat = nv_device_to_platform(nv_device(&mmu->base));
+
+	ret = iommu_unmap(plat->gpu->iommu.domain, p->iova,
+			  1 << mmu->lpg_shift);
+	WARN(ret < 0, "failed to unmap IOMMU address 0x%16llx, ret=%d\n",
+	     p->iova, ret);
+
+	mutex_lock(&plat->gpu->iommu.mutex);
+	nvkm_mm_free(plat->gpu->iommu.mm, &p->node);
+	mutex_unlock(&plat->gpu->iommu.mutex);
+
+	kfree(priv);
+}
+
+static int
+gk20a_mmu_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
+	       struct nvkm_oclass *oclass, void *data, u32 size,
+	       struct nvkm_object **pobject)
+{
+	struct gk20a_mmu_priv *priv;
+	struct nouveau_platform_device *plat;
+	int ret;
+
+	ret = nvkm_mmu_create(parent, engine, oclass, "VM", "vm", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	plat = nv_device_to_platform(nv_device(parent));
+	if (plat->gpu->iommu.domain)
+		priv->base.iommu_capable = true;
+
+	priv->base.limit = 1ULL << 40;
+	priv->base.dma_bits = 40;
+	priv->base.pgt_bits  = 27 - 12;
+	priv->base.spg_shift = 12;
+	priv->base.lpg_shift = 17;
+	priv->base.create = gf100_vm_create;
+	priv->base.map_pgt = gf100_vm_map_pgt;
+	priv->base.map = gf100_vm_map;
+	priv->base.map_sg = gf100_vm_map_sg;
+	priv->base.map_iommu = gk20a_vm_map_iommu;
+	priv->base.unmap_iommu = gk20a_vm_unmap_iommu;
+	priv->base.map_sg_iommu = gk20a_vm_map_sg_iommu;
+	priv->base.unmap = gf100_vm_unmap;
+	priv->base.flush = gf100_vm_flush;
+
+	return 0;
+}
+
+struct nvkm_oclass
+gk20a_mmu_oclass = {
+	.handle = NV_SUBDEV(MMU, 0xea),
+	.ofuncs = &(struct nvkm_ofuncs) {
+		.ctor = gk20a_mmu_ctor,
+		.dtor = _nvkm_mmu_dtor,
+		.init = _nvkm_mmu_init,
+		.fini = _nvkm_mmu_fini,
+	},
+};
-- 
2.1.4

_______________________________________________
Nouveau mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/nouveau
