Since Linux may not merge adjacent pages into a single scatter entry through calls to dma_map_sg(), we check for the special case of hugetlb pages, which are likely to be mapped to contiguous DMA addresses, and take advantage of this when they are. This results in a significantly lower number of MTT segments used for registering hugetlb memory regions.
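
For illustration only (not part of the patch), the standalone userspace sketch below shows the coalescing check described above: walk the DMA segments in order, require each one to start where the previous run ends, and emit one entry per completed huge page. HUGE_SIZE, struct seg, and coalesce() are assumptions made for the example, standing in for the kernel's HPAGE_SIZE and scatterlist entries.

/*
 * Standalone sketch (illustration only): coalesce a list of DMA segments
 * into one entry per huge page, failing if the segments are not
 * physically contiguous.
 */
#include <stdio.h>
#include <stdint.h>

#define HUGE_SIZE (2UL * 1024 * 1024)	/* assume 2 MB huge pages */

struct seg { uint64_t addr, len; };	/* DMA address and length of one entry */

/* Return the number of huge-page entries written to out[], or -1 on a hole. */
static int coalesce(const struct seg *segs, int nsegs, uint64_t *out)
{
	uint64_t cur_addr = 0, cur_size = 0;
	int restart = 1, n = 0, i;

	for (i = 0; i < nsegs; ++i) {
		if (restart) {
			cur_addr = segs[i].addr;
			cur_size = segs[i].len;
			restart = 0;
		} else if (cur_addr + cur_size != segs[i].addr) {
			return -1;		/* hole: fall back to normal pages */
		} else {
			cur_size += segs[i].len;
		}
		if (cur_size > HUGE_SIZE)
			return -1;		/* run overshoots a huge-page boundary */
		if (cur_size == HUGE_SIZE) {
			out[n++] = cur_addr;	/* one MTT entry per huge page */
			restart = 1;
		}
	}
	return n;
}

int main(void)
{
	/* two contiguous 1 MB segments forming a single 2 MB huge page */
	struct seg segs[] = {
		{ 0x100000000ULL, HUGE_SIZE / 2 },
		{ 0x100000000ULL + HUGE_SIZE / 2, HUGE_SIZE / 2 },
	};
	uint64_t out[1];

	printf("entries: %d\n", coalesce(segs, 2, out));
	return 0;
}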
Signed-off-by: Eli Cohen <[email protected]>
---
In this version I also took care of the case where the kernel is compiled
without hugetlb support.

 drivers/infiniband/hw/mlx4/mr.c |   86 ++++++++++++++++++++++++++++++++++-----
 1 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 8e4d26d..4c7a5bf 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -119,6 +119,68 @@ out:
 	return err;
 }
 
+static int handle_hugetlb_user_mr(struct ib_pd *pd, struct mlx4_ib_mr *mr,
+				  u64 virt_addr, int access_flags)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem_chunk *chunk;
+	unsigned dsize;
+	dma_addr_t daddr;
+	unsigned uninitialized_var(cur_size);
+	dma_addr_t uninitialized_var(cur_addr);
+	int restart;
+	int n;
+	struct ib_umem *umem = mr->umem;
+	u64 *arr;
+	int err = 0;
+	int i;
+	int j = 0;
+
+	n = PAGE_ALIGN(umem->length + umem->offset) >> HPAGE_SHIFT;
+	arr = kmalloc(n * sizeof *arr, GFP_KERNEL);
+	if (!arr)
+		return -ENOMEM;
+
+	restart = 1;
+	list_for_each_entry(chunk, &umem->chunk_list, list)
+		for (i = 0; i < chunk->nmap; ++i) {
+			daddr = sg_dma_address(&chunk->page_list[i]);
+			dsize = sg_dma_len(&chunk->page_list[i]);
+			if (restart) {
+				cur_addr = daddr;
+				cur_size = dsize;
+				restart = 0;
+			} else if (cur_addr + cur_size != daddr) {
+				err = -EINVAL;
+				goto out;
+			} else
+				cur_size += dsize;
+
+			if (cur_size > HPAGE_SIZE) {
+				err = -EINVAL;
+				goto out;
+			} else if (cur_size == HPAGE_SIZE) {
+				restart = 1;
+				arr[j++] = cur_addr;
+			}
+		}
+
+	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, umem->length,
+			    convert_access(access_flags), n, HPAGE_SHIFT, &mr->mmr);
+	if (err)
+		goto out;
+
+	err = mlx4_write_mtt(dev->dev, &mr->mmr.mtt, 0, n, arr);
+
+out:
+	kfree(arr);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata)
@@ -140,17 +202,19 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		goto err_free;
 	}
 
-	n = ib_umem_page_count(mr->umem);
-	shift = ilog2(mr->umem->page_size);
-
-	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
-			    convert_access(access_flags), n, shift, &mr->mmr);
-	if (err)
-		goto err_umem;
-
-	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
-	if (err)
-		goto err_mr;
+	if (!mr->umem->hugetlb || handle_hugetlb_user_mr(pd, mr, virt_addr, access_flags)) {
+		n = ib_umem_page_count(mr->umem);
+		shift = ilog2(mr->umem->page_size);
+
+		err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
+				    convert_access(access_flags), n, shift, &mr->mmr);
+		if (err)
+			goto err_umem;
+
+		err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
+		if (err)
+			goto err_mr;
+	}
 
 	err = mlx4_mr_enable(dev->dev, &mr->mmr);
 	if (err)
-- 
1.6.0.5
