Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)
On 2020/10/30 下午3:45, Si-Wei Liu wrote: Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. The memory usage for bookkeeping pinned pages is reverted to what it was before: only one single free page is needed. This helps reduce the host memory demand for VM with a large amount of memory, or in the situation where host is running short of free memory. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu --- drivers/vhost/vdpa.c | 64 +--- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b6d9016..8da8558 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + else + atomic64_add(size >> PAGE_SHIFT, >mm->pinned_vm); return r; } @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long lock_limit, sz2pin, nchunks, i; u64 iova = msg->iova; + long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; + /* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, gup_flags |= FOLL_WRITE; npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; - if (!npages) - return -EINVAL; + if (!npages) { + ret = -EINVAL; + goto free; + } mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, >mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; + nchunks = 0; while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, -gup_flags, page_list, NULL); - if (ret != pinned) + sz2pin = min_t(unsigned long, npages, list_size); + pinned = pin_user_pages(cur_base, sz2pin, + gup_flags, page_list, NULL); + if (sz2pin != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } goto out; + } + nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); - for (i = 0; i < ret; i++) { + for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) + ret = vhost_vdpa_map(v, iova, csize, +map_pfn << PAGE_SHIFT, +msg->perm); + if (ret) goto out; + map_pfn = this_pfn; iova += csize; + nchunks = 0; } last_pfn = this_pfn; } - cur_base += ret << PAGE_SHIFT; - npages -= ret; + cur_base += pinned << PAGE_SHIFT; + npages -= pinned; } /* Pin the rest chunk */ @@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, map_pfn << PAGE_SHIFT,
Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)
On 11/3/2020 5:58 PM, Jason Wang wrote: On 2020/11/4 上午9:08, si-wei liu wrote: On 11/3/2020 5:06 PM, si-wei liu wrote: On 11/3/2020 5:00 AM, Jason Wang wrote: On 2020/10/30 下午3:45, Si-Wei Liu wrote: Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. The memory usage for bookkeeping pinned pages is reverted to what it was before: only one single free page is needed. This helps reduce the host memory demand for VM with a large amount of memory, or in the situation where host is running short of free memory. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu --- drivers/vhost/vdpa.c | 64 +--- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b6d9016..8da8558 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); +else +atomic64_add(size >> PAGE_SHIFT, >mm->pinned_vm); return r; } @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; -unsigned long locked, lock_limit, pinned, i; +unsigned long lock_limit, sz2pin, nchunks, i; u64 iova = msg->iova; +long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; +/* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, gup_flags |= FOLL_WRITE; npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; -if (!npages) -return -EINVAL; +if (!npages) { +ret = -EINVAL; +goto free; +} mmap_read_lock(dev->mm); -locked = atomic64_add_return(npages, >mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - -if (locked > lock_limit) { +if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) { ret = -ENOMEM; -goto out; +goto unlock; } cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; +nchunks = 0; while (npages) { -pinned = min_t(unsigned long, npages, list_size); -ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); -if (ret != pinned) +sz2pin = min_t(unsigned long, npages, list_size); +pinned = pin_user_pages(cur_base, sz2pin, +gup_flags, page_list, NULL); +if (sz2pin != pinned) { +if (pinned < 0) { +ret = pinned; +} else { +unpin_user_pages(page_list, pinned); +ret = -ENOMEM; +} goto out; +} +nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); -for (i = 0; i < ret; i++) { +for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; -if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) +ret = vhost_vdpa_map(v, iova, csize, + map_pfn << PAGE_SHIFT, + msg->perm); +if (ret) goto out; + map_pfn = this_pfn; iova += csize; +nchunks = 0; } last_pfn = this_pfn; } -cur_base += ret << PAGE_SHIFT; -npages -= ret; +cur_base += pinned << PAGE_SHIFT; +npages -= pinned; } /* Pin the rest chunk */ @@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, map_pfn << PAGE_SHIFT, msg->perm); out: if (ret) { +if (nchunks && last_pfn) { +unsigned long pfn; + +/* + * Unpin the outstanding pages which are unmapped. + * Mapped pages are accounted in vdpa_map(), thus + * will be handled by vdpa_unmap(). + */ +for (pfn = map_pfn; pfn <= last_pfn; pfn++) +
Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)
On 2020/11/4 上午9:08, si-wei liu wrote: On 11/3/2020 5:06 PM, si-wei liu wrote: On 11/3/2020 5:00 AM, Jason Wang wrote: On 2020/10/30 下午3:45, Si-Wei Liu wrote: Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. The memory usage for bookkeeping pinned pages is reverted to what it was before: only one single free page is needed. This helps reduce the host memory demand for VM with a large amount of memory, or in the situation where host is running short of free memory. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu --- drivers/vhost/vdpa.c | 64 +--- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b6d9016..8da8558 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + else + atomic64_add(size >> PAGE_SHIFT, >mm->pinned_vm); return r; } @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long lock_limit, sz2pin, nchunks, i; u64 iova = msg->iova; + long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; + /* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, gup_flags |= FOLL_WRITE; npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; - if (!npages) - return -EINVAL; + if (!npages) { + ret = -EINVAL; + goto free; + } mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, >mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; + nchunks = 0; while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); - if (ret != pinned) + sz2pin = min_t(unsigned long, npages, list_size); + pinned = pin_user_pages(cur_base, sz2pin, + gup_flags, page_list, NULL); + if (sz2pin != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } goto out; + } + nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); - for (i = 0; i < ret; i++) { + for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) + ret = vhost_vdpa_map(v, iova, csize, + map_pfn << PAGE_SHIFT, + msg->perm); + if (ret) goto out; + map_pfn = this_pfn; iova += csize; + nchunks = 0; } last_pfn = this_pfn; } - cur_base += ret << PAGE_SHIFT; - npages -= ret; + cur_base += pinned << PAGE_SHIFT; + npages -= pinned; } /* Pin the rest chunk */ @@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, map_pfn << PAGE_SHIFT, msg->perm); out: if (ret) { + if (nchunks && last_pfn) { + unsigned long pfn; + + /* + * Unpin the outstanding pages which are unmapped. + * Mapped pages are accounted in vdpa_map(), thus + * will be handled by vdpa_unmap(). + */ + for (pfn = map_pfn; pfn <= last_pfn; pfn++) + unpin_user_page(pfn_to_page(pfn)); + }
Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)
On 11/3/2020 5:06 PM, si-wei liu wrote: On 11/3/2020 5:00 AM, Jason Wang wrote: On 2020/10/30 下午3:45, Si-Wei Liu wrote: Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. The memory usage for bookkeeping pinned pages is reverted to what it was before: only one single free page is needed. This helps reduce the host memory demand for VM with a large amount of memory, or in the situation where host is running short of free memory. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu --- drivers/vhost/vdpa.c | 64 +--- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b6d9016..8da8558 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); +else +atomic64_add(size >> PAGE_SHIFT, >mm->pinned_vm); return r; } @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; -unsigned long locked, lock_limit, pinned, i; +unsigned long lock_limit, sz2pin, nchunks, i; u64 iova = msg->iova; +long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; +/* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, gup_flags |= FOLL_WRITE; npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; -if (!npages) -return -EINVAL; +if (!npages) { +ret = -EINVAL; +goto free; +} mmap_read_lock(dev->mm); -locked = atomic64_add_return(npages, >mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - -if (locked > lock_limit) { +if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) { ret = -ENOMEM; -goto out; +goto unlock; } cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; +nchunks = 0; while (npages) { -pinned = min_t(unsigned long, npages, list_size); -ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); -if (ret != pinned) +sz2pin = min_t(unsigned long, npages, list_size); +pinned = pin_user_pages(cur_base, sz2pin, +gup_flags, page_list, NULL); +if (sz2pin != pinned) { +if (pinned < 0) { +ret = pinned; +} else { +unpin_user_pages(page_list, pinned); +ret = -ENOMEM; +} goto out; +} +nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); -for (i = 0; i < ret; i++) { +for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; -if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) +ret = vhost_vdpa_map(v, iova, csize, + map_pfn << PAGE_SHIFT, + msg->perm); +if (ret) goto out; + map_pfn = this_pfn; iova += csize; +nchunks = 0; } last_pfn = this_pfn; } -cur_base += ret << PAGE_SHIFT; -npages -= ret; +cur_base += pinned << PAGE_SHIFT; +npages -= pinned; } /* Pin the rest chunk */ @@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, map_pfn << PAGE_SHIFT, msg->perm); out: if (ret) { +if (nchunks && last_pfn) { +unsigned long pfn; + +/* + * Unpin the outstanding pages which are unmapped. + * Mapped pages are accounted in vdpa_map(), thus + * will be handled by vdpa_unmap(). + */ +for (pfn = map_pfn; pfn <= last_pfn; pfn++) +unpin_user_page(pfn_to_page(pfn)); +} vhost_vdpa_unmap(v, msg->iova, msg->size); I want
Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)
On 11/3/2020 5:00 AM, Jason Wang wrote: On 2020/10/30 下午3:45, Si-Wei Liu wrote: Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. The memory usage for bookkeeping pinned pages is reverted to what it was before: only one single free page is needed. This helps reduce the host memory demand for VM with a large amount of memory, or in the situation where host is running short of free memory. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu --- drivers/vhost/vdpa.c | 64 +--- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b6d9016..8da8558 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); +else +atomic64_add(size >> PAGE_SHIFT, >mm->pinned_vm); return r; } @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; -unsigned long locked, lock_limit, pinned, i; +unsigned long lock_limit, sz2pin, nchunks, i; u64 iova = msg->iova; +long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; +/* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, gup_flags |= FOLL_WRITE; npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; -if (!npages) -return -EINVAL; +if (!npages) { +ret = -EINVAL; +goto free; +} mmap_read_lock(dev->mm); -locked = atomic64_add_return(npages, >mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - -if (locked > lock_limit) { +if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) { ret = -ENOMEM; -goto out; +goto unlock; } cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; +nchunks = 0; while (npages) { -pinned = min_t(unsigned long, npages, list_size); -ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); -if (ret != pinned) +sz2pin = min_t(unsigned long, npages, list_size); +pinned = pin_user_pages(cur_base, sz2pin, +gup_flags, page_list, NULL); +if (sz2pin != pinned) { +if (pinned < 0) { +ret = pinned; +} else { +unpin_user_pages(page_list, pinned); +ret = -ENOMEM; +} goto out; +} +nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); -for (i = 0; i < ret; i++) { +for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; -if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) +ret = vhost_vdpa_map(v, iova, csize, + map_pfn << PAGE_SHIFT, + msg->perm); +if (ret) goto out; + map_pfn = this_pfn; iova += csize; +nchunks = 0; } last_pfn = this_pfn; } -cur_base += ret << PAGE_SHIFT; -npages -= ret; +cur_base += pinned << PAGE_SHIFT; +npages -= pinned; } /* Pin the rest chunk */ @@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, map_pfn << PAGE_SHIFT, msg->perm); out: if (ret) { +if (nchunks && last_pfn) { +unsigned long pfn; + +/* + * Unpin the outstanding pages which are unmapped. + * Mapped pages are accounted in vdpa_map(), thus + * will be handled by vdpa_unmap(). + */ +for (pfn = map_pfn; pfn <= last_pfn; pfn++) +unpin_user_page(pfn_to_page(pfn)); +} vhost_vdpa_unmap(v, msg->iova, msg->size); I want to know what's wrong with current code. We
Re: [PATCH] vhost/vsock: add IOTLB API support
On Tue, Nov 03, 2020 at 05:04:23PM +0800, Jason Wang wrote: > > On 2020/11/3 上午1:11, Stefano Garzarella wrote: > > On Fri, Oct 30, 2020 at 07:44:43PM +0800, Jason Wang wrote: > > > > > > On 2020/10/30 下午6:54, Stefano Garzarella wrote: > > > > On Fri, Oct 30, 2020 at 06:02:18PM +0800, Jason Wang wrote: > > > > > > > > > > On 2020/10/30 上午1:43, Stefano Garzarella wrote: > > > > > > This patch enables the IOTLB API support for vhost-vsock devices, > > > > > > allowing the userspace to emulate an IOMMU for the guest. > > > > > > > > > > > > These changes were made following vhost-net, in details this patch: > > > > > > - exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb > > > > > > device if the feature is acked > > > > > > - implements VHOST_GET_BACKEND_FEATURES and > > > > > > VHOST_SET_BACKEND_FEATURES ioctls > > > > > > - calls vq_meta_prefetch() before vq processing to prefetch vq > > > > > > metadata address in IOTLB > > > > > > - provides .read_iter, .write_iter, and .poll callbacks for the > > > > > > chardev; they are used by the userspace to exchange IOTLB messages > > > > > > > > > > > > This patch was tested with QEMU and a patch applied [1] to fix a > > > > > > simple issue: > > > > > > $ qemu -M q35,accel=kvm,kernel-irqchip=split \ > > > > > > -drive file=fedora.qcow2,format=qcow2,if=virtio \ > > > > > > -device intel-iommu,intremap=on \ > > > > > > -device vhost-vsock-pci,guest-cid=3,iommu_platform=on > > > > > > > > > > > > > > > Patch looks good, but a question: > > > > > > > > > > It looks to me you don't enable ATS which means vhost won't > > > > > get any invalidation request or did I miss anything? > > > > > > > > > > > > > You're right, I didn't see invalidation requests, only miss and > > > > updates. > > > > Now I have tried to enable 'ats' and 'device-iotlb' but I still > > > > don't see any invalidation. > > > > > > > > How can I test it? (Sorry but I don't have much experience yet > > > > with vIOMMU) > > > > > > > > > I guess it's because the batched unmap. Maybe you can try to use > > > "intel_iommu=strict" in guest kernel command line to see if it > > > works. > > > > > > Btw, make sure the qemu contains the patch [1]. Otherwise ATS won't > > > be enabled for recent Linux Kernel in the guest. > > > > The problem was my kernel, it was built with a tiny configuration. > > Using fedora stock kernel I can see the 'invalidate' requests, but I > > also had the following issues. > > > > Do they make you ring any bells? > > > > $ ./qemu -m 4G -smp 4 -M q35,accel=kvm,kernel-irqchip=split \ > > -drive file=fedora.qcow2,format=qcow2,if=virtio \ > > -device intel-iommu,intremap=on,device-iotlb=on \ > > -device vhost-vsock-pci,guest-cid=6,iommu_platform=on,ats=on,id=v1 > > > > qemu-system-x86_64: vtd_iova_to_slpte: detected IOVA overflow > > (iova=0x1d4030c0) > > > It's a hint that IOVA exceeds the AW. It might be worth to check whether the > missed IOVA reported from IOTLB is legal. Yeah. By default the QEMU vIOMMU should only support 39bits width for guest iova address space. To extend it, we can use: -device intel-iommu,aw-bits=48 So we'll enable 4-level iommu pgtable. Here the iova is obvious longer than this, so it'll be interesting to know why that iova is allocated in the guest driver since the driver should know somehow that this iova is beyond what's supported (guest iommu driver should be able to probe viommu capability on this width information too). -- Peter Xu ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [patch V3 22/37] highmem: High implementation details and document API
On Tue, Nov 03 2020 at 09:48, Linus Torvalds wrote: > I have no complaints about the patch, but it strikes me that if people > want to actually have much better debug coverage, this is where it > should be (I like the "every other address" thing too, don't get me > wrong). > > In particular, instead of these PageHighMem(page) tests, I think > something like this would be better: > >#ifdef CONFIG_DEBUG_HIGHMEM > #define page_use_kmap(page) ((page),1) >#else > #define page_use_kmap(page) PageHighMem(page) >#endif > > adn then replace those "if (!PageHighMem(page))" tests with "if > (!page_use_kmap())" instead. > > IOW, in debug mode, it would _always_ remap the page, whether it's > highmem or not. That would really stress the highmem code and find any > fragilities. Yes, that makes a lot of sense. We just have to avoid that for the architectures with aliasing issues. > Anyway, this is all sepatrate from the series, which still looks fine > to me. Just a reaction to seeing the patch, and Thomas' earlier > mention that the highmem debugging doesn't actually do much. Right, forcing it for both kmap and kmap_local is straight forward. I'll cook a patch on top for that. Thanks, tglx ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [patch V3 22/37] highmem: High implementation details and document API
On Tue, Nov 3, 2020 at 2:33 AM Thomas Gleixner wrote: > > +static inline void *kmap(struct page *page) > +{ > + void *addr; > + > + might_sleep(); > + if (!PageHighMem(page)) > + addr = page_address(page); > + else > + addr = kmap_high(page); > + kmap_flush_tlb((unsigned long)addr); > + return addr; > +} > + > +static inline void kunmap(struct page *page) > +{ > + might_sleep(); > + if (!PageHighMem(page)) > + return; > + kunmap_high(page); > +} I have no complaints about the patch, but it strikes me that if people want to actually have much better debug coverage, this is where it should be (I like the "every other address" thing too, don't get me wrong). In particular, instead of these PageHighMem(page) tests, I think something like this would be better: #ifdef CONFIG_DEBUG_HIGHMEM #define page_use_kmap(page) ((page),1) #else #define page_use_kmap(page) PageHighMem(page) #endif adn then replace those "if (!PageHighMem(page))" tests with "if (!page_use_kmap())" instead. IOW, in debug mode, it would _always_ remap the page, whether it's highmem or not. That would really stress the highmem code and find any fragilities. No? Anyway, this is all sepatrate from the series, which still looks fine to me. Just a reaction to seeing the patch, and Thomas' earlier mention that the highmem debugging doesn't actually do much. Linus ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH mlx5-next v1 06/11] vdpa/mlx5: Connect mlx5_vdpa to auxiliary bus
On Sun, Nov 01, 2020 at 10:15:37PM +0200, Leon Romanovsky wrote: > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c > b/drivers/vdpa/mlx5/net/mlx5_vnet.c > index 6c218b47b9f1..5316e51e72d4 100644 > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c > @@ -1,18 +1,27 @@ > // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB > /* Copyright (c) 2020 Mellanox Technologies Ltd. */ > > +#include > #include > +#include > +#include > #include > #include > +#include > +#include > #include > #include > +#include > #include > #include > -#include > #include > -#include "mlx5_vnet.h" > #include "mlx5_vdpa.h" > > +MODULE_AUTHOR("Eli Cohen "); > +MODULE_DESCRIPTION("Mellanox VDPA driver"); > +MODULE_LICENSE("Dual BSD/GPL"); > + > +#define to_mlx5_vdpa_ndev(__mvdev) container_of(__mvdev, struct > mlx5_vdpa_net, mvdev) > #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev) > > #define VALID_FEATURES_MASK > \ > @@ -159,6 +168,11 @@ static bool mlx5_vdpa_debug; > mlx5_vdpa_info(mvdev, "%s\n", #_status); >\ > } while (0) > > +static inline u32 mlx5_vdpa_max_qps(int max_vqs) > +{ > + return max_vqs / 2; > +} > + > static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) > { > if (status & ~VALID_STATUS_MASK) > @@ -1928,8 +1942,11 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev) > } > } > > -void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) > +static int mlx5v_probe(struct auxiliary_device *adev, > +const struct auxiliary_device_id *id) > { > + struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); > + struct mlx5_core_dev *mdev = madev->mdev; > struct virtio_net_config *config; > struct mlx5_vdpa_dev *mvdev; > struct mlx5_vdpa_net *ndev; > @@ -1943,7 +1960,7 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) > ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, > mdev->device, _vdpa_ops, >2 * mlx5_vdpa_max_qps(max_vqs)); > if (IS_ERR(ndev)) > - return ndev; > + return PTR_ERR(ndev); > > ndev->mvdev.max_vqs = max_vqs; > mvdev = >mvdev; > @@ -1972,7 +1989,8 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) > if (err) > goto err_reg; > > - return ndev; > + dev_set_drvdata(>dev, ndev); > + return 0; > > err_reg: > free_resources(ndev); > @@ -1981,10 +1999,29 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) > err_mtu: > mutex_destroy(>reslock); > put_device(>vdev.dev); > - return ERR_PTR(err); > + return err; > } > > -void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev) > +static int mlx5v_remove(struct auxiliary_device *adev) > { > + struct mlx5_vdpa_dev *mvdev = dev_get_drvdata(>dev); > + > vdpa_unregister_device(>vdev); > + return 0; > } > + > +static const struct auxiliary_device_id mlx5v_id_table[] = { > + { .name = MLX5_ADEV_NAME ".vnet", }, > + {}, > +}; > + > +MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table); > + > +static struct auxiliary_driver mlx5v_driver = { > + .name = "vnet", > + .probe = mlx5v_probe, > + .remove = mlx5v_remove, > + .id_table = mlx5v_id_table, > +}; It is hard to see from the diff, but when this patch is applied the vdpa module looks like I imagined things would look with the auxiliary bus. It is very similar in structure to a PCI driver with the probe() function cleanly registering with its subsystem. This is what I'd like to see from the new Intel RDMA driver. Greg, I think this patch is the best clean usage example. I've looked over this series and it has the right idea and parts. There is definitely more that can be done to improve mlx5 in this area, but this series is well scoped and cleans a good part of it. Jason ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V4 24/37] sched: highmem: Store local kmaps in task struct
Instead of storing the map per CPU provide and use per task storage. That prepares for local kmaps which are preemptible. The context switch code is preparatory and not yet in use because kmap_atomic() runs with preemption disabled. Will be made usable in the next step. The context switch logic is safe even when an interrupt happens after clearing or before restoring the kmaps. The kmap index in task struct is not modified so any nesting kmap in an interrupt will use unused indices and on return the counter is the same as before. Also add an assert into the return to user space code. Going back to user space with an active kmap local is a nono. Signed-off-by: Thomas Gleixner --- V4: Use the version which actually compiles and works V3: Handle the debug case correctly --- include/linux/highmem-internal.h | 10 +++ include/linux/sched.h|9 +++ kernel/entry/common.c|2 kernel/fork.c|1 kernel/sched/core.c | 18 +++ mm/highmem.c | 99 +++ 6 files changed, 129 insertions(+), 10 deletions(-) --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -9,6 +9,16 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); void *__kmap_local_page_prot(struct page *page, pgprot_t prot); void kunmap_local_indexed(void *vaddr); +void kmap_local_fork(struct task_struct *tsk); +void __kmap_local_sched_out(void); +void __kmap_local_sched_in(void); +static inline void kmap_assert_nomap(void) +{ + DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx); +} +#else +static inline void kmap_local_fork(struct task_struct *tsk) { } +static inline void kmap_assert_nomap(void) { } #endif #ifdef CONFIG_HIGHMEM --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -629,6 +630,13 @@ struct wake_q_node { struct wake_q_node *next; }; +struct kmap_ctrl { +#ifdef CONFIG_KMAP_LOCAL + int idx; + pte_t pteval[KM_MAX_IDX]; +#endif +}; + struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -1294,6 +1302,7 @@ struct task_struct { unsigned intsequential_io; unsigned intsequential_io_avg; #endif + struct kmap_ctrlkmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -194,6 +195,7 @@ static void exit_to_user_mode_prepare(st /* Ensure that the address limit is intact and no locks are held */ addr_limit_user_check(); + kmap_assert_nomap(); lockdep_assert_irqs_disabled(); lockdep_sys_exit(); } --- a/kernel/fork.c +++ b/kernel/fork.c @@ -930,6 +930,7 @@ static struct task_struct *dup_task_stru account_kernel_stack(tsk, 1); kcov_task_init(tsk); + kmap_local_fork(tsk); #ifdef CONFIG_FAULT_INJECTION tsk->fail_nth = 0; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4053,6 +4053,22 @@ static inline void finish_lock_switch(st # define finish_arch_post_lock_switch()do { } while (0) #endif +static inline void kmap_local_sched_out(void) +{ +#ifdef CONFIG_KMAP_LOCAL + if (unlikely(current->kmap_ctrl.idx)) + __kmap_local_sched_out(); +#endif +} + +static inline void kmap_local_sched_in(void) +{ +#ifdef CONFIG_KMAP_LOCAL + if (unlikely(current->kmap_ctrl.idx)) + __kmap_local_sched_in(); +#endif +} + /** * prepare_task_switch - prepare to switch tasks * @rq: the runqueue preparing to switch @@ -4075,6 +4091,7 @@ prepare_task_switch(struct rq *rq, struc perf_event_task_sched_out(prev, next); rseq_preempt(prev); fire_sched_out_preempt_notifiers(prev, next); + kmap_local_sched_out(); prepare_task(next); prepare_arch_switch(next); } @@ -4141,6 +4158,7 @@ static struct rq *finish_task_switch(str finish_lock_switch(rq); finish_arch_post_lock_switch(); kcov_finish_switch(current); + kmap_local_sched_in(); fire_sched_in_preempt_notifiers(current); /* --- a/mm/highmem.c +++ b/mm/highmem.c @@ -365,8 +365,6 @@ EXPORT_SYMBOL(kunmap_high); #include -static DEFINE_PER_CPU(int, __kmap_local_idx); - /* * With DEBUG_HIGHMEM the stack depth is doubled and every second * slot is unused which acts as a guard page @@ -379,23 +377,21 @@ static DEFINE_PER_CPU(int, __kmap_local_ static inline int kmap_local_idx_push(void) { - int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1; - WARN_ON_ONCE(in_irq() && !irqs_disabled()); -
Re: [patch V3 24/37] sched: highmem: Store local kmaps in task struct
On Tue, Nov 03 2020 at 10:27, Thomas Gleixner wrote: > +struct kmap_ctrl { > +#ifdef CONFIG_KMAP_LOCAL > + int idx; > + pte_t pteval[KM_TYPE_NR]; I'm a moron. Fixed it on the test machine ... ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)
On 2020/10/30 下午3:45, Si-Wei Liu wrote: Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. The memory usage for bookkeeping pinned pages is reverted to what it was before: only one single free page is needed. This helps reduce the host memory demand for VM with a large amount of memory, or in the situation where host is running short of free memory. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu --- drivers/vhost/vdpa.c | 64 +--- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b6d9016..8da8558 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + else + atomic64_add(size >> PAGE_SHIFT, >mm->pinned_vm); return r; } @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long lock_limit, sz2pin, nchunks, i; u64 iova = msg->iova; + long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; + /* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, gup_flags |= FOLL_WRITE; npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; - if (!npages) - return -EINVAL; + if (!npages) { + ret = -EINVAL; + goto free; + } mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, >mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; + nchunks = 0; while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, -gup_flags, page_list, NULL); - if (ret != pinned) + sz2pin = min_t(unsigned long, npages, list_size); + pinned = pin_user_pages(cur_base, sz2pin, + gup_flags, page_list, NULL); + if (sz2pin != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } goto out; + } + nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); - for (i = 0; i < ret; i++) { + for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) + ret = vhost_vdpa_map(v, iova, csize, +map_pfn << PAGE_SHIFT, +msg->perm); + if (ret) goto out; + map_pfn = this_pfn; iova += csize; + nchunks = 0; } last_pfn = this_pfn; } - cur_base += ret << PAGE_SHIFT; - npages -= ret; + cur_base += pinned << PAGE_SHIFT; + npages -= pinned; } /* Pin the rest chunk */ @@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, map_pfn << PAGE_SHIFT,
Re: [patch V3 03/37] fs: Remove asm/kmap_types.h includes
On Tue, Nov 03, 2020 at 10:27:15AM +0100, Thomas Gleixner wrote: > Historical leftovers from the time where kmap() had fixed slots. > > Signed-off-by: Thomas Gleixner > Cc: Alexander Viro > Cc: Benjamin LaHaise > Cc: linux-fsde...@vger.kernel.org > Cc: linux-...@kvack.org > Cc: Chris Mason > Cc: Josef Bacik > Cc: David Sterba Acked-by: David Sterba For the btrfs bits > fs/btrfs/ctree.h |1 - > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -17,7 +17,6 @@ > #include > #include > #include > -#include > #include > #include > #include ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2 0/8] slab: provide and use krealloc_array()
On Tue, Nov 3, 2020 at 12:13 PM Bartosz Golaszewski wrote: > On Tue, Nov 3, 2020 at 5:14 AM Joe Perches wrote: > > On Mon, 2020-11-02 at 16:20 +0100, Bartosz Golaszewski wrote: > > > From: Bartosz Golaszewski > Yeah so I had this concern for devm_krealloc() and even sent a patch > that extended it to honor __GFP_ZERO before I noticed that regular > krealloc() silently ignores __GFP_ZERO. I'm not sure if this is on > purpose. Maybe we should either make krealloc() honor __GFP_ZERO or > explicitly state in its documentation that it ignores it? And my voice here is to ignore for the same reasons: respect realloc(3) and making common sense with the idea of REallocating (capital letters on purpose). -- With Best Regards, Andy Shevchenko ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 27/37] x86/crashdump/32: Simplify copy_oldmem_page()
Replace kmap_atomic_pfn() with kmap_local_pfn() which is preemptible and can take page faults. Remove the indirection of the dump page and the related cruft which is not longer required. Signed-off-by: Thomas Gleixner --- V3: New patch --- arch/x86/kernel/crash_dump_32.c | 48 1 file changed, 10 insertions(+), 38 deletions(-) --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -13,8 +13,6 @@ #include -static void *kdump_buf_page; - static inline bool is_crashed_pfn_valid(unsigned long pfn) { #ifndef CONFIG_X86_PAE @@ -41,15 +39,11 @@ static inline bool is_crashed_pfn_valid( * @userbuf: if set, @buf is in user address space, use copy_to_user(), * otherwise @buf is in kernel address space, use memcpy(). * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - * - * Calling copy_to_user() in atomic context is not desirable. Hence first - * copying the data to a pre-allocated kernel page and then copying to user - * space in non-atomic context. + * Copy a page from "oldmem". For this page, there might be no pte mapped + * in the current kernel. */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, +unsigned long offset, int userbuf) { void *vaddr; @@ -59,38 +53,16 @@ ssize_t copy_oldmem_page(unsigned long p if (!is_crashed_pfn_valid(pfn)) return -EFAULT; - vaddr = kmap_atomic_pfn(pfn); + vaddr = kmap_local_pfn(pfn); if (!userbuf) { - memcpy(buf, (vaddr + offset), csize); - kunmap_atomic(vaddr); + memcpy(buf, vaddr + offset, csize); } else { - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Kdump buffer page not" - " allocated\n"); - kunmap_atomic(vaddr); - return -EFAULT; - } - copy_page(kdump_buf_page, vaddr); - kunmap_atomic(vaddr); - if (copy_to_user(buf, (kdump_buf_page + offset), csize)) - return -EFAULT; + if (copy_to_user(buf, vaddr + offset, csize)) + csize = -EFAULT; } - return csize; -} + kunmap_local(vaddr); -static int __init kdump_buf_page_init(void) -{ - int ret = 0; - - kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer" -" page\n"); - ret = -ENOMEM; - } - - return ret; + return csize; } -arch_initcall(kdump_buf_page_init); ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 19/37] mm/highmem: Remove the old kmap_atomic cruft
All users gone. Signed-off-by: Thomas Gleixner --- include/linux/highmem.h | 63 +++- mm/highmem.c|7 - 2 files changed, 5 insertions(+), 65 deletions(-) --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -86,31 +86,16 @@ static inline void kunmap(struct page *p * be used in IRQ contexts, so in some (very limited) cases we need * it. */ - -#ifndef CONFIG_KMAP_LOCAL -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); -void kunmap_atomic_high(void *kvaddr); - static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - return kmap_atomic_high_prot(page, prot); -} - -static inline void __kunmap_atomic(void *vaddr) -{ - kunmap_atomic_high(vaddr); + return __kmap_local_page_prot(page, prot); } -#else /* !CONFIG_KMAP_LOCAL */ -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic(struct page *page) { - preempt_disable(); - pagefault_disable(); - return __kmap_local_page_prot(page, prot); + return kmap_atomic_prot(page, kmap_prot); } static inline void *kmap_atomic_pfn(unsigned long pfn) @@ -125,13 +110,6 @@ static inline void __kunmap_atomic(void kunmap_local_indexed(addr); } -#endif /* CONFIG_KMAP_LOCAL */ - -static inline void *kmap_atomic(struct page *page) -{ - return kmap_atomic_prot(page, kmap_prot); -} - /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); extern atomic_long_t _totalhigh_pages; @@ -212,41 +190,8 @@ static inline void __kunmap_atomic(void #define kmap_flush_unused()do {} while(0) -#endif /* CONFIG_HIGHMEM */ - -#if !defined(CONFIG_KMAP_LOCAL) -#if defined(CONFIG_HIGHMEM) - -DECLARE_PER_CPU(int, __kmap_atomic_idx); - -static inline int kmap_atomic_idx_push(void) -{ - int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; - -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx >= KM_TYPE_NR); -#endif - return idx; -} - -static inline int kmap_atomic_idx(void) -{ - return __this_cpu_read(__kmap_atomic_idx) - 1; -} -static inline void kmap_atomic_idx_pop(void) -{ -#ifdef CONFIG_DEBUG_HIGHMEM - int idx = __this_cpu_dec_return(__kmap_atomic_idx); - - BUG_ON(idx < 0); -#else - __this_cpu_dec(__kmap_atomic_idx); -#endif -} -#endif -#endif +#endif /* CONFIG_HIGHMEM */ /* * Prevent people trying to call kunmap_atomic() as if it were kunmap() --- a/mm/highmem.c +++ b/mm/highmem.c @@ -31,12 +31,6 @@ #include #include -#ifndef CONFIG_KMAP_LOCAL -#ifdef CONFIG_HIGHMEM -DEFINE_PER_CPU(int, __kmap_atomic_idx); -#endif -#endif - /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped @@ -410,6 +404,7 @@ static inline void kmap_local_idx_pop(vo #ifndef arch_kmap_local_post_map # define arch_kmap_local_post_map(vaddr, pteval) do { } while (0) #endif + #ifndef arch_kmap_local_pre_unmap # define arch_kmap_local_pre_unmap(vaddr) do { } while (0) #endif ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 37/37] io-mapping: Remove io_mapping_map_atomic_wc()
No more users. Get rid of it and remove the traces in documentation. Signed-off-by: Thomas Gleixner --- V3: New patch --- Documentation/driver-api/io-mapping.rst | 22 +--- include/linux/io-mapping.h | 42 +--- 2 files changed, 9 insertions(+), 55 deletions(-) --- a/Documentation/driver-api/io-mapping.rst +++ b/Documentation/driver-api/io-mapping.rst @@ -21,19 +21,15 @@ mappable, while 'size' indicates how lar enable. Both are in bytes. This _wc variant provides a mapping which may only be used with -io_mapping_map_atomic_wc(), io_mapping_map_local_wc() or -io_mapping_map_wc(). +io_mapping_map_local_wc() or io_mapping_map_wc(). With this mapping object, individual pages can be mapped either temporarily or long term, depending on the requirements. Of course, temporary maps are -more efficient. They come in two flavours:: +more efficient. void *io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) - void *io_mapping_map_atomic_wc(struct io_mapping *mapping, - unsigned long offset) - 'offset' is the offset within the defined mapping region. Accessing addresses beyond the region specified in the creation function yields undefined results. Using an offset which is not page aligned yields an @@ -50,9 +46,6 @@ io_mapping_map_local_wc() has a side eff migration to make the mapping code work. No caller can rely on this side effect. -io_mapping_map_atomic_wc() has the side effect of disabling preemption and -pagefaults. Don't use in new code. Use io_mapping_map_local_wc() instead. - Nested mappings need to be undone in reverse order because the mapping code uses a stack for keeping track of them:: @@ -65,11 +58,10 @@ Nested mappings need to be undone in rev The mappings are released with:: void io_mapping_unmap_local(void *vaddr) - void io_mapping_unmap_atomic(void *vaddr) -'vaddr' must be the value returned by the last io_mapping_map_local_wc() or -io_mapping_map_atomic_wc() call. This unmaps the specified mapping and -undoes the side effects of the mapping functions. +'vaddr' must be the value returned by the last io_mapping_map_local_wc() +call. This unmaps the specified mapping and undoes eventual side effects of +the mapping function. If you need to sleep while holding a mapping, you can use the regular variant, although this may be significantly slower:: @@ -77,8 +69,8 @@ If you need to sleep while holding a map void *io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) -This works like io_mapping_map_atomic/local_wc() except it has no side -effects and the pointer is globaly visible. +This works like io_mapping_map_local_wc() except it has no side effects and +the pointer is globaly visible. The mappings are released with:: --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -60,28 +60,7 @@ io_mapping_fini(struct io_mapping *mappi iomap_free(mapping->base, mapping->size); } -/* Atomic map/unmap */ -static inline void __iomem * -io_mapping_map_atomic_wc(struct io_mapping *mapping, -unsigned long offset) -{ - resource_size_t phys_addr; - - BUG_ON(offset >= mapping->size); - phys_addr = mapping->base + offset; - preempt_disable(); - pagefault_disable(); - return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); -} - -static inline void -io_mapping_unmap_atomic(void __iomem *vaddr) -{ - kunmap_local_indexed((void __force *)vaddr); - pagefault_enable(); - preempt_enable(); -} - +/* Temporary mappings which are only valid in the current context */ static inline void __iomem * io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) { @@ -163,24 +142,7 @@ io_mapping_unmap(void __iomem *vaddr) { } -/* Atomic map/unmap */ -static inline void __iomem * -io_mapping_map_atomic_wc(struct io_mapping *mapping, -unsigned long offset) -{ - preempt_disable(); - pagefault_disable(); - return io_mapping_map_wc(mapping, offset, PAGE_SIZE); -} - -static inline void -io_mapping_unmap_atomic(void __iomem *vaddr) -{ - io_mapping_unmap(vaddr); - pagefault_enable(); - preempt_enable(); -} - +/* Temporary mappings which are only valid in the current context */ static inline void __iomem * io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) { ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 20/37] io-mapping: Cleanup atomic iomap
Switch the atomic iomap implementation over to kmap_local and stick the preempt/pagefault mechanics into the generic code similar to the kmap_atomic variants. Rename the x86 map function in preparation for a non-atomic variant. Signed-off-by: Thomas Gleixner --- V2: New patch to make review easier --- arch/x86/include/asm/iomap.h |9 + arch/x86/mm/iomap_32.c |6 ++ include/linux/io-mapping.h |8 ++-- 3 files changed, 9 insertions(+), 14 deletions(-) --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -13,14 +13,7 @@ #include #include -void __iomem *iomap_atomic_pfn_prot(unsigned long pfn, pgprot_t prot); - -static inline void iounmap_atomic(void __iomem *vaddr) -{ - kunmap_local_indexed((void __force *)vaddr); - pagefault_enable(); - preempt_enable(); -} +void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot); int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -44,7 +44,7 @@ void iomap_free(resource_size_t base, un } EXPORT_SYMBOL_GPL(iomap_free); -void __iomem *iomap_atomic_pfn_prot(unsigned long pfn, pgprot_t prot) +void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot) { /* * For non-PAT systems, translate non-WB request to UC- just in @@ -60,8 +60,6 @@ void __iomem *iomap_atomic_pfn_prot(unsi /* Filter out unsupported __PAGE_KERNEL* bits: */ pgprot_val(prot) &= __default_kernel_pte_mask; - preempt_disable(); - pagefault_disable(); return (void __force __iomem *)__kmap_local_pfn_prot(pfn, prot); } -EXPORT_SYMBOL_GPL(iomap_atomic_pfn_prot); +EXPORT_SYMBOL_GPL(__iomap_local_pfn_prot); --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -69,13 +69,17 @@ io_mapping_map_atomic_wc(struct io_mappi BUG_ON(offset >= mapping->size); phys_addr = mapping->base + offset; - return iomap_atomic_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); + preempt_disable(); + pagefault_disable(); + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); } static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { - iounmap_atomic(vaddr); + kunmap_local_indexed((void __force *)vaddr); + pagefault_enable(); + preempt_enable(); } static inline void __iomem * ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 31/37] drm/ttm: Replace kmap_atomic() usage
There is no reason to disable pagefaults and preemption as a side effect of kmap_atomic_prot(). Use kmap_local_page_prot() instead and document the reasoning for the mapping usage with the given pgprot. Remove the NULL pointer check for the map. These functions return a valid address for valid pages and the return was bogus anyway as it would have left preemption and pagefaults disabled. Signed-off-by: Thomas Gleixner Cc: Christian Koenig Cc: Huang Rui Cc: David Airlie Cc: Daniel Vetter Cc: dri-de...@lists.freedesktop.org --- V3: New patch --- drivers/gpu/drm/ttm/ttm_bo_util.c | 20 1 file changed, 12 insertions(+), 8 deletions(-) --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -181,13 +181,15 @@ static int ttm_copy_io_ttm_page(struct t return -ENOMEM; src = (void *)((unsigned long)src + (page << PAGE_SHIFT)); - dst = kmap_atomic_prot(d, prot); - if (!dst) - return -ENOMEM; + /* +* Ensure that a highmem page is mapped with the correct +* pgprot. For non highmem the mapping is already there. +*/ + dst = kmap_local_page_prot(d, prot); memcpy_fromio(dst, src, PAGE_SIZE); - kunmap_atomic(dst); + kunmap_local(dst); return 0; } @@ -203,13 +205,15 @@ static int ttm_copy_ttm_io_page(struct t return -ENOMEM; dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT)); - src = kmap_atomic_prot(s, prot); - if (!src) - return -ENOMEM; + /* +* Ensure that a highmem page is mapped with the correct +* pgprot. For non highmem the mapping is already there. +*/ + src = kmap_local_page_prot(s, prot); memcpy_toio(dst, src, PAGE_SIZE); - kunmap_atomic(src); + kunmap_local(src); return 0; } ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 09/37] arc/mm/highmem: Use generic kmap atomic implementation
Adopt the map ordering to match the other architectures and the generic code. Also make the maximum entries limited and not dependend on the number of CPUs. With the original implementation did the following calculation: nr_slots = mapsize >> PAGE_SHIFT; The results in either 512 or 1024 total slots depending on configuration. The total slots have to be divided by the number of CPUs to get the number of slots per CPU (former KM_TYPE_NR). ARC supports up to 4k CPUs, so this just falls apart in random ways depending on the number of CPUs and the actual kmap (atomic) nesting. The comment in highmem.c: * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE * slots across NR_CPUS would be more than sufficient (generic code defines * KM_TYPE_NR as 20). is just wrong. KM_TYPE_NR (now KM_MAX_IDX) is the number of slots per CPU because kmap_local/atomic() needs to support nested mappings (thread, softirq, interrupt). While KM_MAX_IDX might be overestimated, the above reasoning is just wrong and clearly the highmem code was never tested with any system with more than a few CPUs. Use the default number of slots and fail the build when it does not fit. Randomly failing at runtime is not a really good option. Signed-off-by: Thomas Gleixner Cc: Vineet Gupta Cc: linux-snps-...@lists.infradead.org --- V3: Make it actually more correct. --- arch/arc/Kconfig |1 arch/arc/include/asm/highmem.h| 26 ++ arch/arc/include/asm/kmap_types.h | 14 - arch/arc/mm/highmem.c | 54 +++--- 4 files changed, 26 insertions(+), 69 deletions(-) --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -507,6 +507,7 @@ config LINUX_RAM_BASE config HIGHMEM bool "High Memory Support" select ARCH_DISCONTIGMEM_ENABLE + select KMAP_LOCAL help With ARC 2G:2G address split, only upper 2G is directly addressable by kernel. Enable this to potentially allow access to rest of 2G and PAE --- a/arch/arc/include/asm/highmem.h +++ b/arch/arc/include/asm/highmem.h @@ -9,17 +9,29 @@ #ifdef CONFIG_HIGHMEM #include -#include +#include + +#define FIXMAP_SIZEPGDIR_SIZE +#define PKMAP_SIZE PGDIR_SIZE /* start after vmalloc area */ #define FIXMAP_BASE(PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE) -#define FIXMAP_SIZEPGDIR_SIZE /* only 1 PGD worth */ -#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS) -#define FIXMAP_ADDR(nr)(FIXMAP_BASE + ((nr) << PAGE_SHIFT)) + +#define FIX_KMAP_SLOTS (KM_MAX_IDX * NR_CPUS) +#define FIX_KMAP_BEGIN (0UL) +#define FIX_KMAP_END ((FIX_KMAP_BEGIN + FIX_KMAP_SLOTS) - 1) + +#define FIXADDR_TOP(FIXMAP_BASE + (FIX_KMAP_END << PAGE_SHIFT)) + +/* + * This should be converted to the asm-generic version, but of course this + * is needlessly different from all other architectures. Sigh - tglx + */ +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) (((FIXADDR_TOP - ((x) & PAGE_MASK))) >> PAGE_SHIFT) /* start after fixmap area */ #define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE) -#define PKMAP_SIZE PGDIR_SIZE #define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT) #define LAST_PKMAP_MASK(LAST_PKMAP - 1) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) @@ -29,11 +41,13 @@ extern void kmap_init(void); +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE) + static inline void flush_cache_kmaps(void) { flush_cache_all(); } - #endif #endif --- a/arch/arc/include/asm/kmap_types.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) - */ - -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -/* - * We primarily need to define KM_TYPE_NR here but that in turn - * is a function of PGDIR_SIZE etc. - * To avoid circular deps issue, put everything in asm/highmem.h - */ -#endif --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -36,9 +36,8 @@ * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) * - * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE - * slots across NR_CPUS would be more than sufficient (generic code defines - * KM_TYPE_NR as 20). + * - The fixed KMAP slots for kmap_local/atomic() require KM_MAX_IDX slots per + * CPU. So the number of CPUs sharing a single PTE page is limited. * * - pkmap being preemptible, in theory could do with more than 256 concurrent * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse @@ -47,48 +46,6 @@ */ extern pte_t *
[patch V3 13/37] mips/mm/highmem: Switch to generic kmap atomic
No reason having the same code in every architecture Signed-off-by: Thomas Gleixner Cc: Thomas Bogendoerfer Cc: linux-m...@vger.kernel.org --- V3: Remove the kmap types cruft --- arch/mips/Kconfig |1 arch/mips/include/asm/fixmap.h |4 - arch/mips/include/asm/highmem.h|6 +- arch/mips/include/asm/kmap_types.h | 13 -- arch/mips/mm/highmem.c | 77 - arch/mips/mm/init.c|4 - 6 files changed, 6 insertions(+), 99 deletions(-) --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2719,6 +2719,7 @@ config WAR_MIPS34K_MISSED_ITLB config HIGHMEM bool "High Memory Support" depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA + select KMAP_LOCAL config CPU_SUPPORTS_HIGHMEM bool --- a/arch/mips/include/asm/fixmap.h +++ b/arch/mips/include/asm/fixmap.h @@ -17,7 +17,7 @@ #include #ifdef CONFIG_HIGHMEM #include -#include +#include #endif /* @@ -52,7 +52,7 @@ enum fixed_addresses { #ifdef CONFIG_HIGHMEM /* reserved pte's for temporary kernel mappings */ FIX_KMAP_BEGIN = FIX_CMAP_END + 1, - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #endif __end_of_fixed_addresses }; --- a/arch/mips/include/asm/highmem.h +++ b/arch/mips/include/asm/highmem.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; @@ -48,11 +48,11 @@ extern pte_t *pkmap_page_table; #define ARCH_HAS_KMAP_FLUSH_TLB extern void kmap_flush_tlb(unsigned long addr); -extern void *kmap_atomic_pfn(unsigned long pfn); #define flush_cache_kmaps()BUG_ON(cpu_has_dc_aliases) -extern void kmap_init(void); +#define arch_kmap_local_post_map(vaddr, pteval) local_flush_tlb_one(vaddr) +#define arch_kmap_local_post_unmap(vaddr) local_flush_tlb_one(vaddr) #endif /* __KERNEL__ */ --- a/arch/mips/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -8,8 +8,6 @@ #include #include -static pte_t *kmap_pte; - unsigned long highstart_pfn, highend_pfn; void kmap_flush_tlb(unsigned long addr) @@ -17,78 +15,3 @@ void kmap_flush_tlb(unsigned long addr) flush_tlb_one(addr); } EXPORT_SYMBOL(kmap_flush_tlb); - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte - idx))); -#endif - set_pte(kmap_pte-idx, mk_pte(page, prot)); - local_flush_tlb_one((unsigned long)vaddr); - - return (void*) vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type __maybe_unused; - - if (vaddr < FIXADDR_START) - return; - - type = kmap_atomic_idx(); -#ifdef CONFIG_DEBUG_HIGHMEM - { - int idx = type + KM_TYPE_NR * smp_processor_id(); - - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - /* -* force other mappings to Oops if they'll try to access -* this pte without first remap it -*/ - pte_clear(_mm, vaddr, kmap_pte-idx); - local_flush_tlb_one(vaddr); - } -#endif - kmap_atomic_idx_pop(); -} -EXPORT_SYMBOL(kunmap_atomic_high); - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - unsigned long vaddr; - int idx, type; - - preempt_disable(); - pagefault_disable(); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL)); - flush_tlb_one(vaddr); - - return (void*) vaddr; -} - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = virt_to_kpte(kmap_vstart); -} --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -402,9 +401,6 @@ void __init paging_init(void) pagetable_init(); -#ifdef
[patch V3 18/37] highmem: Get rid of kmap_types.h
The header is not longer used and on alpha, ia64, openrisc, parisc and um it was completely unused anyway as these architectures have no highmem support. Signed-off-by: Thomas Gleixner --- V3: New patch --- arch/alpha/include/asm/kmap_types.h | 15 --- arch/ia64/include/asm/kmap_types.h | 13 - arch/openrisc/mm/init.c |1 - arch/openrisc/mm/ioremap.c |1 - arch/parisc/include/asm/kmap_types.h | 13 - arch/um/include/asm/fixmap.h |1 - arch/um/include/asm/kmap_types.h | 13 - include/asm-generic/Kbuild |1 - include/asm-generic/kmap_types.h | 11 --- include/linux/highmem.h |2 -- 10 files changed, 71 deletions(-) --- a/arch/alpha/include/asm/kmap_types.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -/* Dummy header just to define km_type. */ - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif --- a/arch/ia64/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_KMAP_TYPES_H -#define _ASM_IA64_KMAP_TYPES_H - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif /* _ASM_IA64_KMAP_TYPES_H */ --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include --- a/arch/parisc/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif --- a/arch/um/include/asm/fixmap.h +++ b/arch/um/include/asm/fixmap.h @@ -3,7 +3,6 @@ #define __UM_FIXMAP_H #include -#include #include #include #include --- a/arch/um/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2002 Jeff Dike (jd...@karaya.com) - */ - -#ifndef __UM_KMAP_TYPES_H -#define __UM_KMAP_TYPES_H - -/* No more #include "asm/arch/kmap_types.h" ! */ - -#define KM_TYPE_NR 14 - -#endif --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -30,7 +30,6 @@ mandatory-y += irq.h mandatory-y += irq_regs.h mandatory-y += irq_work.h mandatory-y += kdebug.h -mandatory-y += kmap_types.h mandatory-y += kmap_size.h mandatory-y += kprobes.h mandatory-y += linkage.h --- a/include/asm-generic/kmap_types.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_GENERIC_KMAP_TYPES_H -#define _ASM_GENERIC_KMAP_TYPES_H - -#ifdef __WITH_KM_FENCE -# define KM_TYPE_NR 41 -#else -# define KM_TYPE_NR 20 -#endif - -#endif --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -29,8 +29,6 @@ static inline void invalidate_kernel_vma } #endif -#include - /* * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. */ ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 30/37] highmem: Remove kmap_atomic_pfn()
No more users. Signed-off-by: Thomas Gleixner --- V3: New patch --- include/linux/highmem-internal.h | 12 1 file changed, 12 deletions(-) --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -99,13 +99,6 @@ static inline void *kmap_atomic(struct p return kmap_atomic_prot(page, kmap_prot); } -static inline void *kmap_atomic_pfn(unsigned long pfn) -{ - preempt_disable(); - pagefault_disable(); - return __kmap_local_pfn_prot(pfn, kmap_prot); -} - static inline void __kunmap_atomic(void *addr) { kunmap_local_indexed(addr); @@ -193,11 +186,6 @@ static inline void *kmap_atomic_prot(str return kmap_atomic(page); } -static inline void *kmap_atomic_pfn(unsigned long pfn) -{ - return kmap_atomic(pfn_to_page(pfn)); -} - static inline void __kunmap_atomic(void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 21/37] Documentation/io-mapping: Remove outdated blurb
The implementation details in the documentation are outdated and not really helpful. Remove them. Signed-off-by: Thomas Gleixner --- V3: New patch --- Documentation/driver-api/io-mapping.rst | 22 -- 1 file changed, 22 deletions(-) --- a/Documentation/driver-api/io-mapping.rst +++ b/Documentation/driver-api/io-mapping.rst @@ -73,25 +73,3 @@ for pages mapped with io_mapping_map_wc. At driver close time, the io_mapping object must be freed:: void io_mapping_free(struct io_mapping *mapping) - -Current Implementation -== - -The initial implementation of these functions uses existing mapping -mechanisms and so provides only an abstraction layer and no new -functionality. - -On 64-bit processors, io_mapping_create_wc calls ioremap_wc for the whole -range, creating a permanent kernel-visible mapping to the resource. The -map_atomic and map functions add the requested offset to the base of the -virtual address returned by ioremap_wc. - -On 32-bit processors with HIGHMEM defined, io_mapping_map_atomic_wc uses -kmap_atomic_pfn to map the specified page in an atomic fashion; -kmap_atomic_pfn isn't really supposed to be used with device pages, but it -provides an efficient mapping for this usage. - -On 32-bit processors without HIGHMEM defined, io_mapping_map_atomic_wc and -io_mapping_map_wc both use ioremap_wc, a terribly inefficient function which -performs an IPI to inform all processors about the new mapping. This results -in a significant performance penalty. ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 25/37] mm/highmem: Provide kmap_local*
Now that the kmap atomic index is stored in task struct provide a preemptible variant. On context switch the maps of an outgoing task are removed and the map of the incoming task are restored. That's obviously slow, but highmem is slow anyway. The kmap_local.*() functions can be invoked from both preemptible and atomic context. kmap local sections disable migration to keep the resulting virtual mapping address correct, but disable neither pagefaults nor preemption. A wholesale conversion of kmap_atomic to be fully preemptible is not possible because some of the usage sites might rely on the preemption disable for serialization or on the implicit pagefault disable. Needs to be done on a case by case basis. Signed-off-by: Thomas Gleixner --- V3: Move migrate disable into the actual highmem mapping code so it only affects real highmem mappings. V2: Make it more consistent and add commentry --- include/linux/highmem-internal.h | 48 +++ include/linux/highmem.h | 43 +- mm/highmem.c |6 3 files changed, 81 insertions(+), 16 deletions(-) --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -69,6 +69,26 @@ static inline void kmap_flush_unused(voi __kmap_flush_unused(); } +static inline void *kmap_local_page(struct page *page) +{ + return __kmap_local_page_prot(page, kmap_prot); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_local_pfn(unsigned long pfn) +{ + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_local(void *vaddr) +{ + kunmap_local_indexed(vaddr); +} + static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); @@ -141,6 +161,28 @@ static inline void kunmap(struct page *p #endif } +static inline void *kmap_local_page(struct page *page) +{ + return page_address(page); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return kmap_local_page(page); +} + +static inline void *kmap_local_pfn(unsigned long pfn) +{ + return kmap_local_page(pfn_to_page(pfn)); +} + +static inline void __kunmap_local(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif +} + static inline void *kmap_atomic(struct page *page) { preempt_disable(); @@ -182,4 +224,10 @@ do { \ __kunmap_atomic(__addr);\ } while (0) +#define kunmap_local(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_local(__addr); \ +} while (0) + #endif --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -60,24 +60,22 @@ static inline struct page *kmap_to_page( static inline void kmap_flush_unused(void); /** - * kmap_atomic - Atomically map a page for temporary usage + * kmap_local_page - Map a page for temporary usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * - * Side effect: On return pagefaults and preemption are disabled. - * * Can be invoked from any context. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation: * - * addr1 = kmap_atomic(page1); - * addr2 = kmap_atomic(page2); + * addr1 = kmap_local_page(page1); + * addr2 = kmap_local_page(page2); * ... - * kunmap_atomic(addr2); - * kunmap_atomic(addr1); + * kunmap_local(addr2); + * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * @@ -88,10 +86,26 @@ static inline void kmap_flush_unused(voi * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * - * While it is significantly faster than kmap() it comes with restrictions - * about the pointer validity and the side effects of disabling page faults - * and preemption. Use it only when absolutely necessary, e.g. from non - * preemptible contexts. + * While it is significantly faster than kmap() for the higmem case it + * comes with restrictions about the pointer validity. Only use when really + * necessary. + * + * On HIGHMEM enabled systems mapping a highmem page has the side effect of + * disabling migration in order to keep the virtual address stable across + * preemption. No caller of kmap_local_page() can rely on this side effect. + */ +static inline void *kmap_local_page(struct page *page); + +/** + * kmap_atomic - Atomically map a page for temporary usage - Deprecated! + * @page: Pointer to the page to be
[patch V3 35/37] drm/nouveau/device: Replace io_mapping_map_atomic_wc()
Neither fbmem_peek() nor fbmem_poke() require to disable pagefaults and preemption as a side effect of io_mapping_map_atomic_wc(). Use io_mapping_map_local_wc() instead. Signed-off-by: Thomas Gleixner Cc: Ben Skeggs Cc: David Airlie Cc: Daniel Vetter Cc: dri-de...@lists.freedesktop.org Cc: nouv...@lists.freedesktop.org --- V3: New patch --- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h |8 1 file changed, 4 insertions(+), 4 deletions(-) --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h @@ -60,19 +60,19 @@ fbmem_fini(struct io_mapping *fb) static inline u32 fbmem_peek(struct io_mapping *fb, u32 off) { - u8 __iomem *p = io_mapping_map_atomic_wc(fb, off & PAGE_MASK); + u8 __iomem *p = io_mapping_map_local_wc(fb, off & PAGE_MASK); u32 val = ioread32(p + (off & ~PAGE_MASK)); - io_mapping_unmap_atomic(p); + io_mapping_unmap_local(p); return val; } static inline void fbmem_poke(struct io_mapping *fb, u32 off, u32 val) { - u8 __iomem *p = io_mapping_map_atomic_wc(fb, off & PAGE_MASK); + u8 __iomem *p = io_mapping_map_local_wc(fb, off & PAGE_MASK); iowrite32(val, p + (off & ~PAGE_MASK)); wmb(); - io_mapping_unmap_atomic(p); + io_mapping_unmap_local(p); } static inline bool ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 17/37] xtensa/mm/highmem: Switch to generic kmap atomic
No reason having the same code in every architecture Signed-off-by: Thomas Gleixner Cc: Chris Zankel Cc: Max Filippov Cc: linux-xte...@linux-xtensa.org --- V3: Remove the kmap types cruft --- arch/xtensa/Kconfig |1 arch/xtensa/include/asm/fixmap.h |4 +-- arch/xtensa/include/asm/highmem.h | 12 - arch/xtensa/mm/highmem.c | 46 -- 4 files changed, 18 insertions(+), 45 deletions(-) --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -666,6 +666,7 @@ endchoice config HIGHMEM bool "High Memory Support" depends on MMU + select KMAP_LOCAL help Linux can use the full amount of RAM in the system by default. However, the default MMUv2 setup only maps the --- a/arch/xtensa/include/asm/fixmap.h +++ b/arch/xtensa/include/asm/fixmap.h @@ -16,7 +16,7 @@ #ifdef CONFIG_HIGHMEM #include #include -#include +#include #endif /* @@ -39,7 +39,7 @@ enum fixed_addresses { /* reserved pte's for temporary kernel mappings */ FIX_KMAP_BEGIN, FIX_KMAP_END = FIX_KMAP_BEGIN + - (KM_TYPE_NR * NR_CPUS * DCACHE_N_COLORS) - 1, + (KM_MAX_IDX * NR_CPUS * DCACHE_N_COLORS) - 1, #endif __end_of_fixed_addresses }; --- a/arch/xtensa/include/asm/highmem.h +++ b/arch/xtensa/include/asm/highmem.h @@ -16,9 +16,8 @@ #include #include #include -#include -#define PKMAP_BASE ((FIXADDR_START - \ +#define PKMAP_BASE ((FIXADDR_START - \ (LAST_PKMAP + 1) * PAGE_SIZE) & PMD_MASK) #define LAST_PKMAP (PTRS_PER_PTE * DCACHE_N_COLORS) #define LAST_PKMAP_MASK(LAST_PKMAP - 1) @@ -68,6 +67,15 @@ static inline void flush_cache_kmaps(voi flush_cache_all(); } +enum fixed_addresses kmap_local_map_idx(int type, unsigned long pfn); +#define arch_kmap_local_map_idxkmap_local_map_idx + +enum fixed_addresses kmap_local_unmap_idx(int type, unsigned long addr); +#define arch_kmap_local_unmap_idx kmap_local_unmap_idx + +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE) + void kmap_init(void); #endif --- a/arch/xtensa/mm/highmem.c +++ b/arch/xtensa/mm/highmem.c @@ -12,8 +12,6 @@ #include #include -static pte_t *kmap_pte; - #if DCACHE_WAY_SIZE > PAGE_SIZE unsigned int last_pkmap_nr_arr[DCACHE_N_COLORS]; wait_queue_head_t pkmap_map_wait_arr[DCACHE_N_COLORS]; @@ -33,59 +31,25 @@ static inline void kmap_waitqueues_init( static inline enum fixed_addresses kmap_idx(int type, unsigned long color) { - return (type + KM_TYPE_NR * smp_processor_id()) * DCACHE_N_COLORS + + return (type + KM_MAX_IDX * smp_processor_id()) * DCACHE_N_COLORS + color; } -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) +enum fixed_addresses kmap_local_map_idx(int type, unsigned long pfn) { - enum fixed_addresses idx; - unsigned long vaddr; - - idx = kmap_idx(kmap_atomic_idx_push(), - DCACHE_ALIAS(page_to_phys(page))); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte + idx))); -#endif - set_pte(kmap_pte + idx, mk_pte(page, prot)); - - return (void *)vaddr; + return kmap_idx(type, DCACHE_ALIAS(pfn << PAGE_SHIFT)); } -EXPORT_SYMBOL(kmap_atomic_high_prot); -void kunmap_atomic_high(void *kvaddr) +enum fixed_addresses kmap_local_unmap_idx(int type, unsigned long addr) { - if (kvaddr >= (void *)FIXADDR_START && - kvaddr < (void *)FIXADDR_TOP) { - int idx = kmap_idx(kmap_atomic_idx(), - DCACHE_ALIAS((unsigned long)kvaddr)); - - /* -* Force other mappings to Oops if they'll try to access this -* pte without first remap it. Keeping stale mappings around -* is a bad idea also, in case the page changes cacheability -* attributes or becomes a protected page in a hypervisor. -*/ - pte_clear(_mm, kvaddr, kmap_pte + idx); - local_flush_tlb_kernel_range((unsigned long)kvaddr, -(unsigned long)kvaddr + PAGE_SIZE); - - kmap_atomic_idx_pop(); - } + return kmap_idx(type, DCACHE_ALIAS(addr)); } -EXPORT_SYMBOL(kunmap_atomic_high); void __init kmap_init(void) { - unsigned long kmap_vstart; - /* Check if this memory layout is broken because PKMAP overlaps * page table. */ BUILD_BUG_ON(PKMAP_BASE < TLBTEMP_BASE_1 + TLBTEMP_SIZE); - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = virt_to_kpte(kmap_vstart); kmap_waitqueues_init(); }
[patch V3 28/37] mips/crashdump: Simplify copy_oldmem_page()
Replace kmap_atomic_pfn() with kmap_local_pfn() which is preemptible and can take page faults. Remove the indirection of the dump page and the related cruft which is not longer required. Signed-off-by: Thomas Gleixner Cc: Thomas Bogendoerfer Cc: linux-m...@vger.kernel.org --- V3: New patch --- arch/mips/kernel/crash_dump.c | 42 +++--- 1 file changed, 7 insertions(+), 35 deletions(-) --- a/arch/mips/kernel/crash_dump.c +++ b/arch/mips/kernel/crash_dump.c @@ -5,8 +5,6 @@ #include #include -static void *kdump_buf_page; - /** * copy_oldmem_page - copy one page from "oldmem" * @pfn: page frame number to be copied @@ -17,51 +15,25 @@ static void *kdump_buf_page; * @userbuf: if set, @buf is in user address space, use copy_to_user(), * otherwise @buf is in kernel address space, use memcpy(). * - * Copy a page from "oldmem". For this page, there is no pte mapped + * Copy a page from "oldmem". For this page, there might be no pte mapped * in the current kernel. - * - * Calling copy_to_user() in atomic context is not desirable. Hence first - * copying the data to a pre-allocated kernel page and then copying to user - * space in non-atomic context. */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, -size_t csize, unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, +unsigned long offset, int userbuf) { void *vaddr; if (!csize) return 0; - vaddr = kmap_atomic_pfn(pfn); + vaddr = kmap_local_pfn(pfn); if (!userbuf) { - memcpy(buf, (vaddr + offset), csize); - kunmap_atomic(vaddr); + memcpy(buf, vaddr + offset, csize); } else { - if (!kdump_buf_page) { - pr_warn("Kdump: Kdump buffer page not allocated\n"); - - return -EFAULT; - } - copy_page(kdump_buf_page, vaddr); - kunmap_atomic(vaddr); - if (copy_to_user(buf, (kdump_buf_page + offset), csize)) - return -EFAULT; + if (copy_to_user(buf, vaddr + offset, csize)) + csize = -EFAULT; } return csize; } - -static int __init kdump_buf_page_init(void) -{ - int ret = 0; - - kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!kdump_buf_page) { - pr_warn("Kdump: Failed to allocate kdump buffer page\n"); - ret = -ENOMEM; - } - - return ret; -} -arch_initcall(kdump_buf_page_init); ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 32/37] drm/vmgfx: Replace kmap_atomic()
There is no reason to disable pagefaults and preemption as a side effect of kmap_atomic_prot(). Use kmap_local_page_prot() instead and document the reasoning for the mapping usage with the given pgprot. Remove the NULL pointer check for the map. These functions return a valid address for valid pages and the return was bogus anyway as it would have left preemption and pagefaults disabled. Signed-off-by: Thomas Gleixner Cc: VMware Graphics Cc: Roland Scheidegger Cc: David Airlie Cc: Daniel Vetter Cc: dri-de...@lists.freedesktop.org --- V3: New patch --- drivers/gpu/drm/vmwgfx/vmwgfx_blit.c | 30 -- 1 file changed, 12 insertions(+), 18 deletions(-) --- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c @@ -375,12 +375,12 @@ static int vmw_bo_cpu_blit_line(struct v copy_size = min_t(u32, copy_size, PAGE_SIZE - src_page_offset); if (unmap_src) { - kunmap_atomic(d->src_addr); + kunmap_local(d->src_addr); d->src_addr = NULL; } if (unmap_dst) { - kunmap_atomic(d->dst_addr); + kunmap_local(d->dst_addr); d->dst_addr = NULL; } @@ -388,12 +388,8 @@ static int vmw_bo_cpu_blit_line(struct v if (WARN_ON_ONCE(dst_page >= d->dst_num_pages)) return -EINVAL; - d->dst_addr = - kmap_atomic_prot(d->dst_pages[dst_page], -d->dst_prot); - if (!d->dst_addr) - return -ENOMEM; - + d->dst_addr = kmap_local_page_prot(d->dst_pages[dst_page], + d->dst_prot); d->mapped_dst = dst_page; } @@ -401,12 +397,8 @@ static int vmw_bo_cpu_blit_line(struct v if (WARN_ON_ONCE(src_page >= d->src_num_pages)) return -EINVAL; - d->src_addr = - kmap_atomic_prot(d->src_pages[src_page], -d->src_prot); - if (!d->src_addr) - return -ENOMEM; - + d->src_addr = kmap_local_page_prot(d->src_pages[src_page], + d->src_prot); d->mapped_src = src_page; } diff->do_cpy(diff, d->dst_addr + dst_page_offset, @@ -436,8 +428,10 @@ static int vmw_bo_cpu_blit_line(struct v * * Performs a CPU blit from one buffer object to another avoiding a full * bo vmap which may exhaust- or fragment vmalloc space. - * On supported architectures (x86), we're using kmap_atomic which avoids - * cross-processor TLB- and cache flushes and may, on non-HIGHMEM systems + * + * On supported architectures (x86), we're using kmap_local_prot() which + * avoids cross-processor TLB- and cache flushes. kmap_local_prot() will + * either map a highmem page with the proper pgprot on HIGHMEM=y systems or * reference already set-up mappings. * * Neither of the buffer objects may be placed in PCI memory @@ -500,9 +494,9 @@ int vmw_bo_cpu_blit(struct ttm_buffer_ob } out: if (d.src_addr) - kunmap_atomic(d.src_addr); + kunmap_local(d.src_addr); if (d.dst_addr) - kunmap_atomic(d.dst_addr); + kunmap_local(d.dst_addr); return ret; } ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 29/37] ARM: mm: Replace kmap_atomic_pfn()
There is no requirement to disable pagefaults and preemption for these cache management mappings. Replace kmap_atomic_pfn() with kmap_local_pfn(). This allows to remove kmap_atomic_pfn() in the next step. Signed-off-by: Thomas Gleixner Cc: Russell King Cc: linux-arm-ker...@lists.infradead.org --- V3: New patch --- arch/arm/mm/cache-feroceon-l2.c |6 +++--- arch/arm/mm/cache-xsc3l2.c |4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -49,9 +49,9 @@ static inline unsigned long l2_get_va(un * we simply install a virtual mapping for it only for the * TLB lookup to occur, hence no need to flush the untouched * memory mapping afterwards (note: a cache flush may happen -* in some circumstances depending on the path taken in kunmap_atomic). +* in some circumstances depending on the path taken in kunmap_local). */ - void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT); + void *vaddr = kmap_local_pfn(paddr >> PAGE_SHIFT); return (unsigned long)vaddr + (paddr & ~PAGE_MASK); #else return __phys_to_virt(paddr); @@ -61,7 +61,7 @@ static inline unsigned long l2_get_va(un static inline void l2_put_va(unsigned long vaddr) { #ifdef CONFIG_HIGHMEM - kunmap_atomic((void *)vaddr); + kunmap_local((void *)vaddr); #endif } --- a/arch/arm/mm/cache-xsc3l2.c +++ b/arch/arm/mm/cache-xsc3l2.c @@ -59,7 +59,7 @@ static inline void l2_unmap_va(unsigned { #ifdef CONFIG_HIGHMEM if (va != -1) - kunmap_atomic((void *)va); + kunmap_local((void *)va); #endif } @@ -75,7 +75,7 @@ static inline unsigned long l2_map_va(un * in place for it. */ l2_unmap_va(prev_va); - va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT); + va = (unsigned long)kmap_local_pfn(pa >> PAGE_SHIFT); } return va + (pa_offset >> (32 - PAGE_SHIFT)); #else ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 22/37] highmem: High implementation details and document API
Move the gory details of kmap & al into a private header and only document the interfaces which are usable by drivers. Signed-off-by: Thomas Gleixner --- V3: New patch --- include/linux/highmem-internal.h | 174 + include/linux/highmem.h | 270 ++- mm/highmem.c | 11 - 3 files changed, 276 insertions(+), 179 deletions(-) --- /dev/null +++ b/include/linux/highmem-internal.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HIGHMEM_INTERNAL_H +#define _LINUX_HIGHMEM_INTERNAL_H + +/* + * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. + */ +#ifdef CONFIG_KMAP_LOCAL +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); +void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void kunmap_local_indexed(void *vaddr); +#endif + +#ifdef CONFIG_HIGHMEM +#include + +#ifndef ARCH_HAS_KMAP_FLUSH_TLB +static inline void kmap_flush_tlb(unsigned long addr) { } +#endif + +#ifndef kmap_prot +#define kmap_prot PAGE_KERNEL +#endif + +void *kmap_high(struct page *page); +void kunmap_high(struct page *page); +void __kmap_flush_unused(void); +struct page *__kmap_to_page(void *addr); + +static inline void *kmap(struct page *page) +{ + void *addr; + + might_sleep(); + if (!PageHighMem(page)) + addr = page_address(page); + else + addr = kmap_high(page); + kmap_flush_tlb((unsigned long)addr); + return addr; +} + +static inline void kunmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + +static inline struct page *kmap_to_page(void *addr) +{ + return __kmap_to_page(addr); +} + +static inline void kmap_flush_unused(void) +{ + __kmap_flush_unused(); +} + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_atomic(void *addr) +{ + kunmap_local_indexed(addr); + pagefault_enable(); + preempt_enable(); +} + +unsigned int __nr_free_highpages(void); +extern atomic_long_t _totalhigh_pages; + +static inline unsigned int nr_free_highpages(void) +{ + return __nr_free_highpages(); +} + +static inline unsigned long totalhigh_pages(void) +{ + return (unsigned long)atomic_long_read(&_totalhigh_pages); +} + +static inline void totalhigh_pages_inc(void) +{ + atomic_long_inc(&_totalhigh_pages); +} + +static inline void totalhigh_pages_add(long count) +{ + atomic_long_add(count, &_totalhigh_pages); +} + +#else /* CONFIG_HIGHMEM */ + +static inline struct page *kmap_to_page(void *addr) +{ + return virt_to_page(addr); +} + +static inline void *kmap(struct page *page) +{ + might_sleep(); + return page_address(page); +} + +static inline void kunmap_high(struct page *page) { } +static inline void kmap_flush_unused(void) { } + +static inline void kunmap(struct page *page) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(page_address(page)); +#endif +} + +static inline void *kmap_atomic(struct page *page) +{ + preempt_disable(); + pagefault_disable(); + return page_address(page); +} + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + return kmap_atomic(page); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + return kmap_atomic(pfn_to_page(pfn)); +} + +static inline void __kunmap_atomic(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif + pagefault_enable(); + preempt_enable(); +} + +static inline unsigned int nr_free_highpages(void) { return 0; } +static inline unsigned long totalhigh_pages(void) { return 0UL; } + +#endif /* CONFIG_HIGHMEM */ + +/* + * Prevent people trying to call kunmap_atomic() as if it were kunmap() + * kunmap_atomic() should get the return value of kmap_atomic, not the page. + */ +#define kunmap_atomic(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_atomic(__addr);\ +} while (0) + +#endif --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -11,199 +11,125 @@ #include -#ifndef ARCH_HAS_FLUSH_ANON_PAGE -static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) -{ -} -#endif +#include "highmem-internal.h" -#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
[patch V3 36/37] drm/i915: Replace io_mapping_map_atomic_wc()
None of these mapping requires the side effect of disabling pagefaults and preemption. Use io_mapping_map_local_wc() instead, and clean up gtt_user_read() and gtt_user_write() to use a plain copy_from_user() as the local maps are not disabling pagefaults. Signed-off-by: Thomas Gleixner Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: Daniel Vetter Cc: intel-...@lists.freedesktop.org Cc: dri-de...@lists.freedesktop.org --- V3: New patch --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c |7 +--- drivers/gpu/drm/i915/i915_gem.c| 40 - drivers/gpu/drm/i915/selftests/i915_gem.c |4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |8 ++--- 4 files changed, 22 insertions(+), 37 deletions(-) --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1081,7 +1081,7 @@ static void reloc_cache_reset(struct rel struct i915_ggtt *ggtt = cache_to_ggtt(cache); intel_gt_flush_ggtt_writes(ggtt->vm.gt); - io_mapping_unmap_atomic((void __iomem *)vaddr); + io_mapping_unmap_local((void __iomem *)vaddr); if (drm_mm_node_allocated(>node)) { ggtt->vm.clear_range(>vm, @@ -1147,7 +1147,7 @@ static void *reloc_iomap(struct drm_i915 if (cache->vaddr) { intel_gt_flush_ggtt_writes(ggtt->vm.gt); - io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); + io_mapping_unmap_local((void __force __iomem *) unmask_page(cache->vaddr)); } else { struct i915_vma *vma; int err; @@ -1195,8 +1195,7 @@ static void *reloc_iomap(struct drm_i915 offset += page << PAGE_SHIFT; } - vaddr = (void __force *)io_mapping_map_atomic_wc(>iomap, -offset); + vaddr = (void __force *)io_mapping_map_local_wc(>iomap, offset); cache->page = page; cache->vaddr = (unsigned long)vaddr; --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -379,22 +379,15 @@ gtt_user_read(struct io_mapping *mapping char __user *user_data, int length) { void __iomem *vaddr; - unsigned long unwritten; + bool fail = false; /* We can use the cpu mem copy function because this is X86. */ - vaddr = io_mapping_map_atomic_wc(mapping, base); - unwritten = __copy_to_user_inatomic(user_data, - (void __force *)vaddr + offset, - length); - io_mapping_unmap_atomic(vaddr); - if (unwritten) { - vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); - unwritten = copy_to_user(user_data, -(void __force *)vaddr + offset, -length); - io_mapping_unmap(vaddr); - } - return unwritten; + vaddr = io_mapping_map_local_wc(mapping, base); + if (copy_to_user(user_data, (void __force *)vaddr + offset, length)) + fail = true; + io_mapping_unmap_local(vaddr); + + return fail; } static int @@ -557,21 +550,14 @@ ggtt_write(struct io_mapping *mapping, char __user *user_data, int length) { void __iomem *vaddr; - unsigned long unwritten; + bool fail = false; /* We can use the cpu mem copy function because this is X86. */ - vaddr = io_mapping_map_atomic_wc(mapping, base); - unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, - user_data, length); - io_mapping_unmap_atomic(vaddr); - if (unwritten) { - vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); - unwritten = copy_from_user((void __force *)vaddr + offset, - user_data, length); - io_mapping_unmap(vaddr); - } - - return unwritten; + vaddr = io_mapping_map_local_wc(mapping, base); + if (copy_from_user((void __force *)vaddr + offset, user_data, length)) + fail = true; + io_mapping_unmap_local(vaddr); + return fail; } /** --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -57,12 +57,12 @@ static void trash_stolen(struct drm_i915 ggtt->vm.insert_page(>vm, dma, slot, I915_CACHE_NONE, 0); - s = io_mapping_map_atomic_wc(>iomap, slot); + s = io_mapping_map_local_wc(>iomap, slot); for (x = 0; x < PAGE_SIZE / sizeof(u32); x++) { prng = next_pseudo_random32(prng); iowrite32(prng, [x]); } -
[patch V3 26/37] io-mapping: Provide iomap_local variant
Similar to kmap local provide a iomap local variant which only disables migration, but neither disables pagefaults nor preemption. Signed-off-by: Thomas Gleixner --- V3: Restrict migrate disable to the 32bit mapping case and update documentation. V2: Split out from the large combo patch and add the !IOMAP_ATOMIC variants --- Documentation/driver-api/io-mapping.rst | 76 +++- include/linux/io-mapping.h | 30 +++- 2 files changed, 74 insertions(+), 32 deletions(-) --- a/Documentation/driver-api/io-mapping.rst +++ b/Documentation/driver-api/io-mapping.rst @@ -20,55 +20,71 @@ as it would consume too much of the kern mappable, while 'size' indicates how large a mapping region to enable. Both are in bytes. -This _wc variant provides a mapping which may only be used -with the io_mapping_map_atomic_wc or io_mapping_map_wc. +This _wc variant provides a mapping which may only be used with +io_mapping_map_atomic_wc(), io_mapping_map_local_wc() or +io_mapping_map_wc(). + +With this mapping object, individual pages can be mapped either temporarily +or long term, depending on the requirements. Of course, temporary maps are +more efficient. They come in two flavours:: -With this mapping object, individual pages can be mapped either atomically -or not, depending on the necessary scheduling environment. Of course, atomic -maps are more efficient:: + void *io_mapping_map_local_wc(struct io_mapping *mapping, + unsigned long offset) void *io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) -'offset' is the offset within the defined mapping region. -Accessing addresses beyond the region specified in the -creation function yields undefined results. Using an offset -which is not page aligned yields an undefined result. The -return value points to a single page in CPU address space. - -This _wc variant returns a write-combining map to the -page and may only be used with mappings created by -io_mapping_create_wc +'offset' is the offset within the defined mapping region. Accessing +addresses beyond the region specified in the creation function yields +undefined results. Using an offset which is not page aligned yields an +undefined result. The return value points to a single page in CPU address +space. -Note that the task may not sleep while holding this page -mapped. +This _wc variant returns a write-combining map to the page and may only be +used with mappings created by io_mapping_create_wc() -:: +Temporary mappings are only valid in the context of the caller. The mapping +is not guaranteed to be globaly visible. - void io_mapping_unmap_atomic(void *vaddr) +io_mapping_map_local_wc() has a side effect on X86 32bit as it disables +migration to make the mapping code work. No caller can rely on this side +effect. + +io_mapping_map_atomic_wc() has the side effect of disabling preemption and +pagefaults. Don't use in new code. Use io_mapping_map_local_wc() instead. -'vaddr' must be the value returned by the last -io_mapping_map_atomic_wc call. This unmaps the specified -page and allows the task to sleep once again. +Nested mappings need to be undone in reverse order because the mapping +code uses a stack for keeping track of them:: -If you need to sleep while holding the lock, you can use the non-atomic -variant, although they may be significantly slower. + addr1 = io_mapping_map_local_wc(map1, offset1); + addr2 = io_mapping_map_local_wc(map2, offset2); + ... + io_mapping_unmap_local(addr2); + io_mapping_unmap_local(addr1); -:: +The mappings are released with:: + + void io_mapping_unmap_local(void *vaddr) + void io_mapping_unmap_atomic(void *vaddr) + +'vaddr' must be the value returned by the last io_mapping_map_local_wc() or +io_mapping_map_atomic_wc() call. This unmaps the specified mapping and +undoes the side effects of the mapping functions. + +If you need to sleep while holding a mapping, you can use the regular +variant, although this may be significantly slower:: void *io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) -This works like io_mapping_map_atomic_wc except it allows -the task to sleep while holding the page mapped. - +This works like io_mapping_map_atomic/local_wc() except it has no side +effects and the pointer is globaly visible. -:: +The mappings are released with:: void io_mapping_unmap(void *vaddr) -This works like io_mapping_unmap_atomic, except it is used -for pages mapped with io_mapping_map_wc. +Use for pages mapped with io_mapping_map_wc(). At driver close time, the io_mapping object must be freed:: --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -83,6 +83,21 @@ io_mapping_unmap_atomic(void __iomem *va } static inline void __iomem * +io_mapping_map_local_wc(struct io_mapping *mapping, unsigned
[patch V3 33/37] highmem: Remove kmap_atomic_prot()
No more users. Signed-off-by: Thomas Gleixner --- V3: New patch --- include/linux/highmem-internal.h | 14 ++ 1 file changed, 2 insertions(+), 12 deletions(-) --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -87,16 +87,11 @@ static inline void __kunmap_local(void * kunmap_local_indexed(vaddr); } -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic(struct page *page) { preempt_disable(); pagefault_disable(); - return __kmap_local_page_prot(page, prot); -} - -static inline void *kmap_atomic(struct page *page) -{ - return kmap_atomic_prot(page, kmap_prot); + return __kmap_local_page_prot(page, kmap_prot); } static inline void __kunmap_atomic(void *addr) @@ -181,11 +176,6 @@ static inline void *kmap_atomic(struct p return page_address(page); } -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) -{ - return kmap_atomic(page); -} - static inline void __kunmap_atomic(void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 34/37] drm/qxl: Replace io_mapping_map_atomic_wc()
None of these mapping requires the side effect of disabling pagefaults and preemption. Use io_mapping_map_local_wc() instead, rename the related functions accordingly and clean up qxl_process_single_command() to use a plain copy_from_user() as the local maps are not disabling pagefaults. Signed-off-by: Thomas Gleixner Cc: Dave Airlie Cc: Gerd Hoffmann Cc: David Airlie Cc: Daniel Vetter Cc: virtualization@lists.linux-foundation.org Cc: spice-de...@lists.freedesktop.org --- V3: New patch --- drivers/gpu/drm/qxl/qxl_image.c | 18 +- drivers/gpu/drm/qxl/qxl_ioctl.c | 27 +-- drivers/gpu/drm/qxl/qxl_object.c | 12 ++-- drivers/gpu/drm/qxl/qxl_object.h |4 ++-- drivers/gpu/drm/qxl/qxl_release.c |4 ++-- 5 files changed, 32 insertions(+), 33 deletions(-) --- a/drivers/gpu/drm/qxl/qxl_image.c +++ b/drivers/gpu/drm/qxl/qxl_image.c @@ -124,12 +124,12 @@ qxl_image_init_helper(struct qxl_device wrong (check the bitmaps are sent correctly first) */ - ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, 0); + ptr = qxl_bo_kmap_local_page(qdev, chunk_bo, 0); chunk = ptr; chunk->data_size = height * chunk_stride; chunk->prev_chunk = 0; chunk->next_chunk = 0; - qxl_bo_kunmap_atomic_page(qdev, chunk_bo, ptr); + qxl_bo_kunmap_local_page(qdev, chunk_bo, ptr); { void *k_data, *i_data; @@ -143,7 +143,7 @@ qxl_image_init_helper(struct qxl_device i_data = (void *)data; while (remain > 0) { - ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, page << PAGE_SHIFT); + ptr = qxl_bo_kmap_local_page(qdev, chunk_bo, page << PAGE_SHIFT); if (page == 0) { chunk = ptr; @@ -157,7 +157,7 @@ qxl_image_init_helper(struct qxl_device memcpy(k_data, i_data, size); - qxl_bo_kunmap_atomic_page(qdev, chunk_bo, ptr); + qxl_bo_kunmap_local_page(qdev, chunk_bo, ptr); i_data += size; remain -= size; page++; @@ -175,10 +175,10 @@ qxl_image_init_helper(struct qxl_device page_offset = offset_in_page(out_offset); size = min((int)(PAGE_SIZE - page_offset), remain); - ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, page_base); + ptr = qxl_bo_kmap_local_page(qdev, chunk_bo, page_base); k_data = ptr + page_offset; memcpy(k_data, i_data, size); - qxl_bo_kunmap_atomic_page(qdev, chunk_bo, ptr); + qxl_bo_kunmap_local_page(qdev, chunk_bo, ptr); remain -= size; i_data += size; out_offset += size; @@ -189,7 +189,7 @@ qxl_image_init_helper(struct qxl_device qxl_bo_kunmap(chunk_bo); image_bo = dimage->bo; - ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0); + ptr = qxl_bo_kmap_local_page(qdev, image_bo, 0); image = ptr; image->descriptor.id = 0; @@ -212,7 +212,7 @@ qxl_image_init_helper(struct qxl_device break; default: DRM_ERROR("unsupported image bit depth\n"); - qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr); + qxl_bo_kunmap_local_page(qdev, image_bo, ptr); return -EINVAL; } image->u.bitmap.flags = QXL_BITMAP_TOP_DOWN; @@ -222,7 +222,7 @@ qxl_image_init_helper(struct qxl_device image->u.bitmap.palette = 0; image->u.bitmap.data = qxl_bo_physical_address(qdev, chunk_bo, 0); - qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr); + qxl_bo_kunmap_local_page(qdev, image_bo, ptr); return 0; } --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -89,11 +89,11 @@ apply_reloc(struct qxl_device *qdev, str { void *reloc_page; - reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK); + reloc_page = qxl_bo_kmap_local_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK); *(uint64_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = qxl_bo_physical_address(qdev, info->src_bo, info->src_offset); -
[patch V3 11/37] csky/mm/highmem: Switch to generic kmap atomic
No reason having the same code in every architecture. Signed-off-by: Thomas Gleixner Cc: linux-c...@vger.kernel.org --- V3: Does not compile with gcc 10 --- arch/csky/Kconfig |1 arch/csky/include/asm/fixmap.h |4 +- arch/csky/include/asm/highmem.h |6 ++- arch/csky/mm/highmem.c | 75 4 files changed, 8 insertions(+), 78 deletions(-) --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -286,6 +286,7 @@ config NR_CPUS config HIGHMEM bool "High Memory Support" depends on !CPU_CK610 + select KMAP_LOCAL default y config FORCE_MAX_ZONEORDER --- a/arch/csky/include/asm/fixmap.h +++ b/arch/csky/include/asm/fixmap.h @@ -8,7 +8,7 @@ #include #ifdef CONFIG_HIGHMEM #include -#include +#include #endif enum fixed_addresses { @@ -17,7 +17,7 @@ enum fixed_addresses { #endif #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #endif __end_of_fixed_addresses }; --- a/arch/csky/include/asm/highmem.h +++ b/arch/csky/include/asm/highmem.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include /* undef for production */ @@ -32,10 +32,12 @@ extern pte_t *pkmap_page_table; #define ARCH_HAS_KMAP_FLUSH_TLB extern void kmap_flush_tlb(unsigned long addr); -extern void *kmap_atomic_pfn(unsigned long pfn); #define flush_cache_kmaps() do {} while (0) +#define arch_kmap_local_post_map(vaddr, pteval)kmap_flush_tlb(vaddr) +#define arch_kmap_local_post_unmap(vaddr) kmap_flush_tlb(vaddr) + extern void kmap_init(void); #endif /* __KERNEL__ */ --- a/arch/csky/mm/highmem.c +++ b/arch/csky/mm/highmem.c @@ -9,8 +9,6 @@ #include #include -static pte_t *kmap_pte; - unsigned long highstart_pfn, highend_pfn; void kmap_flush_tlb(unsigned long addr) @@ -19,67 +17,7 @@ void kmap_flush_tlb(unsigned long addr) } EXPORT_SYMBOL(kmap_flush_tlb); -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte - idx))); -#endif - set_pte(kmap_pte-idx, mk_pte(page, prot)); - flush_tlb_one((unsigned long)vaddr); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int idx; - - if (vaddr < FIXADDR_START) - return; - -#ifdef CONFIG_DEBUG_HIGHMEM - idx = KM_TYPE_NR*smp_processor_id() + kmap_atomic_idx(); - - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - pte_clear(_mm, vaddr, kmap_pte - idx); - flush_tlb_one(vaddr); -#else - (void) idx; /* to kill a warning */ -#endif - kmap_atomic_idx_pop(); -} -EXPORT_SYMBOL(kunmap_atomic_high); - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - unsigned long vaddr; - int idx, type; - - pagefault_disable(); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL)); - flush_tlb_one(vaddr); - - return (void *) vaddr; -} - -static void __init kmap_pages_init(void) +void __init kmap_init(void) { unsigned long vaddr; pgd_t *pgd; @@ -96,14 +34,3 @@ static void __init kmap_pages_init(void) pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; } - -void __init kmap_init(void) -{ - unsigned long vaddr; - - kmap_pages_init(); - - vaddr = __fix_to_virt(FIX_KMAP_BEGIN); - - kmap_pte = pte_offset_kernel((pmd_t *)pgd_offset_k(vaddr), vaddr); -} ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 14/37] nds32/mm/highmem: Switch to generic kmap atomic
The mapping code is odd and looks broken. See FIXME in the comment. Also fix the harmless off by one in the FIX_KMAP_END define. Signed-off-by: Thomas Gleixner Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen --- V3: Remove the kmap types cruft --- arch/nds32/Kconfig.cpu |1 arch/nds32/include/asm/fixmap.h |4 +-- arch/nds32/include/asm/highmem.h | 22 + arch/nds32/mm/Makefile |1 arch/nds32/mm/highmem.c | 48 --- 5 files changed, 19 insertions(+), 57 deletions(-) --- a/arch/nds32/Kconfig.cpu +++ b/arch/nds32/Kconfig.cpu @@ -157,6 +157,7 @@ config HW_SUPPORT_UNALIGNMENT_ACCESS config HIGHMEM bool "High Memory Support" depends on MMU && !CPU_CACHE_ALIASING + select KMAP_LOCAL help The address space of Andes processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address --- a/arch/nds32/include/asm/fixmap.h +++ b/arch/nds32/include/asm/fixmap.h @@ -6,7 +6,7 @@ #ifdef CONFIG_HIGHMEM #include -#include +#include #endif enum fixed_addresses { @@ -14,7 +14,7 @@ enum fixed_addresses { FIX_KMAP_RESERVED, FIX_KMAP_BEGIN, #ifdef CONFIG_HIGHMEM - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS), + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #endif FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses --- a/arch/nds32/include/asm/highmem.h +++ b/arch/nds32/include/asm/highmem.h @@ -5,7 +5,6 @@ #define _ASM_HIGHMEM_H #include -#include #include /* @@ -45,11 +44,22 @@ extern pte_t *pkmap_page_table; extern void kmap_init(void); /* - * The following functions are already defined by - * when CONFIG_HIGHMEM is not set. + * FIXME: The below looks broken vs. a kmap_atomic() in task context which + * is interupted and another kmap_atomic() happens in interrupt context. + * But what do I know about nds32. -- tglx */ -#ifdef CONFIG_HIGHMEM -extern void *kmap_atomic_pfn(unsigned long pfn); -#endif +#define arch_kmap_local_post_map(vaddr, pteval)\ + do {\ + __nds32__tlbop_inv(vaddr); \ + __nds32__mtsr_dsb(vaddr, NDS32_SR_TLB_VPN); \ + __nds32__tlbop_rwr(pteval); \ + __nds32__isb(); \ + } while (0) + +#define arch_kmap_local_pre_unmap(vaddr) \ + do {\ + __nds32__tlbop_inv(vaddr); \ + __nds32__isb(); \ + } while (0) #endif --- a/arch/nds32/mm/Makefile +++ b/arch/nds32/mm/Makefile @@ -3,7 +3,6 @@ obj-y := extable.o tlb.o fault.o init mm-nds32.o cacheflush.o proc.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o -obj-$(CONFIG_HIGHMEM) += highmem.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_proc.o = $(CC_FLAGS_FTRACE) --- a/arch/nds32/mm/highmem.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2005-2017 Andes Technology Corporation - -#include -#include -#include -#include -#include -#include -#include -#include - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned int idx; - unsigned long vaddr, pte; - int type; - pte_t *ptep; - - type = kmap_atomic_idx_push(); - - idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - pte = (page_to_pfn(page) << PAGE_SHIFT) | prot; - ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); - set_pte(ptep, pte); - - __nds32__tlbop_inv(vaddr); - __nds32__mtsr_dsb(vaddr, NDS32_SR_TLB_VPN); - __nds32__tlbop_rwr(pte); - __nds32__isb(); - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - if (kvaddr >= (void *)FIXADDR_START) { - unsigned long vaddr = (unsigned long)kvaddr; - pte_t *ptep; - kmap_atomic_idx_pop(); - __nds32__tlbop_inv(vaddr); - __nds32__isb(); - ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); - set_pte(ptep, 0); - } -} -EXPORT_SYMBOL(kunmap_atomic_high); ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 12/37] microblaze/mm/highmem: Switch to generic kmap atomic
No reason having the same code in every architecture. Signed-off-by: Thomas Gleixner Cc: Michal Simek --- V3: Remove the kmap types cruft --- arch/microblaze/Kconfig |1 arch/microblaze/include/asm/fixmap.h |4 - arch/microblaze/include/asm/highmem.h |6 ++ arch/microblaze/mm/Makefile |1 arch/microblaze/mm/highmem.c | 78 -- arch/microblaze/mm/init.c |6 -- 6 files changed, 8 insertions(+), 88 deletions(-) --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -155,6 +155,7 @@ config XILINX_UNCACHED_SHADOW config HIGHMEM bool "High memory support" depends on MMU + select KMAP_LOCAL help The address space of Microblaze processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address --- a/arch/microblaze/include/asm/fixmap.h +++ b/arch/microblaze/include/asm/fixmap.h @@ -20,7 +20,7 @@ #include #ifdef CONFIG_HIGHMEM #include -#include +#include #endif #define FIXADDR_TOP((unsigned long)(-PAGE_SIZE)) @@ -47,7 +47,7 @@ enum fixed_addresses { FIX_HOLE, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * num_possible_cpus()) - 1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * num_possible_cpus()) - 1, #endif __end_of_fixed_addresses }; --- a/arch/microblaze/include/asm/highmem.h +++ b/arch/microblaze/include/asm/highmem.h @@ -25,7 +25,6 @@ #include #include -extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; /* @@ -52,6 +51,11 @@ extern pte_t *pkmap_page_table; #define flush_cache_kmaps(){ flush_icache(); flush_dcache(); } +#define arch_kmap_local_post_map(vaddr, pteval)\ + local_flush_tlb_page(NULL, vaddr); +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_page(NULL, vaddr); + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ --- a/arch/microblaze/mm/Makefile +++ b/arch/microblaze/mm/Makefile @@ -6,4 +6,3 @@ obj-y := consistent.o init.o obj-$(CONFIG_MMU) += pgtable.o mmu_context.o fault.o -obj-$(CONFIG_HIGHMEM) += highmem.o --- a/arch/microblaze/mm/highmem.c +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * highmem.c: virtual kernel memory mappings for high memory - * - * PowerPC version, stolen from the i386 version. - * - * Used in CONFIG_HIGHMEM systems for memory pages which - * are not addressable by direct kernel virtual addresses. - * - * Copyright (C) 1999 Gerhard Wichert, Siemens AG - * gerhard.wich...@pdb.siemens.de - * - * - * Redesigned the x86 32-bit VM architecture to deal with - * up to 16 Terrabyte physical memory. With current x86 CPUs - * we now support up to 64 Gigabytes physical RAM. - * - * Copyright (C) 1999 Ingo Molnar - * - * Reworked for PowerPC by various contributors. Moved from - * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp. - */ - -#include -#include - -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -#include - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte-idx))); -#endif - set_pte_at(_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); - local_flush_tlb_page(NULL, vaddr); - - return (void *) vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type; - unsigned int idx; - - if (vaddr < __fix_to_virt(FIX_KMAP_END)) - return; - - type = kmap_atomic_idx(); - - idx = type + KM_TYPE_NR * smp_processor_id(); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); -#endif - /* -* force other mappings to Oops if they'll try to access -* this pte without first remap it -*/ - pte_clear(_mm, vaddr, kmap_pte-idx); - local_flush_tlb_page(NULL, vaddr); - - kmap_atomic_idx_pop(); -} -EXPORT_SYMBOL(kunmap_atomic_high); --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -49,17 +49,11 @@ unsigned long lowmem_size; EXPORT_SYMBOL(min_low_pfn); EXPORT_SYMBOL(max_low_pfn); -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -EXPORT_SYMBOL(kmap_pte); - static void __init highmem_init(void) { pr_debug("%x\n", (u32)PKMAP_BASE); map_page(PKMAP_BASE, 0, 0); /* XXX gross */ pkmap_page_table =
[patch V3 16/37] sparc/mm/highmem: Switch to generic kmap atomic
No reason having the same code in every architecture Signed-off-by: Thomas Gleixner Cc: "David S. Miller" Cc: sparcli...@vger.kernel.org --- V3: Remove the kmap types cruft --- arch/sparc/Kconfig |1 arch/sparc/include/asm/highmem.h|8 +- arch/sparc/include/asm/kmap_types.h | 11 --- arch/sparc/include/asm/vaddrs.h |4 - arch/sparc/mm/Makefile |3 arch/sparc/mm/highmem.c | 115 arch/sparc/mm/srmmu.c |2 7 files changed, 8 insertions(+), 136 deletions(-) --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -139,6 +139,7 @@ config MMU config HIGHMEM bool default y if SPARC32 + select KMAP_LOCAL config ZONE_DMA bool --- a/arch/sparc/include/asm/highmem.h +++ b/arch/sparc/include/asm/highmem.h @@ -24,7 +24,6 @@ #include #include #include -#include #include /* declarations for highmem.c */ @@ -33,8 +32,6 @@ extern unsigned long highstart_pfn, high #define kmap_prot __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE) extern pte_t *pkmap_page_table; -void kmap_init(void) __init; - /* * Right now we initialize only a single pte table. It can be extended * easily, subsequent pte tables have to be allocated in one physical @@ -53,6 +50,11 @@ void kmap_init(void) __init; #define flush_cache_kmaps()flush_cache_all() +/* FIXME: Use __flush_tlb_one(vaddr) instead of flush_cache_all() -- Anton */ +#define arch_kmap_local_post_map(vaddr, pteval)flush_cache_all() +#define arch_kmap_local_post_unmap(vaddr) flush_cache_all() + + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ --- a/arch/sparc/include/asm/kmap_types.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -/* Dummy header just to define km_type. None of this - * is actually used on sparc. -DaveM - */ - -#include - -#endif --- a/arch/sparc/include/asm/vaddrs.h +++ b/arch/sparc/include/asm/vaddrs.h @@ -32,13 +32,13 @@ #define SRMMU_NOCACHE_ALCRATIO 64 /* 256 pages per 64MB of system RAM */ #ifndef __ASSEMBLY__ -#include +#include enum fixed_addresses { FIX_HOLE, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, - FIX_KMAP_END = (KM_TYPE_NR * NR_CPUS), + FIX_KMAP_END = (KM_MAX_IDX * NR_CPUS), #endif __end_of_fixed_addresses }; --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -15,6 +15,3 @@ obj-$(CONFIG_SPARC32) += leon_mm.o # Only used by sparc64 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o - -# Only used by sparc32 -obj-$(CONFIG_HIGHMEM) += highmem.o --- a/arch/sparc/mm/highmem.c +++ /dev/null @@ -1,115 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * highmem.c: virtual kernel memory mappings for high memory - * - * Provides kernel-static versions of atomic kmap functions originally - * found as inlines in include/asm-sparc/highmem.h. These became - * needed as kmap_atomic() and kunmap_atomic() started getting - * called from within modules. - * -- Tomas Szepe , September 2002 - * - * But kmap_atomic() and kunmap_atomic() cannot be inlined in - * modules because they are loaded with btfixup-ped functions. - */ - -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need it. - * - * XXX This is an old text. Actually, it's good to use atomic kmaps, - * provided you remember that they are atomic and not try to sleep - * with a kmap taken, much like a spinlock. Non-atomic kmaps are - * shared by CPUs, and so precious, and establishing them requires IPI. - * Atomic kmaps are lightweight and we may have NCPUS more of them. - */ -#include -#include -#include - -#include -#include -#include - -static pte_t *kmap_pte; - -void __init kmap_init(void) -{ - unsigned long address = __fix_to_virt(FIX_KMAP_BEGIN); - -/* cache the first kmap pte */ -kmap_pte = virt_to_kpte(address); -} - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - long idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - -/* XXX Fix - Anton */ -#if 0 - __flush_cache_one(vaddr); -#else - flush_cache_all(); -#endif - -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte-idx))); -#endif - set_pte(kmap_pte-idx, mk_pte(page, prot)); -/* XXX Fix - Anton */ -#if 0 - __flush_tlb_one(vaddr); -#else - flush_tlb_all(); -#endif - - return (void*) vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type; - - if (vaddr < FIXADDR_START) - return;
[patch V3 23/37] sched: Make migrate_disable/enable() independent of RT
Now that the scheduler can deal with migrate disable properly, there is no real compelling reason to make it only available for RT. There are quite some code pathes which needlessly disable preemption in order to prevent migration and some constructs like kmap_atomic() enforce it implicitly. Making it available independent of RT allows to provide a preemptible variant of kmap_atomic() and makes the code more consistent in general. FIXME: Rework the comment in preempt.h Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Daniel Bristot de Oliveira --- include/linux/kernel.h | 21 ++--- include/linux/preempt.h | 38 +++--- include/linux/sched.h |2 +- kernel/sched/core.c | 45 +++-- kernel/sched/sched.h|4 ++-- lib/smp_processor_id.c |2 +- 6 files changed, 56 insertions(+), 56 deletions(-) --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -204,6 +204,7 @@ extern int _cond_resched(void); extern void ___might_sleep(const char *file, int line, int preempt_offset); extern void __might_sleep(const char *file, int line, int preempt_offset); extern void __cant_sleep(const char *file, int line, int preempt_offset); +extern void __cant_migrate(const char *file, int line); /** * might_sleep - annotation for functions that can sleep @@ -227,6 +228,18 @@ extern void __cant_sleep(const char *fil # define cant_sleep() \ do { __cant_sleep(__FILE__, __LINE__, 0); } while (0) # define sched_annotate_sleep()(current->task_state_change = 0) + +/** + * cant_migrate - annotation for functions that cannot migrate + * + * Will print a stack trace if executed in code which is migratable + */ +# define cant_migrate() \ + do {\ + if (IS_ENABLED(CONFIG_SMP)) \ + __cant_migrate(__FILE__, __LINE__); \ + } while (0) + /** * non_block_start - annotate the start of section where sleeping is prohibited * @@ -251,6 +264,7 @@ extern void __cant_sleep(const char *fil int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) +# define cant_migrate()do { } while (0) # define sched_annotate_sleep() do { } while (0) # define non_block_start() do { } while (0) # define non_block_end() do { } while (0) @@ -258,13 +272,6 @@ extern void __cant_sleep(const char *fil #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -#ifndef CONFIG_PREEMPT_RT -# define cant_migrate()cant_sleep() -#else - /* Placeholder for now */ -# define cant_migrate()do { } while (0) -#endif - /** * abs - return absolute value of an argument * @x: the value. If it is unsigned type, it is converted to signed type first. --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -322,7 +322,7 @@ static inline void preempt_notifier_init #endif -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_SMP /* * Migrate-Disable and why it is undesired. @@ -382,43 +382,11 @@ static inline void preempt_notifier_init extern void migrate_disable(void); extern void migrate_enable(void); -#elif defined(CONFIG_PREEMPT_RT) +#else static inline void migrate_disable(void) { } static inline void migrate_enable(void) { } -#else /* !CONFIG_PREEMPT_RT */ - -/** - * migrate_disable - Prevent migration of the current task - * - * Maps to preempt_disable() which also disables preemption. Use - * migrate_disable() to annotate that the intent is to prevent migration, - * but not necessarily preemption. - * - * Can be invoked nested like preempt_disable() and needs the corresponding - * number of migrate_enable() invocations. - */ -static __always_inline void migrate_disable(void) -{ - preempt_disable(); -} - -/** - * migrate_enable - Allow migration of the current task - * - * Counterpart to migrate_disable(). - * - * As migrate_disable() can be invoked nested, only the outermost invocation - * reenables migration. - * - * Currently mapped to preempt_enable(). - */ -static __always_inline void migrate_enable(void) -{ - preempt_enable(); -} - -#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */ +#endif /* CONFIG_SMP */ #endif /* __LINUX_PREEMPT_H */ --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -715,7 +715,7 @@ struct task_struct { const cpumask_t *cpus_ptr; cpumask_t cpus_mask; void*migration_pending; -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_SMP
[patch V3 24/37] sched: highmem: Store local kmaps in task struct
Instead of storing the map per CPU provide and use per task storage. That prepares for local kmaps which are preemptible. The context switch code is preparatory and not yet in use because kmap_atomic() runs with preemption disabled. Will be made usable in the next step. The context switch logic is safe even when an interrupt happens after clearing or before restoring the kmaps. The kmap index in task struct is not modified so any nesting kmap in an interrupt will use unused indices and on return the counter is the same as before. Also add an assert into the return to user space code. Going back to user space with an active kmap local is a nono. Signed-off-by: Thomas Gleixner --- V3: Handle the debug case correctly --- include/linux/highmem-internal.h | 10 +++ include/linux/sched.h|9 +++ kernel/entry/common.c|2 kernel/fork.c|1 kernel/sched/core.c | 18 +++ mm/highmem.c | 99 +++ 6 files changed, 129 insertions(+), 10 deletions(-) --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -9,6 +9,16 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); void *__kmap_local_page_prot(struct page *page, pgprot_t prot); void kunmap_local_indexed(void *vaddr); +void kmap_local_fork(struct task_struct *tsk); +void __kmap_local_sched_out(void); +void __kmap_local_sched_in(void); +static inline void kmap_assert_nomap(void) +{ + DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx); +} +#else +static inline void kmap_local_fork(struct task_struct *tsk) { } +static inline void kmap_assert_nomap(void) { } #endif #ifdef CONFIG_HIGHMEM --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -629,6 +630,13 @@ struct wake_q_node { struct wake_q_node *next; }; +struct kmap_ctrl { +#ifdef CONFIG_KMAP_LOCAL + int idx; + pte_t pteval[KM_TYPE_NR]; +#endif +}; + struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -1294,6 +1302,7 @@ struct task_struct { unsigned intsequential_io; unsigned intsequential_io_avg; #endif + struct kmap_ctrlkmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -194,6 +195,7 @@ static void exit_to_user_mode_prepare(st /* Ensure that the address limit is intact and no locks are held */ addr_limit_user_check(); + kmap_assert_nomap(); lockdep_assert_irqs_disabled(); lockdep_sys_exit(); } --- a/kernel/fork.c +++ b/kernel/fork.c @@ -930,6 +930,7 @@ static struct task_struct *dup_task_stru account_kernel_stack(tsk, 1); kcov_task_init(tsk); + kmap_local_fork(tsk); #ifdef CONFIG_FAULT_INJECTION tsk->fail_nth = 0; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4053,6 +4053,22 @@ static inline void finish_lock_switch(st # define finish_arch_post_lock_switch()do { } while (0) #endif +static inline void kmap_local_sched_out(void) +{ +#ifdef CONFIG_KMAP_LOCAL + if (unlikely(current->kmap_ctrl.idx)) + __kmap_local_sched_out(); +#endif +} + +static inline void kmap_local_sched_in(void) +{ +#ifdef CONFIG_KMAP_LOCAL + if (unlikely(current->kmap_ctrl.idx)) + __kmap_local_sched_in(); +#endif +} + /** * prepare_task_switch - prepare to switch tasks * @rq: the runqueue preparing to switch @@ -4075,6 +4091,7 @@ prepare_task_switch(struct rq *rq, struc perf_event_task_sched_out(prev, next); rseq_preempt(prev); fire_sched_out_preempt_notifiers(prev, next); + kmap_local_sched_out(); prepare_task(next); prepare_arch_switch(next); } @@ -4141,6 +4158,7 @@ static struct rq *finish_task_switch(str finish_lock_switch(rq); finish_arch_post_lock_switch(); kcov_finish_switch(current); + kmap_local_sched_in(); fire_sched_in_preempt_notifiers(current); /* --- a/mm/highmem.c +++ b/mm/highmem.c @@ -365,8 +365,6 @@ EXPORT_SYMBOL(kunmap_high); #include -static DEFINE_PER_CPU(int, __kmap_local_idx); - /* * With DEBUG_HIGHMEM the stack depth is doubled and every second * slot is unused which acts as a guard page @@ -379,23 +377,21 @@ static DEFINE_PER_CPU(int, __kmap_local_ static inline int kmap_local_idx_push(void) { - int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1; - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx >= KM_MAX_IDX); - return idx; +
[patch V3 00/37] mm/highmem: Preemptible variant of kmap_atomic & friends
Following up to the discussion in: https://lore.kernel.org/r/20200914204209.256266...@linutronix.de and the second version of this: https://lore.kernel.org/r/20201029221806.189523...@linutronix.de this series provides a preemptible variant of kmap_atomic & related interfaces. This is achieved by: - Removing the RT dependency from migrate_disable/enable() - Consolidating all kmap atomic implementations in generic code including a useful version of the CONFIG_DEBUG_HIGHMEM which provides guard pages between the individual maps instead of just increasing the map size. - Switching from per CPU storage of the kmap index to a per task storage - Adding a pteval array to the per task storage which contains the ptevals of the currently active temporary kmaps - Adding context switch code which checks whether the outgoing or the incoming task has active temporary kmaps. If so, the outgoing task's kmaps are removed and the incoming task's kmaps are restored. - Adding new interfaces k[un]map_local*() which are not disabling preemption and can be called from any context (except NMI). Contrary to kmap() which provides preemptible and "persistant" mappings, these interfaces are meant to replace the temporary mappings provided by kmap_atomic*() today. This allows to get rid of conditional mapping choices and allows to have preemptible short term mappings on 64bit which are today enforced to be non-preemptible due to the highmem constraints. It clearly puts overhead on the highmem users, but highmem is slow anyway. This is not a wholesale conversion which makes kmap_atomic magically preemptible because there might be usage sites which rely on the implicit preempt disable. So this needs to be done on a case by case basis and the call sites converted to kmap_local(). Note, that this is only tested on X86 and completely untested on all other architectures (at least it compiles except on csky which does not compile with the newest cross tools from kernel.org independent of this change). The lot is available from git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git highmem It is based on Peter Zijlstras migrate disable branch which is close to be merged into the tip tree, but still not finalized: git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git sched/migrate-disable The series has the following parts: Patches 1 - 22: Consolidation work which is independent of the scheduler changes 79 files changed, 595 insertions(+), 1296 deletions(-) Patch 23: Needs to be folded back into the sched/migrate-disable Patches 24 - 26: The preemptible kmap_local() implementation 9 files changed, 283 insertions(+), 57 deletions(-) Patches 27 - 37: Cleanup of the less common kmap/io_map_atomic users 19 files changed, 114 insertions(+), 256 deletions(-) Vs. merging this pile: If everyone agrees, I'd like to take the first part (1-22) through tip so that the preemptible implementation can be sorted in tip once the scheduler prerequisites are there. The initial cleanups (27-37) might have to wait if there are conflicts vs. the drm/gpu tree. We'll see. >From what I can tell kmap_atomic() can be removed all together and completly replaced by kmap_local(). Most of the usage sites are trivial and just doing memcpy(), memset() or trivial operations on the temporarily mapped page. The interesting ones are those which do either conditional stuff or have copy_.*_user_inatomic() inside. As shown with the crash and drm/gpu cleanups this allows to simplify the code quite a bit. Changes vs. V2: - Remove the migrate disable from kmap_local and only issue that when the there is an actual highmem mapping. (Linus) - Reordered the series so the consolidation is upfront - Get rid of kmap_types.h and the associated cruft - Fixup documentation and add function documentation for kmap_* - Splitout the internal implementation into a seperate header - More cleanups - removal of unused functions - Replace a few of the less frequently used kmap_atomic and io_mapping_map_atomic variants and remove those interfaces. Thanks, tglx --- arch/alpha/include/asm/kmap_types.h | 15 arch/arc/include/asm/kmap_types.h | 14 arch/arm/include/asm/kmap_types.h | 10 arch/arm/mm/highmem.c | 121 --- arch/ia64/include/asm/kmap_types.h| 13 arch/microblaze/mm/highmem.c | 78 arch/mips/include/asm/kmap_types.h| 13 arch/nds32/mm/highmem.c | 48 -- arch/parisc/include/asm/kmap_types.h | 13 arch/powerpc/include/asm/kmap_types.h | 13 arch/powerpc/mm/highmem.c | 67
[patch V3 15/37] powerpc/mm/highmem: Switch to generic kmap atomic
No reason having the same code in every architecture Signed-off-by: Thomas Gleixner Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-...@lists.ozlabs.org --- V3: Remove the kmap types cruft --- arch/powerpc/Kconfig |1 arch/powerpc/include/asm/fixmap.h |4 +- arch/powerpc/include/asm/highmem.h|7 ++- arch/powerpc/include/asm/kmap_types.h | 13 -- arch/powerpc/mm/Makefile |1 arch/powerpc/mm/highmem.c | 67 -- arch/powerpc/mm/mem.c |7 --- 7 files changed, 8 insertions(+), 92 deletions(-) --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -409,6 +409,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" depends on PPC32 + select KMAP_LOCAL source "kernel/Kconfig.hz" --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -20,7 +20,7 @@ #include #ifdef CONFIG_HIGHMEM #include -#include +#include #endif #ifdef CONFIG_KASAN @@ -55,7 +55,7 @@ enum fixed_addresses { FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #endif #ifdef CONFIG_PPC_8xx /* For IMMR we need an aligned 512K area */ --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -24,12 +24,10 @@ #ifdef __KERNEL__ #include -#include #include #include #include -extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; /* @@ -60,6 +58,11 @@ extern pte_t *pkmap_page_table; #define flush_cache_kmaps()flush_cache_all() +#define arch_kmap_local_post_map(vaddr, pteval)\ + local_flush_tlb_page(NULL, vaddr) +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_page(NULL, vaddr) + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ --- a/arch/powerpc/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_KMAP_TYPES_H -#define _ASM_POWERPC_KMAP_TYPES_H - -#ifdef __KERNEL__ - -/* - */ - -#define KM_TYPE_NR 16 - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_KMAP_TYPES_H */ --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -16,7 +16,6 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += num obj-$(CONFIG_PPC_MM_SLICES)+= slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o -obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_PPC_PTDUMP) += ptdump/ obj-$(CONFIG_KASAN)+= kasan/ --- a/arch/powerpc/mm/highmem.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * highmem.c: virtual kernel memory mappings for high memory - * - * PowerPC version, stolen from the i386 version. - * - * Used in CONFIG_HIGHMEM systems for memory pages which - * are not addressable by direct kernel virtual addresses. - * - * Copyright (C) 1999 Gerhard Wichert, Siemens AG - * gerhard.wich...@pdb.siemens.de - * - * - * Redesigned the x86 32-bit VM architecture to deal with - * up to 16 Terrabyte physical memory. With current x86 CPUs - * we now support up to 64 Gigabytes physical RAM. - * - * Copyright (C) 1999 Ingo Molnar - * - * Reworked for PowerPC by various contributors. Moved from - * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp. - */ - -#include -#include - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx))); - __set_pte_at(_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1); - local_flush_tlb_page(NULL, vaddr); - - return (void*) vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - - if (vaddr < __fix_to_virt(FIX_KMAP_END)) - return; - - if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) { - int type = kmap_atomic_idx(); - unsigned int idx; - - idx = type + KM_TYPE_NR * smp_processor_id(); - WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - /* -* force other mappings to Oops if they'll try to access -* this pte without first remap it -*/ - pte_clear(_mm, vaddr, kmap_pte-idx); - local_flush_tlb_page(NULL, vaddr); - } - -
[patch V3 04/37] sh/highmem: Remove all traces of unused cruft
For whatever reasons SH has highmem bits all over the place but does not enable it via Kconfig. Remove the bitrot. Signed-off-by: Thomas Gleixner --- arch/sh/include/asm/fixmap.h |8 arch/sh/include/asm/kmap_types.h | 15 --- arch/sh/mm/init.c|8 3 files changed, 31 deletions(-) --- a/arch/sh/include/asm/fixmap.h +++ b/arch/sh/include/asm/fixmap.h @@ -13,9 +13,6 @@ #include #include #include -#ifdef CONFIG_HIGHMEM -#include -#endif /* * Here we define all the compile-time 'special' virtual @@ -53,11 +50,6 @@ enum fixed_addresses { FIX_CMAP_BEGIN, FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS) - 1, -#ifdef CONFIG_HIGHMEM - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, -#endif - #ifdef CONFIG_IOREMAP_FIXED /* * FIX_IOREMAP entries are useful for mapping physical address --- a/arch/sh/include/asm/kmap_types.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __SH_KMAP_TYPES_H -#define __SH_KMAP_TYPES_H - -/* Dummy header just to define km_type. */ - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -362,9 +362,6 @@ void __init mem_init(void) mem_init_print_info(NULL); pr_info("virtual kernel memory layout:\n" "fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#ifdef CONFIG_HIGHMEM - "pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#endif "vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" "lowmem : 0x%08lx - 0x%08lx (%4ld MB) (cached)\n" #ifdef CONFIG_UNCACHED_MAPPING @@ -376,11 +373,6 @@ void __init mem_init(void) FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, -#ifdef CONFIG_HIGHMEM - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, - (LAST_PKMAP*PAGE_SIZE) >> 10, -#endif - (unsigned long)VMALLOC_START, VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 01/37] mm/highmem: Un-EXPORT __kmap_atomic_idx()
Nothing in modules can use that. Signed-off-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Cc: Andrew Morton Cc: linux...@kvack.org --- mm/highmem.c |2 -- 1 file changed, 2 deletions(-) --- a/mm/highmem.c +++ b/mm/highmem.c @@ -108,8 +108,6 @@ static inline wait_queue_head_t *get_pkm atomic_long_t _totalhigh_pages __read_mostly; EXPORT_SYMBOL(_totalhigh_pages); -EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); - unsigned int nr_free_highpages (void) { struct zone *zone; ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 03/37] fs: Remove asm/kmap_types.h includes
Historical leftovers from the time where kmap() had fixed slots. Signed-off-by: Thomas Gleixner Cc: Alexander Viro Cc: Benjamin LaHaise Cc: linux-fsde...@vger.kernel.org Cc: linux-...@kvack.org Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: linux-bt...@vger.kernel.org --- fs/aio.c |1 - fs/btrfs/ctree.h |1 - 2 files changed, 2 deletions(-) --- a/fs/aio.c +++ b/fs/aio.c @@ -43,7 +43,6 @@ #include #include -#include #include #include --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 05/37] asm-generic: Provide kmap_size.h
kmap_types.h is a misnomer because the old atomic MAP based array does not exist anymore and the whole indirection of architectures including kmap_types.h is inconinstent and does not allow to provide guard page debugging for this misfeature. Add a common header file which defines the mapping stack size for all architectures. Will be used when converting architectures over to a generic kmap_local/atomic implementation. The array size is chosen with the following constraints in mind: - The deepest nest level in one context is 3 according to code inspection. - The worst case nesting for the upcoming reemptible version would be: 2 maps in task context and a fault inside 2 maps in the fault handler 3 maps in softirq 2 maps in interrupt So a total of 16 is sufficient and probably overestimated. Signed-off-by: Thomas Gleixner --- V3: New patch --- include/asm-generic/Kbuild |1 + include/asm-generic/kmap_size.h | 12 2 files changed, 13 insertions(+) --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -31,6 +31,7 @@ mandatory-y += irq_regs.h mandatory-y += irq_work.h mandatory-y += kdebug.h mandatory-y += kmap_types.h +mandatory-y += kmap_size.h mandatory-y += kprobes.h mandatory-y += linkage.h mandatory-y += local.h --- /dev/null +++ b/include/asm-generic/kmap_size.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_KMAP_SIZE_H +#define _ASM_GENERIC_KMAP_SIZE_H + +/* For debug this provides guard pages between the maps */ +#ifdef CONFIG_DEBUG_HIGHMEM +# define KM_MAX_IDX33 +#else +# define KM_MAX_IDX16 +#endif + +#endif ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 08/37] x86/mm/highmem: Use generic kmap atomic implementation
Convert X86 to the generic kmap atomic implementation and make the iomap_atomic() naming convention consistent while at it. Signed-off-by: Thomas Gleixner Cc: x...@kernel.org --- V3: Remove the kmap_types cruft --- arch/x86/Kconfig |3 + arch/x86/include/asm/fixmap.h |5 +- arch/x86/include/asm/highmem.h| 13 +-- arch/x86/include/asm/iomap.h | 18 +- arch/x86/include/asm/kmap_types.h | 13 --- arch/x86/include/asm/paravirt_types.h |1 arch/x86/mm/highmem_32.c | 59 -- arch/x86/mm/init_32.c | 15 arch/x86/mm/iomap_32.c| 59 ++ include/linux/highmem.h |2 - include/linux/io-mapping.h|2 - mm/highmem.c |2 - 12 files changed, 31 insertions(+), 161 deletions(-) --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -14,10 +14,11 @@ config X86_32 select ARCH_WANT_IPC_PARSE_VERSION select CLKSRC_I8253 select CLONE_BACKWARDS + select GENERIC_VDSO_32 select HAVE_DEBUG_STACKOVERFLOW + select KMAP_LOCAL select MODULES_USE_ELF_REL select OLD_SIGACTION - select GENERIC_VDSO_32 config X86_64 def_bool y --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -31,7 +31,7 @@ #include #ifdef CONFIG_X86_32 #include -#include +#include #else #include #endif @@ -94,7 +94,7 @@ enum fixed_addresses { #endif #ifdef CONFIG_X86_32 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif @@ -151,7 +151,6 @@ extern void reserve_top_address(unsigned extern int fixmaps_set; -extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -23,7 +23,6 @@ #include #include -#include #include #include #include @@ -58,11 +57,17 @@ extern unsigned long highstart_pfn, high #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -void *kmap_atomic_pfn(unsigned long pfn); -void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); - #define flush_cache_kmaps()do { } while (0) +#definearch_kmap_local_post_map(vaddr, pteval) \ + arch_flush_lazy_mmu_mode() + +#definearch_kmap_local_post_unmap(vaddr) \ + do {\ + flush_tlb_one_kernel((vaddr)); \ + arch_flush_lazy_mmu_mode(); \ + } while (0) + extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn); --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -9,19 +9,21 @@ #include #include #include +#include #include #include -void __iomem * -iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); +void __iomem *iomap_atomic_pfn_prot(unsigned long pfn, pgprot_t prot); -void -iounmap_atomic(void __iomem *kvaddr); +static inline void iounmap_atomic(void __iomem *vaddr) +{ + kunmap_local_indexed((void __force *)vaddr); + pagefault_enable(); + preempt_enable(); +} -int -iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); -void -iomap_free(resource_size_t base, unsigned long size); +void iomap_free(resource_size_t base, unsigned long size); #endif /* _ASM_X86_IOMAP_H */ --- a/arch/x86/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_KMAP_TYPES_H -#define _ASM_X86_KMAP_TYPES_H - -#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif /* _ASM_X86_KMAP_TYPES_H */ --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -41,7 +41,6 @@ #ifndef __ASSEMBLY__ #include -#include #include #include --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -4,65 +4,6 @@ #include /* for totalram_pages */ #include -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - BUG_ON(!pte_none(*(kmap_pte-idx))); - set_pte(kmap_pte-idx, mk_pte(page, prot)); - arch_flush_lazy_mmu_mode(); - - return
[patch V3 07/37] highmem: Make DEBUG_HIGHMEM functional
For some obscure reason when CONFIG_DEBUG_HIGHMEM is enabled the stack depth is increased from 20 to 41. But the only thing DEBUG_HIGHMEM does is to enable a few BUG_ON()'s in the mapping code. That's a leftover from the historical mapping code which had fixed entries for various purposes. DEBUG_HIGHMEM inserted guard mappings between the map types. But that got all ditched when kmap_atomic() switched to a stack based map management. Though the WITH_KM_FENCE magic survived without being functional. All the thing does today is to increase the stack depth. Add a working implementation to the generic kmap_local* implementation. Signed-off-by: Thomas Gleixner --- V3: New patch --- mm/highmem.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) --- a/mm/highmem.c +++ b/mm/highmem.c @@ -374,9 +374,19 @@ EXPORT_SYMBOL(kunmap_high); static DEFINE_PER_CPU(int, __kmap_local_idx); +/* + * With DEBUG_HIGHMEM the stack depth is doubled and every second + * slot is unused which acts as a guard page + */ +#ifdef CONFIG_DEBUG_HIGHMEM +# define KM_INCR 2 +#else +# define KM_INCR 1 +#endif + static inline int kmap_local_idx_push(void) { - int idx = __this_cpu_inc_return(__kmap_local_idx) - 1; + int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1; WARN_ON_ONCE(in_irq() && !irqs_disabled()); BUG_ON(idx >= KM_MAX_IDX); @@ -390,7 +400,7 @@ static inline int kmap_local_idx(void) static inline void kmap_local_idx_pop(void) { - int idx = __this_cpu_dec_return(__kmap_local_idx); + int idx = __this_cpu_sub_return(__kmap_local_idx, KM_INCR); BUG_ON(idx < 0); } ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[patch V3 10/37] ARM: highmem: Switch to generic kmap atomic
No reason having the same code in every architecture. Signed-off-by: Thomas Gleixner Cc: Russell King Cc: Arnd Bergmann Cc: linux-arm-ker...@lists.infradead.org --- V3: Remove the kmap types cruft --- arch/arm/Kconfig |1 arch/arm/include/asm/fixmap.h |4 - arch/arm/include/asm/highmem.h| 33 +++--- arch/arm/include/asm/kmap_types.h | 10 --- arch/arm/mm/Makefile |1 arch/arm/mm/highmem.c | 121 -- 6 files changed, 26 insertions(+), 144 deletions(-) --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1498,6 +1498,7 @@ config HAVE_ARCH_PFN_VALID config HIGHMEM bool "High Memory Support" depends on MMU + select KMAP_LOCAL help The address space of ARM processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -7,14 +7,14 @@ #define FIXADDR_TOP(FIXADDR_END - PAGE_SIZE) #include -#include +#include enum fixed_addresses { FIX_EARLYCON_MEM_BASE, __end_of_permanent_fixed_addresses, FIX_KMAP_BEGIN = __end_of_permanent_fixed_addresses, - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, /* Support writing RO kernel text via kprobes, jump labels, etc. */ FIX_TEXT_POKE0, --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -2,7 +2,7 @@ #ifndef _ASM_HIGHMEM_H #define _ASM_HIGHMEM_H -#include +#include #define PKMAP_BASE (PAGE_OFFSET - PMD_SIZE) #define LAST_PKMAP PTRS_PER_PTE @@ -46,19 +46,32 @@ extern pte_t *pkmap_page_table; #ifdef ARCH_NEEDS_KMAP_HIGH_GET extern void *kmap_high_get(struct page *page); -#else + +static inline void *arch_kmap_local_high_get(struct page *page) +{ + if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !cache_is_vivt()) + return NULL; + return kmap_high_get(page); +} +#define arch_kmap_local_high_get arch_kmap_local_high_get + +#else /* ARCH_NEEDS_KMAP_HIGH_GET */ static inline void *kmap_high_get(struct page *page) { return NULL; } -#endif +#endif /* !ARCH_NEEDS_KMAP_HIGH_GET */ -/* - * The following functions are already defined by - * when CONFIG_HIGHMEM is not set. - */ -#ifdef CONFIG_HIGHMEM -extern void *kmap_atomic_pfn(unsigned long pfn); -#endif +#define arch_kmap_local_post_map(vaddr, pteval) \ + local_flush_tlb_kernel_page(vaddr) + +#define arch_kmap_local_pre_unmap(vaddr) \ +do { \ + if (cache_is_vivt())\ + __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); \ +} while (0) + +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_kernel_page(vaddr) #endif --- a/arch/arm/include/asm/kmap_types.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ARM_KMAP_TYPES_H -#define __ARM_KMAP_TYPES_H - -/* - * This is the "bare minimum". AIO seems to require this. - */ -#define KM_TYPE_NR 16 - -#endif --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_DEBUG_VIRTUAL)+= physaddr.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o -obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o --- a/arch/arm/mm/highmem.c +++ /dev/null @@ -1,121 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * arch/arm/mm/highmem.c -- ARM highmem support - * - * Author: Nicolas Pitre - * Created:september 8, 2008 - * Copyright: Marvell Semiconductors Inc. - */ - -#include -#include -#include -#include -#include -#include -#include "mm.h" - -static inline void set_fixmap_pte(int idx, pte_t pte) -{ - unsigned long vaddr = __fix_to_virt(idx); - pte_t *ptep = virt_to_kpte(vaddr); - - set_pte_ext(ptep, pte, 0); - local_flush_tlb_kernel_page(vaddr); -} - -static inline pte_t get_fixmap_pte(unsigned long vaddr) -{ - pte_t *ptep = virt_to_kpte(vaddr); - - return *ptep; -} - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned int idx; - unsigned long vaddr; - void *kmap; - int type; - -#ifdef CONFIG_DEBUG_HIGHMEM - /* -* There is no cache coherency issue when non VIVT, so force the -* dedicated kmap usage for better debugging purposes in that case. -*/ - if (!cache_is_vivt()) - kmap = NULL; - else -#endif - kmap = kmap_high_get(page); - if (kmap) - return kmap; - - type =
[patch V3 06/37] highmem: Provide generic variant of kmap_atomic*
The kmap_atomic* interfaces in all architectures are pretty much the same except for post map operations (flush) and pre- and post unmap operations. Provide a generic variant for that. Signed-off-by: Thomas Gleixner Cc: Andrew Morton Cc: linux...@kvack.org --- V3: Do not reuse the kmap_atomic_idx pile and use kmap_size.h right away V2: Address review comments from Christoph (style and EXPORT variant) --- include/linux/highmem.h | 82 ++- mm/Kconfig |3 + mm/highmem.c| 144 +++- 3 files changed, 211 insertions(+), 18 deletions(-) --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -31,9 +31,16 @@ static inline void invalidate_kernel_vma #include +/* + * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. + */ +#ifdef CONFIG_KMAP_LOCAL +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); +void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void kunmap_local_indexed(void *vaddr); +#endif + #ifdef CONFIG_HIGHMEM -extern void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); -extern void kunmap_atomic_high(void *kvaddr); #include #ifndef ARCH_HAS_KMAP_FLUSH_TLB @@ -81,6 +88,11 @@ static inline void kunmap(struct page *p * be used in IRQ contexts, so in some (very limited) cases we need * it. */ + +#ifndef CONFIG_KMAP_LOCAL +void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); +void kunmap_atomic_high(void *kvaddr); + static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); @@ -89,7 +101,38 @@ static inline void *kmap_atomic_prot(str return page_address(page); return kmap_atomic_high_prot(page, prot); } -#define kmap_atomic(page) kmap_atomic_prot(page, kmap_prot) + +static inline void __kunmap_atomic(void *vaddr) +{ + kunmap_atomic_high(vaddr); +} +#else /* !CONFIG_KMAP_LOCAL */ + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_atomic(void *addr) +{ + kunmap_local_indexed(addr); +} + +#endif /* CONFIG_KMAP_LOCAL */ + +static inline void *kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); @@ -147,25 +190,33 @@ static inline void *kmap_atomic(struct p pagefault_disable(); return page_address(page); } -#define kmap_atomic_prot(page, prot) kmap_atomic(page) -static inline void kunmap_atomic_high(void *addr) +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + return kmap_atomic(page); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + return kmap_atomic(pfn_to_page(pfn)); +} + +static inline void __kunmap_atomic(void *addr) { /* * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic() -* handles re-enabling faults + preemption +* handles re-enabling faults and preemption */ #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(addr); #endif } -#define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) - #define kmap_flush_unused()do {} while(0) #endif /* CONFIG_HIGHMEM */ +#if !defined(CONFIG_KMAP_LOCAL) #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) DECLARE_PER_CPU(int, __kmap_atomic_idx); @@ -196,22 +247,21 @@ static inline void kmap_atomic_idx_pop(v __this_cpu_dec(__kmap_atomic_idx); #endif } - +#endif #endif /* * Prevent people trying to call kunmap_atomic() as if it were kunmap() * kunmap_atomic() should get the return value of kmap_atomic, not the page. */ -#define kunmap_atomic(addr) \ -do {\ - BUILD_BUG_ON(__same_type((addr), struct page *)); \ - kunmap_atomic_high(addr); \ - pagefault_enable(); \ - preempt_enable(); \ +#define kunmap_atomic(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_atomic(__addr);\ + pagefault_enable(); \ + preempt_enable(); \ } while (0) - /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void
[patch V3 02/37] highmem: Remove unused functions
Nothing uses totalhigh_pages_dec() and totalhigh_pages_set(). Signed-off-by: Thomas Gleixner --- V3: New patch --- include/linux/highmem.h | 10 -- 1 file changed, 10 deletions(-) --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -104,21 +104,11 @@ static inline void totalhigh_pages_inc(v atomic_long_inc(&_totalhigh_pages); } -static inline void totalhigh_pages_dec(void) -{ - atomic_long_dec(&_totalhigh_pages); -} - static inline void totalhigh_pages_add(long count) { atomic_long_add(count, &_totalhigh_pages); } -static inline void totalhigh_pages_set(long val) -{ - atomic_long_set(&_totalhigh_pages, val); -} - void kmap_flush_unused(void); struct page *kmap_to_page(void *addr); ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 04/17] vhost: prep vhost_dev_init users to handle failures
Hi Mike, url: https://github.com/0day-ci/linux/commits/Mike-Christie/vhost-fix-scsi-cmd-handling-and-cgroup-support/20201022-083844 base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next config: i386-randconfig-m021-20201101 (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot Reported-by: Dan Carpenter smatch warnings: drivers/vhost/vsock.c:648 vhost_vsock_dev_open() error: uninitialized symbol 'ret'. vim +/ret +648 drivers/vhost/vsock.c 433fc58e6bf2c8b Asias He2016-07-28 605 static int vhost_vsock_dev_open(struct inode *inode, struct file *file) 433fc58e6bf2c8b Asias He2016-07-28 606 { 433fc58e6bf2c8b Asias He2016-07-28 607 struct vhost_virtqueue **vqs; 433fc58e6bf2c8b Asias He2016-07-28 608 struct vhost_vsock *vsock; 433fc58e6bf2c8b Asias He2016-07-28 609 int ret; 433fc58e6bf2c8b Asias He2016-07-28 610 433fc58e6bf2c8b Asias He2016-07-28 611 /* This struct is large and allocation could fail, fall back to vmalloc 433fc58e6bf2c8b Asias He2016-07-28 612 * if there is no other way. 433fc58e6bf2c8b Asias He2016-07-28 613 */ dcda9b04713c3f6 Michal Hocko2017-07-12 614 vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL); 433fc58e6bf2c8b Asias He2016-07-28 615 if (!vsock) 433fc58e6bf2c8b Asias He2016-07-28 616 return -ENOMEM; 433fc58e6bf2c8b Asias He2016-07-28 617 433fc58e6bf2c8b Asias He2016-07-28 618 vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); 433fc58e6bf2c8b Asias He2016-07-28 619 if (!vqs) { 433fc58e6bf2c8b Asias He2016-07-28 620 ret = -ENOMEM; 433fc58e6bf2c8b Asias He2016-07-28 621 goto out; 433fc58e6bf2c8b Asias He2016-07-28 622 } 433fc58e6bf2c8b Asias He2016-07-28 623 a72b69dc083a931 Stefan Hajnoczi 2017-11-09 624 vsock->guest_cid = 0; /* no CID assigned yet */ a72b69dc083a931 Stefan Hajnoczi 2017-11-09 625 433fc58e6bf2c8b Asias He2016-07-28 626 atomic_set(>queued_replies, 0); 433fc58e6bf2c8b Asias He2016-07-28 627 433fc58e6bf2c8b Asias He2016-07-28 628 vqs[VSOCK_VQ_TX] = >vqs[VSOCK_VQ_TX]; 433fc58e6bf2c8b Asias He2016-07-28 629 vqs[VSOCK_VQ_RX] = >vqs[VSOCK_VQ_RX]; 433fc58e6bf2c8b Asias He2016-07-28 630 vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; 433fc58e6bf2c8b Asias He2016-07-28 631 vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; 433fc58e6bf2c8b Asias He2016-07-28 632 6e1629548d318c2 Mike Christie 2020-10-21 633 if (vhost_dev_init(>dev, vqs, ARRAY_SIZE(vsock->vqs), e82b9b0727ff6d6 Jason Wang 2019-05-17 634 UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, 6e1629548d318c2 Mike Christie 2020-10-21 635 VHOST_VSOCK_WEIGHT, true, NULL)) 6e1629548d318c2 Mike Christie 2020-10-21 636 goto err_dev_init; ^ "ret" needs to be set here. 433fc58e6bf2c8b Asias He2016-07-28 637 433fc58e6bf2c8b Asias He2016-07-28 638 file->private_data = vsock; 433fc58e6bf2c8b Asias He2016-07-28 639 spin_lock_init(>send_pkt_list_lock); 433fc58e6bf2c8b Asias He2016-07-28 640 INIT_LIST_HEAD(>send_pkt_list); 433fc58e6bf2c8b Asias He2016-07-28 641 vhost_work_init(>send_pkt_work, vhost_transport_send_pkt_work); 433fc58e6bf2c8b Asias He2016-07-28 642 return 0; 433fc58e6bf2c8b Asias He2016-07-28 643 6e1629548d318c2 Mike Christie 2020-10-21 644 err_dev_init: 6e1629548d318c2 Mike Christie 2020-10-21 645 kfree(vqs); 433fc58e6bf2c8b Asias He2016-07-28 646 out: 433fc58e6bf2c8b Asias He2016-07-28 647 vhost_vsock_free(vsock); 433fc58e6bf2c8b Asias He2016-07-28 @648 return ret; 433fc58e6bf2c8b Asias He2016-07-28 649 } --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org .config.gz Description: application/gzip ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v7 09/10] dma-buf-map: Add memcpy and pointer-increment interfaces
To do framebuffer updates, one needs memcpy from system memory and a pointer-increment function. Add both interfaces with documentation. v5: * include to build on sparc64 (Sam) Signed-off-by: Thomas Zimmermann Reviewed-by: Sam Ravnborg Tested-by: Sam Ravnborg --- include/linux/dma-buf-map.h | 73 - 1 file changed, 63 insertions(+), 10 deletions(-) diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 2e8bbecb5091..583a3a1f9447 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -7,6 +7,7 @@ #define __DMA_BUF_MAP_H__ #include +#include /** * DOC: overview @@ -32,6 +33,14 @@ * accessing the buffer. Use the returned instance and the helper functions * to access the buffer's memory in the correct way. * + * The type :c:type:`struct dma_buf_map ` and its helpers are + * actually independent from the dma-buf infrastructure. When sharing buffers + * among devices, drivers have to know the location of the memory to access + * the buffers in a safe way. :c:type:`struct dma_buf_map ` + * solves this problem for dma-buf and its users. If other drivers or + * sub-systems require similar functionality, the type could be generalized + * and moved to a more prominent header file. + * * Open-coding access to :c:type:`struct dma_buf_map ` is * considered bad style. Rather then accessing its fields directly, use one * of the provided helper functions, or implement your own. For example, @@ -51,6 +60,14 @@ * * dma_buf_map_set_vaddr_iomem( 0xdeadbeaf); * + * Instances of struct dma_buf_map do not have to be cleaned up, but + * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings + * always refer to system memory. + * + * .. code-block:: c + * + * dma_buf_map_clear(); + * * Test if a mapping is valid with either dma_buf_map_is_set() or * dma_buf_map_is_null(). * @@ -73,17 +90,19 @@ * if (dma_buf_map_is_equal(_map, _map)) * // always false * - * Instances of struct dma_buf_map do not have to be cleaned up, but - * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings - * always refer to system memory. + * A set up instance of struct dma_buf_map can be used to access or manipulate + * the buffer memory. Depending on the location of the memory, the provided + * helpers will pick the correct operations. Data can be copied into the memory + * with dma_buf_map_memcpy_to(). The address can be manipulated with + * dma_buf_map_incr(). * - * The type :c:type:`struct dma_buf_map ` and its helpers are - * actually independent from the dma-buf infrastructure. When sharing buffers - * among devices, drivers have to know the location of the memory to access - * the buffers in a safe way. :c:type:`struct dma_buf_map ` - * solves this problem for dma-buf and its users. If other drivers or - * sub-systems require similar functionality, the type could be generalized - * and moved to a more prominent header file. + * .. code-block:: c + * + * const void *src = ...; // source buffer + * size_t len = ...; // length of src + * + * dma_buf_map_memcpy_to(, src, len); + * dma_buf_map_incr(, len); // go to first byte after the memcpy */ /** @@ -210,4 +229,38 @@ static inline void dma_buf_map_clear(struct dma_buf_map *map) } } +/** + * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping + * @dst: The dma-buf mapping structure + * @src: The source buffer + * @len: The number of byte in src + * + * Copies data into a dma-buf mapping. The source buffer is in system + * memory. Depending on the buffer's location, the helper picks the correct + * method of accessing the memory. + */ +static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len) +{ + if (dst->is_iomem) + memcpy_toio(dst->vaddr_iomem, src, len); + else + memcpy(dst->vaddr, src, len); +} + +/** + * dma_buf_map_incr - Increments the address stored in a dma-buf mapping + * @map: The dma-buf mapping structure + * @incr: The number of bytes to increment + * + * Increments the address stored in a dma-buf mapping. Depending on the + * buffer's location, the correct value will be updated. + */ +static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr) +{ + if (map->is_iomem) + map->vaddr_iomem += incr; + else + map->vaddr += incr; +} + #endif /* __DMA_BUF_MAP_H__ */ -- 2.29.0 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v7 10/10] drm/fb_helper: Support framebuffers in I/O memory
At least sparc64 requires I/O-specific access to framebuffers. This patch updates the fbdev console accordingly. For drivers with direct access to the framebuffer memory, the callback functions in struct fb_ops test for the type of memory and call the rsp fb_sys_ of fb_cfb_ functions. Read and write operations are implemented internally by DRM's fbdev helper. For drivers that employ a shadow buffer, fbdev's blit function retrieves the framebuffer address as struct dma_buf_map, and uses dma_buf_map interfaces to access the buffer. The bochs driver on sparc64 uses a workaround to flag the framebuffer as I/O memory and avoid a HW exception. With the introduction of struct dma_buf_map, this is not required any longer. The patch removes the rsp code from both, bochs and fbdev. v7: * use min_t(size_t,) (kernel test robot) * return the number of bytes read/written, if any (fbdev testcase) v5: * implement fb_read/fb_write internally (Daniel, Sam) v4: * move dma_buf_map changes into separate patch (Daniel) * TODO list: comment on fbdev updates (Daniel) Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Reviewed-by: Sam Ravnborg Tested-by: Sam Ravnborg --- Documentation/gpu/todo.rst| 19 ++- drivers/gpu/drm/bochs/bochs_kms.c | 1 - drivers/gpu/drm/drm_fb_helper.c | 220 -- include/drm/drm_mode_config.h | 12 -- 4 files changed, 223 insertions(+), 29 deletions(-) diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 59f63f1d7680..acca232b025b 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -201,13 +201,28 @@ Convert drivers to use drm_fbdev_generic_setup() Most drivers can use drm_fbdev_generic_setup(). Driver have to implement -atomic modesetting and GEM vmap support. Current generic fbdev emulation -expects the framebuffer in system memory (or system-like memory). +atomic modesetting and GEM vmap support. Historically, generic fbdev emulation +expected the framebuffer in system memory or system-like memory. By employing +struct dma_buf_map, drivers with frambuffers in I/O memory can be supported +as well. Contact: Maintainer of the driver you plan to convert Level: Intermediate +Reimplement functions in drm_fbdev_fb_ops without fbdev +--- + +A number of callback functions in drm_fbdev_fb_ops could benefit from +being rewritten without dependencies on the fbdev module. Some of the +helpers could further benefit from using struct dma_buf_map instead of +raw pointers. + +Contact: Thomas Zimmermann , Daniel Vetter + +Level: Advanced + + drm_framebuffer_funcs and drm_mode_config_funcs.fb_create cleanup - diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c index 13d0d04c4457..853081d186d5 100644 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ b/drivers/gpu/drm/bochs/bochs_kms.c @@ -151,7 +151,6 @@ int bochs_kms_init(struct bochs_device *bochs) bochs->dev->mode_config.preferred_depth = 24; bochs->dev->mode_config.prefer_shadow = 0; bochs->dev->mode_config.prefer_shadow_fbdev = 1; - bochs->dev->mode_config.fbdev_use_iomem = true; bochs->dev->mode_config.quirk_addfb_prefer_host_byte_order = true; bochs->dev->mode_config.funcs = _mode_funcs; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index a0d88130fedb..01ba1da28511 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -372,24 +372,22 @@ static void drm_fb_helper_resume_worker(struct work_struct *work) } static void drm_fb_helper_dirty_blit_real(struct drm_fb_helper *fb_helper, - struct drm_clip_rect *clip) + struct drm_clip_rect *clip, + struct dma_buf_map *dst) { struct drm_framebuffer *fb = fb_helper->fb; unsigned int cpp = fb->format->cpp[0]; size_t offset = clip->y1 * fb->pitches[0] + clip->x1 * cpp; void *src = fb_helper->fbdev->screen_buffer + offset; - void *dst = fb_helper->buffer->map.vaddr + offset; size_t len = (clip->x2 - clip->x1) * cpp; unsigned int y; - for (y = clip->y1; y < clip->y2; y++) { - if (!fb_helper->dev->mode_config.fbdev_use_iomem) - memcpy(dst, src, len); - else - memcpy_toio((void __iomem *)dst, src, len); + dma_buf_map_incr(dst, offset); /* go to first pixel within clip rect */ + for (y = clip->y1; y < clip->y2; y++) { + dma_buf_map_memcpy_to(dst, src, len); + dma_buf_map_incr(dst, fb->pitches[0]); src += fb->pitches[0]; - dst += fb->pitches[0];
[PATCH v7 01/10] drm/vram-helper: Remove invariant parameters from internal kmap function
The parameters map and is_iomem are always of the same value. Removed them to prepares the function for conversion to struct dma_buf_map. v4: * don't check for !kmap->virtual; will always be false Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Reviewed-by: Christian König Tested-by: Sam Ravnborg --- drivers/gpu/drm/drm_gem_vram_helper.c | 18 -- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index 16d68c04ea5d..e305fadb8bc8 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -378,32 +378,22 @@ int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo) } EXPORT_SYMBOL(drm_gem_vram_unpin); -static void *drm_gem_vram_kmap_locked(struct drm_gem_vram_object *gbo, - bool map, bool *is_iomem) +static void *drm_gem_vram_kmap_locked(struct drm_gem_vram_object *gbo) { int ret; struct ttm_bo_kmap_obj *kmap = >kmap; + bool is_iomem; if (gbo->kmap_use_count > 0) goto out; - if (kmap->virtual || !map) - goto out; - ret = ttm_bo_kmap(>bo, 0, gbo->bo.num_pages, kmap); if (ret) return ERR_PTR(ret); out: - if (!kmap->virtual) { - if (is_iomem) - *is_iomem = false; - return NULL; /* not mapped; don't increment ref */ - } ++gbo->kmap_use_count; - if (is_iomem) - return ttm_kmap_obj_virtual(kmap, is_iomem); - return kmap->virtual; + return ttm_kmap_obj_virtual(kmap, _iomem); } static void drm_gem_vram_kunmap_locked(struct drm_gem_vram_object *gbo) @@ -448,7 +438,7 @@ void *drm_gem_vram_vmap(struct drm_gem_vram_object *gbo) ret = drm_gem_vram_pin_locked(gbo, 0); if (ret) goto err_ttm_bo_unreserve; - base = drm_gem_vram_kmap_locked(gbo, true, NULL); + base = drm_gem_vram_kmap_locked(gbo); if (IS_ERR(base)) { ret = PTR_ERR(base); goto err_drm_gem_vram_unpin_locked; -- 2.29.0 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v7 05/10] drm/ttm: Add vmap/vunmap to TTM and TTM GEM helpers
The new functions ttm_bo_{vmap,vunmap}() map and unmap a TTM BO in kernel address space. The mapping's address is returned as struct dma_buf_map. Each function is a simplified version of TTM's existing kmap code. Both functions respect the memory's location ani/or writecombine flags. On top TTM's functions, GEM TTM helpers got drm_gem_ttm_{vmap,vunmap}(), two helpers that convert a GEM object into the TTM BO and forward the call to TTM's vmap/vunmap. These helpers can be dropped into the rsp GEM object callbacks. v5: * use size_t for storing mapping size (Christian) * ignore premapped memory areas correctly in ttm_bo_vunmap() * rebase onto latest TTM interfaces (Christian) * remove BUG() from ttm_bo_vmap() (Christian) v4: * drop ttm_kmap_obj_to_dma_buf() in favor of vmap helpers (Daniel, Christian) Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Acked-by: Daniel Vetter Tested-by: Sam Ravnborg --- drivers/gpu/drm/drm_gem_ttm_helper.c | 38 +++ drivers/gpu/drm/ttm/ttm_bo_util.c| 72 include/drm/drm_gem_ttm_helper.h | 6 +++ include/drm/ttm/ttm_bo_api.h | 28 +++ include/linux/dma-buf-map.h | 20 5 files changed, 164 insertions(+) diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c index 0e4fb9ba43ad..db4c14d78a30 100644 --- a/drivers/gpu/drm/drm_gem_ttm_helper.c +++ b/drivers/gpu/drm/drm_gem_ttm_helper.c @@ -49,6 +49,44 @@ void drm_gem_ttm_print_info(struct drm_printer *p, unsigned int indent, } EXPORT_SYMBOL(drm_gem_ttm_print_info); +/** + * drm_gem_ttm_vmap() - vmap _buffer_object + * @gem: GEM object. + * @map: [out] returns the dma-buf mapping. + * + * Maps a GEM object with ttm_bo_vmap(). This function can be used as + * _gem_object_funcs.vmap callback. + * + * Returns: + * 0 on success, or a negative errno code otherwise. + */ +int drm_gem_ttm_vmap(struct drm_gem_object *gem, +struct dma_buf_map *map) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + + return ttm_bo_vmap(bo, map); + +} +EXPORT_SYMBOL(drm_gem_ttm_vmap); + +/** + * drm_gem_ttm_vunmap() - vunmap _buffer_object + * @gem: GEM object. + * @map: dma-buf mapping. + * + * Unmaps a GEM object with ttm_bo_vunmap(). This function can be used as + * _gem_object_funcs.vmap callback. + */ +void drm_gem_ttm_vunmap(struct drm_gem_object *gem, + struct dma_buf_map *map) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + + ttm_bo_vunmap(bo, map); +} +EXPORT_SYMBOL(drm_gem_ttm_vunmap); + /** * drm_gem_ttm_mmap() - mmap _buffer_object * @gem: GEM object. diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ecb54415d1ca..7ccb2295cac1 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -471,6 +472,77 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map) } EXPORT_SYMBOL(ttm_bo_kunmap); +int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map) +{ + struct ttm_resource *mem = >mem; + int ret; + + ret = ttm_mem_io_reserve(bo->bdev, mem); + if (ret) + return ret; + + if (mem->bus.is_iomem) { + void __iomem *vaddr_iomem; + size_t size = bo->num_pages << PAGE_SHIFT; + + if (mem->bus.addr) + vaddr_iomem = (void __iomem *)mem->bus.addr; + else if (mem->bus.caching == ttm_write_combined) + vaddr_iomem = ioremap_wc(mem->bus.offset, size); + else + vaddr_iomem = ioremap(mem->bus.offset, size); + + if (!vaddr_iomem) + return -ENOMEM; + + dma_buf_map_set_vaddr_iomem(map, vaddr_iomem); + + } else { + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false + }; + struct ttm_tt *ttm = bo->ttm; + pgprot_t prot; + void *vaddr; + + ret = ttm_tt_populate(bo->bdev, ttm, ); + if (ret) + return ret; + + /* +* We need to use vmap to get the desired page protection +* or to make the buffer object look contiguous. +*/ + prot = ttm_io_prot(bo, mem, PAGE_KERNEL); + vaddr = vmap(ttm->pages, bo->num_pages, 0, prot); + if (!vaddr) + return -ENOMEM; + + dma_buf_map_set_vaddr(map, vaddr); + } + + return 0; +} +EXPORT_SYMBOL(ttm_bo_vmap); + +void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct dma_buf_map *map) +{ + struct ttm_resource *mem =
[PATCH v7 06/10] drm/gem: Use struct dma_buf_map in GEM vmap ops and convert GEM backends
This patch replaces the vmap/vunmap's use of raw pointers in GEM object functions with instances of struct dma_buf_map. GEM backends are converted as well. For most of them, this simply changes the returned type. TTM-based drivers now return information about the location of the memory, either system or I/O memory. GEM VRAM helpers and qxl now use ttm_bo_vmap() et al. Amdgpu, nouveau and radeon use drm_gem_ttm_vmap() et al instead of implementing their own vmap callbacks. v7: * init QXL cursor to mapped BO buffer (kernel test robot) v5: * update vkms after switch to shmem v4: * use ttm_bo_vmap(), drm_gem_ttm_vmap(), et al. (Daniel, Christian) * fix a trailing { in drm_gem_vmap() * remove several empty functions instead of converting them (Daniel) * comment uses of raw pointers with a TODO (Daniel) * TODO list: convert more helpers to use struct dma_buf_map Signed-off-by: Thomas Zimmermann Acked-by: Christian König Tested-by: Sam Ravnborg --- Documentation/gpu/todo.rst | 18 drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 36 --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 - drivers/gpu/drm/ast/ast_cursor.c| 27 +++-- drivers/gpu/drm/ast/ast_drv.h | 7 +- drivers/gpu/drm/drm_gem.c | 23 +++-- drivers/gpu/drm/drm_gem_cma_helper.c| 10 +- drivers/gpu/drm/drm_gem_shmem_helper.c | 48 + drivers/gpu/drm/drm_gem_vram_helper.c | 107 ++-- drivers/gpu/drm/etnaviv/etnaviv_drv.h | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 9 +- drivers/gpu/drm/lima/lima_gem.c | 6 +- drivers/gpu/drm/lima/lima_sched.c | 11 +- drivers/gpu/drm/mgag200/mgag200_mode.c | 10 +- drivers/gpu/drm/nouveau/Kconfig | 1 + drivers/gpu/drm/nouveau/nouveau_bo.h| 2 - drivers/gpu/drm/nouveau/nouveau_gem.c | 6 +- drivers/gpu/drm/nouveau/nouveau_gem.h | 2 - drivers/gpu/drm/nouveau/nouveau_prime.c | 20 drivers/gpu/drm/panfrost/panfrost_perfcnt.c | 14 +-- drivers/gpu/drm/qxl/qxl_display.c | 15 ++- drivers/gpu/drm/qxl/qxl_draw.c | 14 ++- drivers/gpu/drm/qxl/qxl_drv.h | 11 +- drivers/gpu/drm/qxl/qxl_object.c| 31 +++--- drivers/gpu/drm/qxl/qxl_object.h| 2 +- drivers/gpu/drm/qxl/qxl_prime.c | 12 +-- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_gem.c | 7 +- drivers/gpu/drm/radeon/radeon_prime.c | 20 drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 22 ++-- drivers/gpu/drm/rockchip/rockchip_drm_gem.h | 4 +- drivers/gpu/drm/tiny/cirrus.c | 10 +- drivers/gpu/drm/tiny/gm12u320.c | 10 +- drivers/gpu/drm/udl/udl_modeset.c | 8 +- drivers/gpu/drm/vboxvideo/vbox_mode.c | 11 +- drivers/gpu/drm/vc4/vc4_bo.c| 6 +- drivers/gpu/drm/vc4/vc4_drv.h | 2 +- drivers/gpu/drm/vgem/vgem_drv.c | 16 ++- drivers/gpu/drm/vkms/vkms_plane.c | 15 ++- drivers/gpu/drm/vkms/vkms_writeback.c | 22 ++-- drivers/gpu/drm/xen/xen_drm_front_gem.c | 18 ++-- drivers/gpu/drm/xen/xen_drm_front_gem.h | 6 +- include/drm/drm_gem.h | 5 +- include/drm/drm_gem_cma_helper.h| 2 +- include/drm/drm_gem_shmem_helper.h | 4 +- include/drm/drm_gem_vram_helper.h | 14 +-- 49 files changed, 349 insertions(+), 308 deletions(-) diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 6b224ef14455..59f63f1d7680 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -450,6 +450,24 @@ Contact: Ville Syrjälä, Daniel Vetter Level: Intermediate +Use struct dma_buf_map throughout codebase +-- + +Pointers to shared device memory are stored in struct dma_buf_map. Each +instance knows whether it refers to system or I/O memory. Most of the DRM-wide +interface have been converted to use struct dma_buf_map, but implementations +often still use raw pointers. + +The task is to use struct dma_buf_map where it makes sense. + +* Memory managers should use struct dma_buf_map for dma-buf-imported buffers. +* TTM might benefit from using struct dma_buf_map internally. +* Framebuffer copying and blitting helpers should operate on struct dma_buf_map. + +Contact: Thomas Zimmermann , Christian König, Daniel Vetter + +Level: Intermediate + Core refactorings = diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 64376dd298ed..f5c7aa7894d5 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -232,6 +232,7 @@ config
[PATCH v7 08/10] drm/gem: Store client buffer mappings as struct dma_buf_map
Kernel DRM clients now store their framebuffer address in an instance of struct dma_buf_map. Depending on the buffer's location, the address refers to system or I/O memory. Callers of drm_client_buffer_vmap() receive a copy of the value in the call's supplied arguments. It can be accessed and modified with dma_buf_map interfaces. v6: * don't call page_to_phys() on framebuffers in I/O memory; warn instead (Daniel) Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Tested-by: Sam Ravnborg --- drivers/gpu/drm/drm_client.c| 34 +++-- drivers/gpu/drm/drm_fb_helper.c | 32 --- include/drm/drm_client.h| 7 --- 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index ac0082bed966..fe573acf1067 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -235,7 +235,7 @@ static void drm_client_buffer_delete(struct drm_client_buffer *buffer) { struct drm_device *dev = buffer->client->dev; - drm_gem_vunmap(buffer->gem, buffer->vaddr); + drm_gem_vunmap(buffer->gem, >map); if (buffer->gem) drm_gem_object_put(buffer->gem); @@ -291,25 +291,31 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u /** * drm_client_buffer_vmap - Map DRM client buffer into address space * @buffer: DRM client buffer + * @map_copy: Returns the mapped memory's address * * This function maps a client buffer into kernel address space. If the - * buffer is already mapped, it returns the mapping's address. + * buffer is already mapped, it returns the existing mapping's address. * * Client buffer mappings are not ref'counted. Each call to * drm_client_buffer_vmap() should be followed by a call to * drm_client_buffer_vunmap(); or the client buffer should be mapped * throughout its lifetime. * + * The returned address is a copy of the internal value. In contrast to + * other vmap interfaces, you don't need it for the client's vunmap + * function. So you can modify it at will during blit and draw operations. + * * Returns: - * The mapped memory's address + * 0 on success, or a negative errno code otherwise. */ -void *drm_client_buffer_vmap(struct drm_client_buffer *buffer) +int +drm_client_buffer_vmap(struct drm_client_buffer *buffer, struct dma_buf_map *map_copy) { - struct dma_buf_map map; + struct dma_buf_map *map = >map; int ret; - if (buffer->vaddr) - return buffer->vaddr; + if (dma_buf_map_is_set(map)) + goto out; /* * FIXME: The dependency on GEM here isn't required, we could @@ -319,13 +325,14 @@ void *drm_client_buffer_vmap(struct drm_client_buffer *buffer) * fd_install step out of the driver backend hooks, to make that * final step optional for internal users. */ - ret = drm_gem_vmap(buffer->gem, ); + ret = drm_gem_vmap(buffer->gem, map); if (ret) - return ERR_PTR(ret); + return ret; - buffer->vaddr = map.vaddr; +out: + *map_copy = *map; - return map.vaddr; + return 0; } EXPORT_SYMBOL(drm_client_buffer_vmap); @@ -339,10 +346,9 @@ EXPORT_SYMBOL(drm_client_buffer_vmap); */ void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) { - struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(buffer->vaddr); + struct dma_buf_map *map = >map; - drm_gem_vunmap(buffer->gem, ); - buffer->vaddr = NULL; + drm_gem_vunmap(buffer->gem, map); } EXPORT_SYMBOL(drm_client_buffer_vunmap); diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 92e0db30fdf7..a0d88130fedb 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -378,7 +378,7 @@ static void drm_fb_helper_dirty_blit_real(struct drm_fb_helper *fb_helper, unsigned int cpp = fb->format->cpp[0]; size_t offset = clip->y1 * fb->pitches[0] + clip->x1 * cpp; void *src = fb_helper->fbdev->screen_buffer + offset; - void *dst = fb_helper->buffer->vaddr + offset; + void *dst = fb_helper->buffer->map.vaddr + offset; size_t len = (clip->x2 - clip->x1) * cpp; unsigned int y; @@ -400,7 +400,8 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) struct drm_clip_rect *clip = >dirty_clip; struct drm_clip_rect clip_copy; unsigned long flags; - void *vaddr; + struct dma_buf_map map; + int ret; spin_lock_irqsave(>dirty_lock, flags); clip_copy = *clip; @@ -413,8 +414,8 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) /* Generic fbdev uses a shadow buffer */ if (helper->buffer) { - vaddr = drm_client_buffer_vmap(helper->buffer); -
[PATCH v7 02/10] drm/cma-helper: Remove empty drm_gem_cma_prime_vunmap()
The function drm_gem_cma_prime_vunmap() is empty. Remove it before changing the interface to use struct drm_buf_map. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Tested-by: Sam Ravnborg --- drivers/gpu/drm/drm_gem_cma_helper.c | 17 - drivers/gpu/drm/vc4/vc4_bo.c | 1 - include/drm/drm_gem_cma_helper.h | 1 - 3 files changed, 19 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 2165633c9b9e..d527485ea0b7 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -537,23 +537,6 @@ void *drm_gem_cma_prime_vmap(struct drm_gem_object *obj) } EXPORT_SYMBOL_GPL(drm_gem_cma_prime_vmap); -/** - * drm_gem_cma_prime_vunmap - unmap a CMA GEM object from the kernel's virtual - * address space - * @obj: GEM object - * @vaddr: kernel virtual address where the CMA GEM object was mapped - * - * This function removes a buffer exported via DRM PRIME from the kernel's - * virtual address space. This is a no-op because CMA buffers cannot be - * unmapped from kernel space. Drivers using the CMA helpers should set this - * as their _gem_object_funcs.vunmap callback. - */ -void drm_gem_cma_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ - /* Nothing to do */ -} -EXPORT_SYMBOL_GPL(drm_gem_cma_prime_vunmap); - static const struct drm_gem_object_funcs drm_gem_cma_default_funcs = { .free = drm_gem_cma_free_object, .print_info = drm_gem_cma_print_info, diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index f432278173cd..557f0d1e6437 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -387,7 +387,6 @@ static const struct drm_gem_object_funcs vc4_gem_object_funcs = { .export = vc4_prime_export, .get_sg_table = drm_gem_cma_prime_get_sg_table, .vmap = vc4_prime_vmap, - .vunmap = drm_gem_cma_prime_vunmap, .vm_ops = _vm_ops, }; diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index 2bfa2502607a..a064b0d1c480 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -104,7 +104,6 @@ drm_gem_cma_prime_import_sg_table(struct drm_device *dev, int drm_gem_cma_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); void *drm_gem_cma_prime_vmap(struct drm_gem_object *obj); -void drm_gem_cma_prime_vunmap(struct drm_gem_object *obj, void *vaddr); struct drm_gem_object * drm_gem_cma_create_object_default_funcs(struct drm_device *dev, size_t size); -- 2.29.0 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v7 03/10] drm/etnaviv: Remove empty etnaviv_gem_prime_vunmap()
The function etnaviv_gem_prime_vunmap() is empty. Remove it before changing the interface to use struct drm_buf_map. Signed-off-by: Thomas Zimmermann Acked-by: Christian König Tested-by: Sam Ravnborg --- drivers/gpu/drm/etnaviv/etnaviv_drv.h | 1 - drivers/gpu/drm/etnaviv/etnaviv_gem.c | 1 - drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 5 - 3 files changed, 7 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h index 914f0867ff71..9682c26d89bb 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h @@ -52,7 +52,6 @@ int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma); int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset); struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj); void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj); -void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int etnaviv_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 67d9a2b9ea6a..bbd235473645 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -571,7 +571,6 @@ static const struct drm_gem_object_funcs etnaviv_gem_object_funcs = { .unpin = etnaviv_gem_prime_unpin, .get_sg_table = etnaviv_gem_prime_get_sg_table, .vmap = etnaviv_gem_prime_vmap, - .vunmap = etnaviv_gem_prime_vunmap, .vm_ops = _ops, }; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index 135fbff6fecf..a6d9932a32ae 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -27,11 +27,6 @@ void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj) return etnaviv_gem_vmap(obj); } -void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ - /* TODO msm_gem_vunmap() */ -} - int etnaviv_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { -- 2.29.0 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v7 04/10] drm/exynos: Remove empty exynos_drm_gem_prime_{vmap, vunmap}()
The functions exynos_drm_gem_prime_{vmap,vunmap}() are empty. Remove them before changing the interface to use struct drm_buf_map. As a side effect of removing drm_gem_prime_vmap(), the error code changes from ENOMEM to EOPNOTSUPP. Signed-off-by: Thomas Zimmermann Acked-by: Christian König Tested-by: Sam Ravnborg --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 12 drivers/gpu/drm/exynos/exynos_drm_gem.h | 2 -- 2 files changed, 14 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4afbf5109cbf..4396224227d1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -135,8 +135,6 @@ static const struct vm_operations_struct exynos_drm_gem_vm_ops = { static const struct drm_gem_object_funcs exynos_drm_gem_object_funcs = { .free = exynos_drm_gem_free_object, .get_sg_table = exynos_drm_gem_prime_get_sg_table, - .vmap = exynos_drm_gem_prime_vmap, - .vunmap = exynos_drm_gem_prime_vunmap, .vm_ops = _drm_gem_vm_ops, }; @@ -469,16 +467,6 @@ exynos_drm_gem_prime_import_sg_table(struct drm_device *dev, return _gem->base; } -void *exynos_drm_gem_prime_vmap(struct drm_gem_object *obj) -{ - return NULL; -} - -void exynos_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ - /* Nothing to do */ -} - int exynos_drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 74e926abeff0..a23272fb96fb 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -107,8 +107,6 @@ struct drm_gem_object * exynos_drm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); -void *exynos_drm_gem_prime_vmap(struct drm_gem_object *obj); -void exynos_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int exynos_drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); -- 2.29.0 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v7 00/10] Support GEM object mappings from I/O memory
DRM's fbdev console uses regular load and store operations to update framebuffer memory. The bochs driver on sparc64 requires the use of I/O-specific load and store operations. We have a workaround, but need a long-term solution to the problem. This patchset changes GEM's vmap/vunmap interfaces to forward pointers of type struct dma_buf_map and updates the generic fbdev emulation to use them correctly. This enables I/O-memory operations on all framebuffers that require and support them. Patches #1 to #4 prepare VRAM helpers and drivers. Next is the update of the GEM vmap functions. Patch #5 adds vmap and vunmap that is usable with TTM-based GEM drivers, and patch #6 updates GEM's vmap/vunmap callback to forward instances of type struct dma_buf_map. While the patch touches many files throughout the DRM modules, the applied changes are mostly trivial interface fixes. Several TTM-based GEM drivers now use the new vmap code. Patch #7 updates GEM's internal vmap/vunmap functions to forward struct dma_buf_map. With struct dma_buf_map propagated through the layers, patches #8 to #10 convert DRM clients and generic fbdev emulation to use it. Updating the fbdev framebuffer will select the correct functions, either for system or I/O memory. There is also a set of IGT testcases for fbdev at [1]. Reading and writting fbdev device files has several corner cases near the EOF that the tests cover as well. The original fbdev code has different semantics with the different implementations (sys, cfb). Patch #10 and the testcases intend to harmonize the behaviour and serve as a reference. v7: * return number of read/written bytes in fbdev code; if any * init QXL cursor from BO buffer (kernel test robot) * use min_t(size_t,) (kernel test robot) v6: * don't call page_to_phys() on fbdev framebuffers in I/O memory; warn instead (Daniel) v5: * rebase onto latest TTM changes (Christian) * support TTM premapped memory correctly (Christian) * implement fb_read/fb_write internally (Sam, Daniel) * cleanups v4: * provide TTM vmap/vunmap plus GEM helpers and convert drivers over (Christian, Daniel) * remove several empty functions * more TODOs and documentation (Daniel) v3: * recreate the whole patchset on top of struct dma_buf_map v2: * RFC patchset [1] https://gitlab.freedesktop.org/tzimmermann/igt-gpu-tools/-/merge_requests/1 Thomas Zimmermann (10): drm/vram-helper: Remove invariant parameters from internal kmap function drm/cma-helper: Remove empty drm_gem_cma_prime_vunmap() drm/etnaviv: Remove empty etnaviv_gem_prime_vunmap() drm/exynos: Remove empty exynos_drm_gem_prime_{vmap,vunmap}() drm/ttm: Add vmap/vunmap to TTM and TTM GEM helpers drm/gem: Use struct dma_buf_map in GEM vmap ops and convert GEM backends drm/gem: Update internal GEM vmap/vunmap interfaces to use struct dma_buf_map drm/gem: Store client buffer mappings as struct dma_buf_map dma-buf-map: Add memcpy and pointer-increment interfaces drm/fb_helper: Support framebuffers in I/O memory Documentation/gpu/todo.rst | 37 ++- drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 36 --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 - drivers/gpu/drm/ast/ast_cursor.c| 27 +-- drivers/gpu/drm/ast/ast_drv.h | 7 +- drivers/gpu/drm/bochs/bochs_kms.c | 1 - drivers/gpu/drm/drm_client.c| 38 +-- drivers/gpu/drm/drm_fb_helper.c | 250 ++-- drivers/gpu/drm/drm_gem.c | 29 ++- drivers/gpu/drm/drm_gem_cma_helper.c| 27 +-- drivers/gpu/drm/drm_gem_shmem_helper.c | 48 ++-- drivers/gpu/drm/drm_gem_ttm_helper.c| 38 +++ drivers/gpu/drm/drm_gem_vram_helper.c | 117 + drivers/gpu/drm/drm_internal.h | 5 +- drivers/gpu/drm/drm_prime.c | 14 +- drivers/gpu/drm/etnaviv/etnaviv_drv.h | 3 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 1 - drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 12 +- drivers/gpu/drm/exynos/exynos_drm_gem.c | 12 - drivers/gpu/drm/exynos/exynos_drm_gem.h | 2 - drivers/gpu/drm/lima/lima_gem.c | 6 +- drivers/gpu/drm/lima/lima_sched.c | 11 +- drivers/gpu/drm/mgag200/mgag200_mode.c | 10 +- drivers/gpu/drm/nouveau/Kconfig | 1 + drivers/gpu/drm/nouveau/nouveau_bo.h| 2 - drivers/gpu/drm/nouveau/nouveau_gem.c | 6 +- drivers/gpu/drm/nouveau/nouveau_gem.h | 2 - drivers/gpu/drm/nouveau/nouveau_prime.c | 20 -- drivers/gpu/drm/panfrost/panfrost_perfcnt.c | 14 +- drivers/gpu/drm/qxl/qxl_display.c | 15 +- drivers/gpu/drm/qxl/qxl_draw.c |
[PATCH v7 07/10] drm/gem: Update internal GEM vmap/vunmap interfaces to use struct dma_buf_map
GEM's vmap and vunmap interfaces now wrap memory pointers in struct dma_buf_map. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Tested-by: Sam Ravnborg --- drivers/gpu/drm/drm_client.c | 18 +++--- drivers/gpu/drm/drm_gem.c | 26 +- drivers/gpu/drm/drm_internal.h | 5 +++-- drivers/gpu/drm/drm_prime.c| 14 -- 4 files changed, 31 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 495f47d23d87..ac0082bed966 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -3,6 +3,7 @@ * Copyright 2018 Noralf Trønnes */ +#include #include #include #include @@ -304,7 +305,8 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u */ void *drm_client_buffer_vmap(struct drm_client_buffer *buffer) { - void *vaddr; + struct dma_buf_map map; + int ret; if (buffer->vaddr) return buffer->vaddr; @@ -317,13 +319,13 @@ void *drm_client_buffer_vmap(struct drm_client_buffer *buffer) * fd_install step out of the driver backend hooks, to make that * final step optional for internal users. */ - vaddr = drm_gem_vmap(buffer->gem); - if (IS_ERR(vaddr)) - return vaddr; + ret = drm_gem_vmap(buffer->gem, ); + if (ret) + return ERR_PTR(ret); - buffer->vaddr = vaddr; + buffer->vaddr = map.vaddr; - return vaddr; + return map.vaddr; } EXPORT_SYMBOL(drm_client_buffer_vmap); @@ -337,7 +339,9 @@ EXPORT_SYMBOL(drm_client_buffer_vmap); */ void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) { - drm_gem_vunmap(buffer->gem, buffer->vaddr); + struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(buffer->vaddr); + + drm_gem_vunmap(buffer->gem, ); buffer->vaddr = NULL; } EXPORT_SYMBOL(drm_client_buffer_vunmap); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 4231fda26e70..eb2d23e04be9 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1206,32 +1206,32 @@ void drm_gem_unpin(struct drm_gem_object *obj) obj->funcs->unpin(obj); } -void *drm_gem_vmap(struct drm_gem_object *obj) +int drm_gem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) { - struct dma_buf_map map; int ret; if (!obj->funcs->vmap) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; - ret = obj->funcs->vmap(obj, ); + ret = obj->funcs->vmap(obj, map); if (ret) - return ERR_PTR(ret); - else if (dma_buf_map_is_null()) - return ERR_PTR(-ENOMEM); + return ret; + else if (dma_buf_map_is_null(map)) + return -ENOMEM; - return map.vaddr; + return 0; } -void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr) +void drm_gem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) { - struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(vaddr); - - if (!vaddr) + if (dma_buf_map_is_null(map)) return; if (obj->funcs->vunmap) - obj->funcs->vunmap(obj, ); + obj->funcs->vunmap(obj, map); + + /* Always set the mapping to NULL. Callers may rely on this. */ + dma_buf_map_clear(map); } /** diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 2bdac3557765..81d386b5b92a 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -33,6 +33,7 @@ struct dentry; struct dma_buf; +struct dma_buf_map; struct drm_connector; struct drm_crtc; struct drm_framebuffer; @@ -187,8 +188,8 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent, int drm_gem_pin(struct drm_gem_object *obj); void drm_gem_unpin(struct drm_gem_object *obj); -void *drm_gem_vmap(struct drm_gem_object *obj); -void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr); +int drm_gem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); +void drm_gem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); /* drm_debugfs.c drm_debugfs_crc.c */ #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 187b55ede62e..302e2bb3dfff 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -667,21 +667,15 @@ EXPORT_SYMBOL(drm_gem_unmap_dma_buf); * * Sets up a kernel virtual mapping. This can be used as the _buf_ops.vmap * callback. Calls into _gem_object_funcs.vmap for device specific handling. + * The kernel virtual address is returned in map. * - * Returns the kernel virtual address or NULL on failure. + * Returns 0 on success or a negative errno code otherwise. */ int drm_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map) { struct drm_gem_object *obj
Re: [PATCH] vhost/vsock: add IOTLB API support
On 2020/11/3 上午1:11, Stefano Garzarella wrote: On Fri, Oct 30, 2020 at 07:44:43PM +0800, Jason Wang wrote: On 2020/10/30 下午6:54, Stefano Garzarella wrote: On Fri, Oct 30, 2020 at 06:02:18PM +0800, Jason Wang wrote: On 2020/10/30 上午1:43, Stefano Garzarella wrote: This patch enables the IOTLB API support for vhost-vsock devices, allowing the userspace to emulate an IOMMU for the guest. These changes were made following vhost-net, in details this patch: - exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb device if the feature is acked - implements VHOST_GET_BACKEND_FEATURES and VHOST_SET_BACKEND_FEATURES ioctls - calls vq_meta_prefetch() before vq processing to prefetch vq metadata address in IOTLB - provides .read_iter, .write_iter, and .poll callbacks for the chardev; they are used by the userspace to exchange IOTLB messages This patch was tested with QEMU and a patch applied [1] to fix a simple issue: $ qemu -M q35,accel=kvm,kernel-irqchip=split \ -drive file=fedora.qcow2,format=qcow2,if=virtio \ -device intel-iommu,intremap=on \ -device vhost-vsock-pci,guest-cid=3,iommu_platform=on Patch looks good, but a question: It looks to me you don't enable ATS which means vhost won't get any invalidation request or did I miss anything? You're right, I didn't see invalidation requests, only miss and updates. Now I have tried to enable 'ats' and 'device-iotlb' but I still don't see any invalidation. How can I test it? (Sorry but I don't have much experience yet with vIOMMU) I guess it's because the batched unmap. Maybe you can try to use "intel_iommu=strict" in guest kernel command line to see if it works. Btw, make sure the qemu contains the patch [1]. Otherwise ATS won't be enabled for recent Linux Kernel in the guest. The problem was my kernel, it was built with a tiny configuration. Using fedora stock kernel I can see the 'invalidate' requests, but I also had the following issues. Do they make you ring any bells? $ ./qemu -m 4G -smp 4 -M q35,accel=kvm,kernel-irqchip=split \ -drive file=fedora.qcow2,format=qcow2,if=virtio \ -device intel-iommu,intremap=on,device-iotlb=on \ -device vhost-vsock-pci,guest-cid=6,iommu_platform=on,ats=on,id=v1 qemu-system-x86_64: vtd_iova_to_slpte: detected IOVA overflow (iova=0x1d4030c0) It's a hint that IOVA exceeds the AW. It might be worth to check whether the missed IOVA reported from IOTLB is legal. Thanks qemu-system-x86_64: vtd_iommu_translate: detected translation failure (dev=00:03:00, iova=0x1d4030c0) qemu-system-x86_64: New fault is not recorded due to compression of faults Guest kernel messages: [ 44.940872] DMAR: DRHD: handling fault status reg 2 [ 44.941989] DMAR: [DMA Read] Request device [00:03.0] PASID fault addr 88W [ 49.785884] DMAR: DRHD: handling fault status reg 2 [ 49.788874] DMAR: [DMA Read] Request device [00:03.0] PASID fault addr 88W QEMU: b149dea55c Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20201102' into staging Linux guest: 5.8.16-200.fc32.x86_64 Thanks, Stefano ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 1/2] Revert "vhost-vdpa: fix page pinning leakage in error path"
On 2020/10/30 下午3:45, Si-Wei Liu wrote: This reverts commit 7ed9e3d97c32d969caded2dfb6e67c1a2cc5a0b1. Signed-off-by: Si-Wei Liu --- drivers/vhost/vdpa.c | 119 +-- 1 file changed, 48 insertions(+), 71 deletions(-) I saw this has been reverted there https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/vhost?id=5e1a3149eec8675c2767cc465903f5e4829de5b0. :) Thanks diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index a2dbc85..b6d9016 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -588,19 +588,21 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, struct vhost_dev *dev = >vdev; struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; - struct vm_area_struct **vmas; + unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; - unsigned long map_pfn, last_pfn = 0; - unsigned long npages, lock_limit; - unsigned long i, nmap = 0; + unsigned long npages, cur_base, map_pfn, last_pfn = 0; + unsigned long locked, lock_limit, pinned, i; u64 iova = msg->iova; - long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; + page_list = (struct page **) __get_free_page(GFP_KERNEL); + if (!page_list) + return -ENOMEM; + if (msg->perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; @@ -608,86 +610,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (!npages) return -EINVAL; - page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *), - GFP_KERNEL); - if (!page_list || !vmas) { - ret = -ENOMEM; - goto free; - } - mmap_read_lock(dev->mm); + locked = atomic64_add_return(npages, >mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) { - ret = -ENOMEM; - goto unlock; - } - pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, - page_list, vmas); - if (npages != pinned) { - if (pinned < 0) { - ret = pinned; - } else { - unpin_user_pages(page_list, pinned); - ret = -ENOMEM; - } - goto unlock; + if (locked > lock_limit) { + ret = -ENOMEM; + goto out; } + cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; - map_pfn = page_to_pfn(page_list[0]); - - /* One more iteration to avoid extra vdpa_map() call out of loop. */ - for (i = 0; i <= npages; i++) { - unsigned long this_pfn; - u64 csize; - - /* The last chunk may have no valid PFN next to it */ - this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL; - - if (last_pfn && (this_pfn == -1UL || -this_pfn != last_pfn + 1)) { - /* Pin a contiguous chunk of memory */ - csize = last_pfn - map_pfn + 1; - ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT, -map_pfn << PAGE_SHIFT, -msg->perm); - if (ret) { - /* -* Unpin the rest chunks of memory on the -* flight with no corresponding vdpa_map() -* calls having been made yet. On the other -* hand, vdpa_unmap() in the failure path -* is in charge of accounting the number of -* pinned pages for its own. -* This asymmetrical pattern of accounting -* is for efficiency to pin all pages at -* once, while there is no other callsite -* of vdpa_map() than here above. -*/ - unpin_user_pages(_list[nmap], -npages - nmap); - goto out; + + while (npages) { + pinned = min_t(unsigned long, npages, list_size); + ret = pin_user_pages(cur_base, pinned, +gup_flags, page_list, NULL); + if (ret != pinned) +