Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread Jason Wang


On 2020/10/30 3:45 PM, Si-Wei Liu wrote:

Pinned pages are not properly accounted, particularly when a mapping
error occurs on IOTLB update. Clean up dangling pinned pages for the
error path.

The memory usage for bookkeeping pinned pages is reverted to what it
was before: only one single free page is needed. This helps reduce the
host memory demand for VMs with a large amount of memory, or in
situations where the host is running short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++------------------
  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
 
 	if (r)
 		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+	else
+		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 
 	return r;
 }
@@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
 	unsigned int gup_flags = FOLL_LONGTERM;
 	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-	unsigned long locked, lock_limit, pinned, i;
+	unsigned long lock_limit, sz2pin, nchunks, i;
 	u64 iova = msg->iova;
+	long pinned;
 	int ret = 0;
 
 	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 				    msg->iova + msg->size - 1))
 		return -EEXIST;
 
+	/* Limit the use of memory for bookkeeping */
 	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 	if (!page_list)
 		return -ENOMEM;
@@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 		gup_flags |= FOLL_WRITE;
 
 	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
-	if (!npages)
-		return -EINVAL;
+	if (!npages) {
+		ret = -EINVAL;
+		goto free;
+	}
 
 	mmap_read_lock(dev->mm);
 
-	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (locked > lock_limit) {
+	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 		ret = -ENOMEM;
-		goto out;
+		goto unlock;
 	}
 
 	cur_base = msg->uaddr & PAGE_MASK;
 	iova &= PAGE_MASK;
+	nchunks = 0;
 
 	while (npages) {
-		pinned = min_t(unsigned long, npages, list_size);
-		ret = pin_user_pages(cur_base, pinned,
-				     gup_flags, page_list, NULL);
-		if (ret != pinned)
+		sz2pin = min_t(unsigned long, npages, list_size);
+		pinned = pin_user_pages(cur_base, sz2pin,
+					gup_flags, page_list, NULL);
+		if (sz2pin != pinned) {
+			if (pinned < 0) {
+				ret = pinned;
+			} else {
+				unpin_user_pages(page_list, pinned);
+				ret = -ENOMEM;
+			}
 			goto out;
+		}
+		nchunks++;
 
 		if (!last_pfn)
 			map_pfn = page_to_pfn(page_list[0]);
 
-		for (i = 0; i < ret; i++) {
+		for (i = 0; i < pinned; i++) {
 			unsigned long this_pfn = page_to_pfn(page_list[i]);
 			u64 csize;
 
 			if (last_pfn && (this_pfn != last_pfn + 1)) {
 				/* Pin a contiguous chunk of memory */
 				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-				if (vhost_vdpa_map(v, iova, csize,
-						   map_pfn << PAGE_SHIFT,
-						   msg->perm))
+				ret = vhost_vdpa_map(v, iova, csize,
+						     map_pfn << PAGE_SHIFT,
+						     msg->perm);
+				if (ret)
 					goto out;
+
 				map_pfn = this_pfn;
 				iova += csize;
+				nchunks = 0;
 			}
 
 			last_pfn = this_pfn;
 		}
 
-		cur_base += ret << PAGE_SHIFT;
-		npages -= ret;
+		cur_base += pinned << PAGE_SHIFT;
+		npages -= pinned;
 	}
 
 	/* Pin the rest chunk */
@@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 			     map_pfn << PAGE_SHIFT,
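For illustration, a minimal userspace sketch of the chunked pin-and-roll-back
pattern the patch implements. This is not kernel code: fake_pin_pages() and
the pinned_vm counter merely stand in for pin_user_pages() and
dev->mm->pinned_vm, and all names below are made up for the example.

  #include <errno.h>
  #include <stdio.h>

  #define CHUNK 8UL                     /* one page worth of page pointers */

  static long pinned_vm;                /* stands in for dev->mm->pinned_vm */

  /* Pretend to pin n pages at base; fail past page 30 to exercise rollback. */
  static long fake_pin_pages(unsigned long base, unsigned long n)
  {
          return (base + n > 30) ? -ENOMEM : (long)n;
  }

  int main(void)
  {
          unsigned long npages = 40, cur = 0, done = 0;
          int ret = 0;

          while (npages) {
                  unsigned long sz2pin = npages < CHUNK ? npages : CHUNK;
                  long pinned = fake_pin_pages(cur, sz2pin);

                  if (pinned != (long)sz2pin) {
                          ret = pinned < 0 ? (int)pinned : -ENOMEM;
                          break;
                  }
                  pinned_vm += pinned;  /* account as chunks become mapped */
                  done += pinned;
                  cur += pinned;
                  npages -= pinned;
          }

          if (ret) {
                  pinned_vm -= done;    /* error path: undo the accounting */
                  printf("failed after %lu pages, pinned_vm back to %ld\n",
                         done, pinned_vm);
          } else {
                  printf("pinned %lu pages, pinned_vm=%ld\n", done, pinned_vm);
          }
          return ret ? 1 : 0;
  }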

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread si-wei liu


On 11/3/2020 5:58 PM, Jason Wang wrote:


On 2020/11/4 9:08 AM, si-wei liu wrote:


On 11/3/2020 5:06 PM, si-wei liu wrote:


On 11/3/2020 5:00 AM, Jason Wang wrote:


On 2020/10/30 3:45 PM, Si-Wei Liu wrote:

[...]

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread Jason Wang


On 2020/11/4 9:08 AM, si-wei liu wrote:


On 11/3/2020 5:06 PM, si-wei liu wrote:


On 11/3/2020 5:00 AM, Jason Wang wrote:


On 2020/10/30 3:45 PM, Si-Wei Liu wrote:

[...]

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread si-wei liu


On 11/3/2020 5:06 PM, si-wei liu wrote:


On 11/3/2020 5:00 AM, Jason Wang wrote:


On 2020/10/30 3:45 PM, Si-Wei Liu wrote:

[...]



I want 

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread si-wei liu


On 11/3/2020 5:00 AM, Jason Wang wrote:


On 2020/10/30 3:45 PM, Si-Wei Liu wrote:

[...]
@@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 			     map_pfn << PAGE_SHIFT, msg->perm);
 out:
 	if (ret) {
+		if (nchunks && last_pfn) {
+			unsigned long pfn;
+
+			/*
+			 * Unpin the outstanding pages which are unmapped.
+			 * Mapped pages are accounted in vdpa_map(), thus
+			 * will be handled by vdpa_unmap().
+			 */
+			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
+				unpin_user_page(pfn_to_page(pfn));
+		}
 		vhost_vdpa_unmap(v, msg->iova, msg->size);



I want to know what's wrong with the current code.

We 

Re: [PATCH] vhost/vsock: add IOTLB API support

2020-11-03 Thread Peter Xu
On Tue, Nov 03, 2020 at 05:04:23PM +0800, Jason Wang wrote:
> 
> On 2020/11/3 1:11 AM, Stefano Garzarella wrote:
> > On Fri, Oct 30, 2020 at 07:44:43PM +0800, Jason Wang wrote:
> > > 
> > > On 2020/10/30 6:54 PM, Stefano Garzarella wrote:
> > > > On Fri, Oct 30, 2020 at 06:02:18PM +0800, Jason Wang wrote:
> > > > > 
> > > > > On 2020/10/30 1:43 AM, Stefano Garzarella wrote:
> > > > > > This patch enables the IOTLB API support for vhost-vsock devices,
> > > > > > allowing the userspace to emulate an IOMMU for the guest.
> > > > > > 
> > > > > > These changes were made following vhost-net, in details this patch:
> > > > > > - exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
> > > > > >   device if the feature is acked
> > > > > > - implements VHOST_GET_BACKEND_FEATURES and
> > > > > >   VHOST_SET_BACKEND_FEATURES ioctls
> > > > > > - calls vq_meta_prefetch() before vq processing to prefetch vq
> > > > > >   metadata address in IOTLB
> > > > > > - provides .read_iter, .write_iter, and .poll callbacks for the
> > > > > >   chardev; they are used by the userspace to exchange IOTLB messages
> > > > > > 
> > > > > > This patch was tested with QEMU and a patch applied [1] to fix a
> > > > > > simple issue:
> > > > > >     $ qemu -M q35,accel=kvm,kernel-irqchip=split \
> > > > > >    -drive file=fedora.qcow2,format=qcow2,if=virtio \
> > > > > >    -device intel-iommu,intremap=on \
> > > > > >    -device vhost-vsock-pci,guest-cid=3,iommu_platform=on
> > > > > 
> > > > > 
> > > > > Patch looks good, but a question:
> > > > > 
> > > > > It looks to me you don't enable ATS which means vhost won't
> > > > > get any invalidation request or did I miss anything?
> > > > > 
> > > > 
> > > > You're right, I didn't see invalidation requests, only miss and
> > > > updates.
> > > > Now I have tried to enable 'ats' and 'device-iotlb' but I still
> > > > don't see any invalidation.
> > > > 
> > > > How can I test it? (Sorry but I don't have much experience yet
> > > > with vIOMMU)
> > > 
> > > 
> > > I guess it's because the batched unmap. Maybe you can try to use
> > > "intel_iommu=strict" in guest kernel command line to see if it
> > > works.
> > > 
> > > Btw, make sure the qemu contains the patch [1]. Otherwise ATS won't
> > > be enabled for recent Linux Kernel in the guest.
> > 
> > The problem was my kernel, it was built with a tiny configuration.
> > Using fedora stock kernel I can see the 'invalidate' requests, but I
> > also had the following issues.
> > 
> > Do they make you ring any bells?
> > 
> > $ ./qemu -m 4G -smp 4 -M q35,accel=kvm,kernel-irqchip=split \
> >     -drive file=fedora.qcow2,format=qcow2,if=virtio \
> >     -device intel-iommu,intremap=on,device-iotlb=on \
> >     -device vhost-vsock-pci,guest-cid=6,iommu_platform=on,ats=on,id=v1
> > 
> >     qemu-system-x86_64: vtd_iova_to_slpte: detected IOVA overflow (iova=0x1d4030c0)
> 
> 
> It's a hint that the IOVA exceeds the AW. It might be worth checking
> whether the missed IOVA reported from IOTLB is legal.

Yeah.  By default the QEMU vIOMMU only supports a 39-bit width for the guest
iova address space.  To extend it, we can use:

  -device intel-iommu,aw-bits=48

So we'll enable 4-level iommu pgtable.

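For instance, the reproducer quoted above could be retried with the wider
address width; aw-bits is the only change here, the rest of the command line
is as posted:

  $ qemu-system-x86_64 -m 4G -smp 4 -M q35,accel=kvm,kernel-irqchip=split \
      -drive file=fedora.qcow2,format=qcow2,if=virtio \
      -device intel-iommu,intremap=on,device-iotlb=on,aw-bits=48 \
      -device vhost-vsock-pci,guest-cid=6,iommu_platform=on,ats=on,id=v1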
Here the iova is obviously longer than this, so it would be interesting to
know why that iova was allocated by the guest driver, since the driver should
somehow know that this iova is beyond what is supported (the guest iommu
driver should be able to probe the viommu capability for this width
information too).

-- 
Peter Xu


Re: [patch V3 22/37] highmem: High implementation details and document API

2020-11-03 Thread Thomas Gleixner
On Tue, Nov 03 2020 at 09:48, Linus Torvalds wrote:
> I have no complaints about the patch, but it strikes me that if people
> want to actually have much better debug coverage, this is where it
> should be (I like the "every other address" thing too, don't get me
> wrong).
>
> In particular, instead of these PageHighMem(page) tests, I think
> something like this would be better:
>
>#ifdef CONFIG_DEBUG_HIGHMEM
>  #define page_use_kmap(page) ((page),1)
>#else
>  #define page_use_kmap(page) PageHighMem(page)
>#endif
>
> and then replace those "if (!PageHighMem(page))" tests with "if
> (!page_use_kmap())" instead.
>
> IOW, in debug mode, it would _always_ remap the page, whether it's
> highmem or not. That would really stress the highmem code and find any
> fragilities.

Yes, that makes a lot of sense. We just have to avoid that for the
architectures with aliasing issues.

> Anyway, this is all separate from the series, which still looks fine
> to me. Just a reaction to seeing the patch, and Thomas' earlier
> mention that the highmem debugging doesn't actually do much.

Right, forcing it for both kmap and kmap_local is straight forward. I'll
cook a patch on top for that.

Thanks,

tglx




Re: [patch V3 22/37] highmem: High implementation details and document API

2020-11-03 Thread Linus Torvalds
On Tue, Nov 3, 2020 at 2:33 AM Thomas Gleixner  wrote:
>
> +static inline void *kmap(struct page *page)
> +{
> +   void *addr;
> +
> +   might_sleep();
> +   if (!PageHighMem(page))
> +   addr = page_address(page);
> +   else
> +   addr = kmap_high(page);
> +   kmap_flush_tlb((unsigned long)addr);
> +   return addr;
> +}
> +
> +static inline void kunmap(struct page *page)
> +{
> +   might_sleep();
> +   if (!PageHighMem(page))
> +   return;
> +   kunmap_high(page);
> +}

I have no complaints about the patch, but it strikes me that if people
want to actually have much better debug coverage, this is where it
should be (I like the "every other address" thing too, don't get me
wrong).

In particular, instead of these PageHighMem(page) tests, I think
something like this would be better:

   #ifdef CONFIG_DEBUG_HIGHMEM
 #define page_use_kmap(page) ((page),1)
   #else
 #define page_use_kmap(page) PageHighMem(page)
   #endif

and then replace those "if (!PageHighMem(page))" tests with "if
(!page_use_kmap())" instead.

IOW, in debug mode, it would _always_ remap the page, whether it's
highmem or not. That would really stress the highmem code and find any
fragilities.

No?
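A sketch of how that could be wired into the kmap() quoted above;
page_use_kmap is just the name suggested here, not an existing macro, and the
statement expression is one way to keep the argument evaluated:

    #ifdef CONFIG_DEBUG_HIGHMEM
      #define page_use_kmap(page) ({ (void)(page); 1; })
    #else
      #define page_use_kmap(page) PageHighMem(page)
    #endif

    static inline void *kmap(struct page *page)
    {
            void *addr;

            might_sleep();
            if (!page_use_kmap(page))
                    addr = page_address(page);
            else
                    addr = kmap_high(page); /* always taken in debug builds */
            kmap_flush_tlb((unsigned long)addr);
            return addr;
    }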

Anyway, this is all separate from the series, which still looks fine
to me. Just a reaction to seeing the patch, and Thomas' earlier
mention that the highmem debugging doesn't actually do much.

   Linus


Re: [PATCH mlx5-next v1 06/11] vdpa/mlx5: Connect mlx5_vdpa to auxiliary bus

2020-11-03 Thread Jason Gunthorpe
On Sun, Nov 01, 2020 at 10:15:37PM +0200, Leon Romanovsky wrote:
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 6c218b47b9f1..5316e51e72d4 100644
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -1,18 +1,27 @@
>  // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>  /* Copyright (c) 2020 Mellanox Technologies Ltd. */
> 
> +#include 
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
> -#include 
>  #include 
> -#include "mlx5_vnet.h"
>  #include "mlx5_vdpa.h"
> 
> +MODULE_AUTHOR("Eli Cohen ");
> +MODULE_DESCRIPTION("Mellanox VDPA driver");
> +MODULE_LICENSE("Dual BSD/GPL");
> +
> +#define to_mlx5_vdpa_ndev(__mvdev) container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
>  #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
> 
>  #define VALID_FEATURES_MASK  
>   \
> @@ -159,6 +168,11 @@ static bool mlx5_vdpa_debug;
>   mlx5_vdpa_info(mvdev, "%s\n", #_status);
>\
>   } while (0)
> 
> +static inline u32 mlx5_vdpa_max_qps(int max_vqs)
> +{
> + return max_vqs / 2;
> +}
> +
>  static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
>  {
>   if (status & ~VALID_STATUS_MASK)
> @@ -1928,8 +1942,11 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
>   }
>  }
> 
> -void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev)
> +static int mlx5v_probe(struct auxiliary_device *adev,
> +const struct auxiliary_device_id *id)
>  {
> + struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
> + struct mlx5_core_dev *mdev = madev->mdev;
>   struct virtio_net_config *config;
>   struct mlx5_vdpa_dev *mvdev;
>   struct mlx5_vdpa_net *ndev;
> @@ -1943,7 +1960,7 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev)
>  	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device,
>  				 &mlx5_vdpa_ops, 2 * mlx5_vdpa_max_qps(max_vqs));
>   if (IS_ERR(ndev))
> - return ndev;
> + return PTR_ERR(ndev);
> 
>   ndev->mvdev.max_vqs = max_vqs;
>  	mvdev = &ndev->mvdev;
> @@ -1972,7 +1989,8 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev)
>   if (err)
>   goto err_reg;
> 
> - return ndev;
> +	dev_set_drvdata(&adev->dev, ndev);
> + return 0;
> 
>  err_reg:
>   free_resources(ndev);
> @@ -1981,10 +1999,29 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev)
>  err_mtu:
>  	mutex_destroy(&ndev->reslock);
>  	put_device(&mvdev->vdev.dev);
> - return ERR_PTR(err);
> + return err;
>  }
> 
> -void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev)
> +static int mlx5v_remove(struct auxiliary_device *adev)
>  {
> +	struct mlx5_vdpa_dev *mvdev = dev_get_drvdata(&adev->dev);
> +
>  	vdpa_unregister_device(&mvdev->vdev);
> + return 0;
>  }
> +
> +static const struct auxiliary_device_id mlx5v_id_table[] = {
> + { .name = MLX5_ADEV_NAME ".vnet", },
> + {},
> +};
> +
> +MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
> +
> +static struct auxiliary_driver mlx5v_driver = {
> + .name = "vnet",
> + .probe = mlx5v_probe,
> + .remove = mlx5v_remove,
> + .id_table = mlx5v_id_table,
> +};

It is hard to see from the diff, but when this patch is applied the
vdpa module looks like I imagined things would look with the auxiliary
bus. It is very similar in structure to a PCI driver with the probe()
function cleanly registering with its subsystem. This is what I'd like
to see from the new Intel RDMA driver.

Greg, I think this patch is the best clean usage example.

I've looked over this series and it has the right idea and
parts. There is definitely more that can be done to improve mlx5 in
this area, but this series is well scoped and cleans up a good part of
it.

Jason


[patch V4 24/37] sched: highmem: Store local kmaps in task struct

2020-11-03 Thread Thomas Gleixner
Instead of storing the map per CPU, provide and use per-task storage. That
prepares for local kmaps which are preemptible.

The context switch code is preparatory and not yet in use because
kmap_atomic() runs with preemption disabled. Will be made usable in the
next step.

The context switch logic is safe even when an interrupt happens after
clearing or before restoring the kmaps. The kmap index in task struct is
not modified so any nesting kmap in an interrupt will use unused indices
and on return the counter is the same as before.

Also add an assert into the return to user space code. Going back to user
space with an active kmap local is a nono.

Signed-off-by: Thomas Gleixner 
---
V4: Use the version which actually compiles and works
V3: Handle the debug case correctly
---
 include/linux/highmem-internal.h |   10 +++
 include/linux/sched.h|9 +++
 kernel/entry/common.c|2 
 kernel/fork.c|1 
 kernel/sched/core.c  |   18 +++
 mm/highmem.c |   99 +++
 6 files changed, 129 insertions(+), 10 deletions(-)

--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -9,6 +9,16 @@
 void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
 void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
 void kunmap_local_indexed(void *vaddr);
+void kmap_local_fork(struct task_struct *tsk);
+void __kmap_local_sched_out(void);
+void __kmap_local_sched_in(void);
+static inline void kmap_assert_nomap(void)
+{
+   DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx);
+}
+#else
+static inline void kmap_local_fork(struct task_struct *tsk) { }
+static inline void kmap_assert_nomap(void) { }
 #endif
 
 #ifdef CONFIG_HIGHMEM
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@
 #include <linux/rseq.h>
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
+#include <asm/kmap_size.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -629,6 +630,13 @@ struct wake_q_node {
struct wake_q_node *next;
 };
 
+struct kmap_ctrl {
+#ifdef CONFIG_KMAP_LOCAL
+   int idx;
+   pte_t   pteval[KM_MAX_IDX];
+#endif
+};
+
 struct task_struct {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
/*
@@ -1294,6 +1302,7 @@ struct task_struct {
 	unsigned int			sequential_io;
 	unsigned int			sequential_io_avg;
 #endif
+	struct kmap_ctrl		kmap_ctrl;
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long   task_state_change;
 #endif
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -2,6 +2,7 @@
 
 #include <linux/context_tracking.h>
 #include <linux/entry-common.h>
+#include <linux/highmem.h>
 #include <linux/livepatch.h>
 #include <linux/audit.h>
 
@@ -194,6 +195,7 @@ static void exit_to_user_mode_prepare(st
 
/* Ensure that the address limit is intact and no locks are held */
addr_limit_user_check();
+   kmap_assert_nomap();
lockdep_assert_irqs_disabled();
lockdep_sys_exit();
 }
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -930,6 +930,7 @@ static struct task_struct *dup_task_stru
account_kernel_stack(tsk, 1);
 
kcov_task_init(tsk);
+   kmap_local_fork(tsk);
 
 #ifdef CONFIG_FAULT_INJECTION
tsk->fail_nth = 0;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4053,6 +4053,22 @@ static inline void finish_lock_switch(st
 # define finish_arch_post_lock_switch()	do { } while (0)
 #endif
 
+static inline void kmap_local_sched_out(void)
+{
+#ifdef CONFIG_KMAP_LOCAL
+   if (unlikely(current->kmap_ctrl.idx))
+   __kmap_local_sched_out();
+#endif
+}
+
+static inline void kmap_local_sched_in(void)
+{
+#ifdef CONFIG_KMAP_LOCAL
+   if (unlikely(current->kmap_ctrl.idx))
+   __kmap_local_sched_in();
+#endif
+}
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -4075,6 +4091,7 @@ prepare_task_switch(struct rq *rq, struc
perf_event_task_sched_out(prev, next);
rseq_preempt(prev);
fire_sched_out_preempt_notifiers(prev, next);
+   kmap_local_sched_out();
prepare_task(next);
prepare_arch_switch(next);
 }
@@ -4141,6 +4158,7 @@ static struct rq *finish_task_switch(str
finish_lock_switch(rq);
finish_arch_post_lock_switch();
kcov_finish_switch(current);
+   kmap_local_sched_in();
 
fire_sched_in_preempt_notifiers(current);
/*
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -365,8 +365,6 @@ EXPORT_SYMBOL(kunmap_high);
 
 #include <asm/kmap_size.h>
 
-static DEFINE_PER_CPU(int, __kmap_local_idx);
-
 /*
  * With DEBUG_HIGHMEM the stack depth is doubled and every second
  * slot is unused which acts as a guard page
@@ -379,23 +377,21 @@ static DEFINE_PER_CPU(int, __kmap_local_
 
 static inline int kmap_local_idx_push(void)
 {
-   int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1;
-
WARN_ON_ONCE(in_irq() && !irqs_disabled());
-  

Re: [patch V3 24/37] sched: highmem: Store local kmaps in task struct

2020-11-03 Thread Thomas Gleixner
On Tue, Nov 03 2020 at 10:27, Thomas Gleixner wrote:
> +struct kmap_ctrl {
> +#ifdef CONFIG_KMAP_LOCAL
> + int idx;
> + pte_t   pteval[KM_TYPE_NR];

I'm a moron. Fixed it on the test machine ...


Re: [patch V3 03/37] fs: Remove asm/kmap_types.h includes

2020-11-03 Thread David Sterba
On Tue, Nov 03, 2020 at 10:27:15AM +0100, Thomas Gleixner wrote:
> Historical leftovers from the time where kmap() had fixed slots.
> 
> Signed-off-by: Thomas Gleixner 
> Cc: Alexander Viro 
> Cc: Benjamin LaHaise 
> Cc: linux-fsde...@vger.kernel.org
> Cc: linux-...@kvack.org
> Cc: Chris Mason 
> Cc: Josef Bacik 
> Cc: David Sterba 

Acked-by: David Sterba 

For the btrfs bits

>  fs/btrfs/ctree.h |1 -

> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -17,7 +17,6 @@
>  #include 
>  #include 
>  #include 
> -#include <asm/kmap_types.h>
>  #include 
>  #include 
>  #include 


Re: [PATCH v2 0/8] slab: provide and use krealloc_array()

2020-11-03 Thread Andy Shevchenko
On Tue, Nov 3, 2020 at 12:13 PM Bartosz Golaszewski  wrote:
> On Tue, Nov 3, 2020 at 5:14 AM Joe Perches  wrote:
> > On Mon, 2020-11-02 at 16:20 +0100, Bartosz Golaszewski wrote:
> > > From: Bartosz Golaszewski 

> Yeah so I had this concern for devm_krealloc() and even sent a patch
> that extended it to honor __GFP_ZERO before I noticed that regular
> krealloc() silently ignores __GFP_ZERO. I'm not sure if this is on
> purpose. Maybe we should either make krealloc() honor __GFP_ZERO or
> explicitly state in its documentation that it ignores it?

And my vote here is to ignore it, for the same reasons: respect realloc(3)
semantics and keep common sense with the idea of REallocating (capital
letters on purpose).
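
Until that is settled, a caller that needs the grown region zeroed has to do
it by hand; a minimal sketch (grow_zeroed, old_len and new_len are
illustrative names, and the __GFP_ZERO behaviour is as described above):

    static int grow_zeroed(void **bufp, size_t old_len, size_t new_len)
    {
            /* krealloc() preserves the old contents but silently ignores
             * __GFP_ZERO, so clear the newly grown tail explicitly. */
            void *tmp = krealloc(*bufp, new_len, GFP_KERNEL);

            if (!tmp)
                    return -ENOMEM;   /* *bufp is still valid on failure */
            memset(tmp + old_len, 0, new_len - old_len);
            *bufp = tmp;
            return 0;
    }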

-- 
With Best Regards,
Andy Shevchenko


[patch V3 27/37] x86/crashdump/32: Simplify copy_oldmem_page()

2020-11-03 Thread Thomas Gleixner
Replace kmap_atomic_pfn() with kmap_local_pfn() which is preemptible and
can take page faults.

Remove the indirection of the dump page and the related cruft which is not
longer required.

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 arch/x86/kernel/crash_dump_32.c |   48 
 1 file changed, 10 insertions(+), 38 deletions(-)

--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -13,8 +13,6 @@
 
 #include 
 
-static void *kdump_buf_page;
-
 static inline bool is_crashed_pfn_valid(unsigned long pfn)
 {
 #ifndef CONFIG_X86_PAE
@@ -41,15 +39,11 @@ static inline bool is_crashed_pfn_valid(
  * @userbuf: if set, @buf is in user address space, use copy_to_user(),
  * otherwise @buf is in kernel address space, use memcpy().
  *
- * Copy a page from "oldmem". For this page, there is no pte mapped
- * in the current kernel. We stitch up a pte, similar to kmap_atomic.
- *
- * Calling copy_to_user() in atomic context is not desirable. Hence first
- * copying the data to a pre-allocated kernel page and then copying to user
- * space in non-atomic context.
+ * Copy a page from "oldmem". For this page, there might be no pte mapped
+ * in the current kernel.
  */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-   size_t csize, unsigned long offset, int userbuf)
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+unsigned long offset, int userbuf)
 {
void  *vaddr;
 
@@ -59,38 +53,16 @@ ssize_t copy_oldmem_page(unsigned long p
if (!is_crashed_pfn_valid(pfn))
return -EFAULT;
 
-   vaddr = kmap_atomic_pfn(pfn);
+   vaddr = kmap_local_pfn(pfn);
 
if (!userbuf) {
-   memcpy(buf, (vaddr + offset), csize);
-   kunmap_atomic(vaddr);
+   memcpy(buf, vaddr + offset, csize);
} else {
-   if (!kdump_buf_page) {
-   printk(KERN_WARNING "Kdump: Kdump buffer page not"
-   " allocated\n");
-   kunmap_atomic(vaddr);
-   return -EFAULT;
-   }
-   copy_page(kdump_buf_page, vaddr);
-   kunmap_atomic(vaddr);
-   if (copy_to_user(buf, (kdump_buf_page + offset), csize))
-   return -EFAULT;
+   if (copy_to_user(buf, vaddr + offset, csize))
+   csize = -EFAULT;
}
 
-   return csize;
-}
+   kunmap_local(vaddr);
 
-static int __init kdump_buf_page_init(void)
-{
-   int ret = 0;
-
-   kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
-   if (!kdump_buf_page) {
-   printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer"
-" page\n");
-   ret = -ENOMEM;
-   }
-
-   return ret;
+   return csize;
 }
-arch_initcall(kdump_buf_page_init);
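
The resulting pattern, in isolation (a sketch, not a literal excerpt from the
patch): kmap_local_pfn() leaves pagefaults and preemption enabled, which is
what makes the direct copy_to_user() legal here.

    void *vaddr = kmap_local_pfn(pfn);

    if (copy_to_user(ubuf, vaddr + offset, csize))  /* may fault and sleep */
            csize = -EFAULT;
    kunmap_local(vaddr);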



[patch V3 19/37] mm/highmem: Remove the old kmap_atomic cruft

2020-11-03 Thread Thomas Gleixner
All users gone.

Signed-off-by: Thomas Gleixner 
---
 include/linux/highmem.h |   63 +++-
 mm/highmem.c|7 -
 2 files changed, 5 insertions(+), 65 deletions(-)

--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -86,31 +86,16 @@ static inline void kunmap(struct page *p
  * be used in IRQ contexts, so in some (very limited) cases we need
  * it.
  */
-
-#ifndef CONFIG_KMAP_LOCAL
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot);
-void kunmap_atomic_high(void *kvaddr);
-
 static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
preempt_disable();
pagefault_disable();
-   if (!PageHighMem(page))
-   return page_address(page);
-   return kmap_atomic_high_prot(page, prot);
-}
-
-static inline void __kunmap_atomic(void *vaddr)
-{
-   kunmap_atomic_high(vaddr);
+   return __kmap_local_page_prot(page, prot);
 }
-#else /* !CONFIG_KMAP_LOCAL */
 
-static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+static inline void *kmap_atomic(struct page *page)
 {
-   preempt_disable();
-   pagefault_disable();
-   return __kmap_local_page_prot(page, prot);
+   return kmap_atomic_prot(page, kmap_prot);
 }
 
 static inline void *kmap_atomic_pfn(unsigned long pfn)
@@ -125,13 +110,6 @@ static inline void __kunmap_atomic(void
kunmap_local_indexed(addr);
 }
 
-#endif /* CONFIG_KMAP_LOCAL */
-
-static inline void *kmap_atomic(struct page *page)
-{
-   return kmap_atomic_prot(page, kmap_prot);
-}
-
 /* declarations for linux/mm/highmem.c */
 unsigned int nr_free_highpages(void);
 extern atomic_long_t _totalhigh_pages;
@@ -212,41 +190,8 @@ static inline void __kunmap_atomic(void
 
 #define kmap_flush_unused()	do {} while(0)
 
-#endif /* CONFIG_HIGHMEM */
-
-#if !defined(CONFIG_KMAP_LOCAL)
-#if defined(CONFIG_HIGHMEM)
-
-DECLARE_PER_CPU(int, __kmap_atomic_idx);
-
-static inline int kmap_atomic_idx_push(void)
-{
-   int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-   WARN_ON_ONCE(in_irq() && !irqs_disabled());
-   BUG_ON(idx >= KM_TYPE_NR);
-#endif
-   return idx;
-}
-
-static inline int kmap_atomic_idx(void)
-{
-   return __this_cpu_read(__kmap_atomic_idx) - 1;
-}
 
-static inline void kmap_atomic_idx_pop(void)
-{
-#ifdef CONFIG_DEBUG_HIGHMEM
-   int idx = __this_cpu_dec_return(__kmap_atomic_idx);
-
-   BUG_ON(idx < 0);
-#else
-   __this_cpu_dec(__kmap_atomic_idx);
-#endif
-}
-#endif
-#endif
+#endif /* CONFIG_HIGHMEM */
 
 /*
  * Prevent people trying to call kunmap_atomic() as if it were kunmap()
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -31,12 +31,6 @@
 #include 
 #include 
 
-#ifndef CONFIG_KMAP_LOCAL
-#ifdef CONFIG_HIGHMEM
-DEFINE_PER_CPU(int, __kmap_atomic_idx);
-#endif
-#endif
-
 /*
  * Virtual_count is not a pure "count".
  *  0 means that it is not mapped, and has not been mapped
@@ -410,6 +404,7 @@ static inline void kmap_local_idx_pop(vo
 #ifndef arch_kmap_local_post_map
 # define arch_kmap_local_post_map(vaddr, pteval)   do { } while (0)
 #endif
+
 #ifndef arch_kmap_local_pre_unmap
 # define arch_kmap_local_pre_unmap(vaddr)  do { } while (0)
 #endif



[patch V3 37/37] io-mapping: Remove io_mapping_map_atomic_wc()

2020-11-03 Thread Thomas Gleixner
No more users. Get rid of it and remove the traces in documentation.

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 Documentation/driver-api/io-mapping.rst |   22 +---
 include/linux/io-mapping.h  |   42 +---
 2 files changed, 9 insertions(+), 55 deletions(-)

--- a/Documentation/driver-api/io-mapping.rst
+++ b/Documentation/driver-api/io-mapping.rst
@@ -21,19 +21,15 @@ mappable, while 'size' indicates how lar
 enable. Both are in bytes.
 
 This _wc variant provides a mapping which may only be used with
-io_mapping_map_atomic_wc(), io_mapping_map_local_wc() or
-io_mapping_map_wc().
+io_mapping_map_local_wc() or io_mapping_map_wc().
 
 With this mapping object, individual pages can be mapped either temporarily
 or long term, depending on the requirements. Of course, temporary maps are
-more efficient. They come in two flavours::
+more efficient.
 
void *io_mapping_map_local_wc(struct io_mapping *mapping,
  unsigned long offset)
 
-   void *io_mapping_map_atomic_wc(struct io_mapping *mapping,
-  unsigned long offset)
-
 'offset' is the offset within the defined mapping region.  Accessing
 addresses beyond the region specified in the creation function yields
 undefined results. Using an offset which is not page aligned yields an
@@ -50,9 +46,6 @@ io_mapping_map_local_wc() has a side eff
 migration to make the mapping code work. No caller can rely on this side
 effect.
 
-io_mapping_map_atomic_wc() has the side effect of disabling preemption and
-pagefaults. Don't use in new code. Use io_mapping_map_local_wc() instead.
-
 Nested mappings need to be undone in reverse order because the mapping
 code uses a stack for keeping track of them::
 
@@ -65,11 +58,10 @@ Nested mappings need to be undone in rev
 The mappings are released with::
 
void io_mapping_unmap_local(void *vaddr)
-   void io_mapping_unmap_atomic(void *vaddr)
 
-'vaddr' must be the value returned by the last io_mapping_map_local_wc() or
-io_mapping_map_atomic_wc() call. This unmaps the specified mapping and
-undoes the side effects of the mapping functions.
+'vaddr' must be the value returned by the last io_mapping_map_local_wc()
+call. This unmaps the specified mapping and undoes eventual side effects of
+the mapping function.
 
 If you need to sleep while holding a mapping, you can use the regular
 variant, although this may be significantly slower::
@@ -77,8 +69,8 @@ If you need to sleep while holding a map
void *io_mapping_map_wc(struct io_mapping *mapping,
unsigned long offset)
 
-This works like io_mapping_map_atomic/local_wc() except it has no side
-effects and the pointer is globaly visible.
+This works like io_mapping_map_local_wc() except it has no side effects and
+the pointer is globaly visible.
 
 The mappings are released with::
 
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -60,28 +60,7 @@ io_mapping_fini(struct io_mapping *mappi
iomap_free(mapping->base, mapping->size);
 }
 
-/* Atomic map/unmap */
-static inline void __iomem *
-io_mapping_map_atomic_wc(struct io_mapping *mapping,
-unsigned long offset)
-{
-   resource_size_t phys_addr;
-
-   BUG_ON(offset >= mapping->size);
-   phys_addr = mapping->base + offset;
-   preempt_disable();
-   pagefault_disable();
-   return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot);
-}
-
-static inline void
-io_mapping_unmap_atomic(void __iomem *vaddr)
-{
-   kunmap_local_indexed((void __force *)vaddr);
-   pagefault_enable();
-   preempt_enable();
-}
-
+/* Temporary mappings which are only valid in the current context */
 static inline void __iomem *
 io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset)
 {
@@ -163,24 +142,7 @@ io_mapping_unmap(void __iomem *vaddr)
 {
 }
 
-/* Atomic map/unmap */
-static inline void __iomem *
-io_mapping_map_atomic_wc(struct io_mapping *mapping,
-unsigned long offset)
-{
-   preempt_disable();
-   pagefault_disable();
-   return io_mapping_map_wc(mapping, offset, PAGE_SIZE);
-}
-
-static inline void
-io_mapping_unmap_atomic(void __iomem *vaddr)
-{
-   io_mapping_unmap(vaddr);
-   pagefault_enable();
-   preempt_enable();
-}
-
+/* Temporary mappings which are only valid in the current context */
 static inline void __iomem *
 io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset)
 {
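
A usage sketch of the remaining local API, following the stack discipline
described in the documentation hunk above (mapping, off1 and off2 are
illustrative):

    void __iomem *a = io_mapping_map_local_wc(mapping, off1);
    void __iomem *b = io_mapping_map_local_wc(mapping, off2);

    /* ... access a and b ... */

    io_mapping_unmap_local(b);  /* nested maps are undone in reverse order */
    io_mapping_unmap_local(a);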



[patch V3 20/37] io-mapping: Cleanup atomic iomap

2020-11-03 Thread Thomas Gleixner
Switch the atomic iomap implementation over to kmap_local and stick the
preempt/pagefault mechanics into the generic code similar to the
kmap_atomic variants.

Rename the x86 map function in preparation for a non-atomic variant.

Signed-off-by: Thomas Gleixner 
---
V2: New patch to make review easier
---
 arch/x86/include/asm/iomap.h |9 +
 arch/x86/mm/iomap_32.c   |6 ++
 include/linux/io-mapping.h   |8 ++--
 3 files changed, 9 insertions(+), 14 deletions(-)

--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -13,14 +13,7 @@
 #include 
 #include 
 
-void __iomem *iomap_atomic_pfn_prot(unsigned long pfn, pgprot_t prot);
-
-static inline void iounmap_atomic(void __iomem *vaddr)
-{
-   kunmap_local_indexed((void __force *)vaddr);
-   pagefault_enable();
-   preempt_enable();
-}
+void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
 
 int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
 
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -44,7 +44,7 @@ void iomap_free(resource_size_t base, un
 }
 EXPORT_SYMBOL_GPL(iomap_free);
 
-void __iomem *iomap_atomic_pfn_prot(unsigned long pfn, pgprot_t prot)
+void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
 {
/*
 * For non-PAT systems, translate non-WB request to UC- just in
@@ -60,8 +60,6 @@ void __iomem *iomap_atomic_pfn_prot(unsi
/* Filter out unsupported __PAGE_KERNEL* bits: */
pgprot_val(prot) &= __default_kernel_pte_mask;
 
-   preempt_disable();
-   pagefault_disable();
return (void __force __iomem *)__kmap_local_pfn_prot(pfn, prot);
 }
-EXPORT_SYMBOL_GPL(iomap_atomic_pfn_prot);
+EXPORT_SYMBOL_GPL(__iomap_local_pfn_prot);
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -69,13 +69,17 @@ io_mapping_map_atomic_wc(struct io_mappi
 
BUG_ON(offset >= mapping->size);
phys_addr = mapping->base + offset;
-   return iomap_atomic_pfn_prot(PHYS_PFN(phys_addr), mapping->prot);
+   preempt_disable();
+   pagefault_disable();
+   return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot);
 }
 
 static inline void
 io_mapping_unmap_atomic(void __iomem *vaddr)
 {
-   iounmap_atomic(vaddr);
+   kunmap_local_indexed((void __force *)vaddr);
+   pagefault_enable();
+   preempt_enable();
 }
 
 static inline void __iomem *



[patch V3 31/37] drm/ttm: Replace kmap_atomic() usage

2020-11-03 Thread Thomas Gleixner
There is no reason to disable pagefaults and preemption as a side effect of
kmap_atomic_prot().

Use kmap_local_page_prot() instead and document the reasoning for the
mapping usage with the given pgprot.

Remove the NULL pointer check for the map. These functions return a valid
address for valid pages and the return was bogus anyway as it would have
left preemption and pagefaults disabled.

Signed-off-by: Thomas Gleixner 
Cc: Christian Koenig 
Cc: Huang Rui 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: dri-de...@lists.freedesktop.org
---
V3: New patch
---
 drivers/gpu/drm/ttm/ttm_bo_util.c |   20 
 1 file changed, 12 insertions(+), 8 deletions(-)

--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -181,13 +181,15 @@ static int ttm_copy_io_ttm_page(struct t
return -ENOMEM;
 
src = (void *)((unsigned long)src + (page << PAGE_SHIFT));
-   dst = kmap_atomic_prot(d, prot);
-   if (!dst)
-   return -ENOMEM;
+   /*
+* Ensure that a highmem page is mapped with the correct
+* pgprot. For non highmem the mapping is already there.
+*/
+   dst = kmap_local_page_prot(d, prot);
 
memcpy_fromio(dst, src, PAGE_SIZE);
 
-   kunmap_atomic(dst);
+   kunmap_local(dst);
 
return 0;
 }
@@ -203,13 +205,15 @@ static int ttm_copy_ttm_io_page(struct t
return -ENOMEM;
 
dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT));
-   src = kmap_atomic_prot(s, prot);
-   if (!src)
-   return -ENOMEM;
+   /*
+* Ensure that a highmem page is mapped with the correct
+* pgprot. For non highmem the mapping is already there.
+*/
+   src = kmap_local_page_prot(s, prot);
 
memcpy_toio(dst, src, PAGE_SIZE);
 
-   kunmap_atomic(src);
+   kunmap_local(src);
 
return 0;
 }



[patch V3 09/37] arc/mm/highmem: Use generic kmap atomic implementation

2020-11-03 Thread Thomas Gleixner
Adopt the map ordering to match the other architectures and the generic
code. Also limit the maximum number of entries and make it not dependent on
the number of CPUs. The original implementation did the following
calculation:

   nr_slots = mapsize >> PAGE_SHIFT;

This results in either 512 or 1024 total slots depending on configuration.
The total slots have to be divided by the number of CPUs to get the number
of slots per CPU (the former KM_TYPE_NR). ARC supports up to 4k CPUs, so
with 512 total slots the integer division yields zero slots per CPU, and
this just falls apart in random ways depending on the number of CPUs and
the actual kmap (atomic) nesting. The comment in highmem.c:

 * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE
 *   slots across NR_CPUS would be more than sufficient (generic code defines
 *   KM_TYPE_NR as 20).

is just wrong. KM_TYPE_NR (now KM_MAX_IDX) is the number of slots per CPU
because kmap_local/atomic() needs to support nested mappings (thread,
softirq, interrupt). While KM_MAX_IDX might be overestimated, the above
reasoning does not hold, and clearly the highmem code was never tested on
any system with more than a few CPUs.

Use the default number of slots and fail the build when it does not
fit. Randomly failing at runtime is not a good option.
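
A minimal sketch of such a compile-time guard (names taken from the diff
below; the check itself is illustrative, e.g. invoked from kmap_init(),
and not the literal patch):

	/* Fail the build if KM_MAX_IDX slots per CPU do not fit into
	 * the fixmap window, instead of corrupting mappings at runtime.
	 */
	BUILD_BUG_ON((FIX_KMAP_SLOTS << PAGE_SHIFT) > FIXMAP_SIZE);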

Signed-off-by: Thomas Gleixner 
Cc: Vineet Gupta 
Cc: linux-snps-...@lists.infradead.org
---
V3: Make it actually more correct.
---
 arch/arc/Kconfig  |1 
 arch/arc/include/asm/highmem.h|   26 ++
 arch/arc/include/asm/kmap_types.h |   14 -
 arch/arc/mm/highmem.c |   54 +++---
 4 files changed, 26 insertions(+), 69 deletions(-)

--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -507,6 +507,7 @@ config LINUX_RAM_BASE
 config HIGHMEM
bool "High Memory Support"
select ARCH_DISCONTIGMEM_ENABLE
+   select KMAP_LOCAL
help
  With ARC 2G:2G address split, only upper 2G is directly addressable by
  kernel. Enable this to potentially allow access to rest of 2G and PAE
--- a/arch/arc/include/asm/highmem.h
+++ b/arch/arc/include/asm/highmem.h
@@ -9,17 +9,29 @@
 #ifdef CONFIG_HIGHMEM
 
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
+
+#define FIXMAP_SIZEPGDIR_SIZE
+#define PKMAP_SIZE PGDIR_SIZE
 
 /* start after vmalloc area */
 #define FIXMAP_BASE(PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE)
-#define FIXMAP_SIZEPGDIR_SIZE  /* only 1 PGD worth */
-#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS)
-#define FIXMAP_ADDR(nr)(FIXMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define FIX_KMAP_SLOTS (KM_MAX_IDX * NR_CPUS)
+#define FIX_KMAP_BEGIN (0UL)
+#define FIX_KMAP_END   ((FIX_KMAP_BEGIN + FIX_KMAP_SLOTS) - 1)
+
+#define FIXADDR_TOP(FIXMAP_BASE + (FIX_KMAP_END << PAGE_SHIFT))
+
+/*
+ * This should be converted to the asm-generic version, but of course this
+ * is needlessly different from all other architectures. Sigh - tglx
+ */
+#define __fix_to_virt(x)   (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)   (((FIXADDR_TOP - ((x) & PAGE_MASK))) >> PAGE_SHIFT)
 
 /* start after fixmap area */
 #define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE)
-#define PKMAP_SIZE PGDIR_SIZE
 #define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT)
 #define LAST_PKMAP_MASK(LAST_PKMAP - 1)
 #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
@@ -29,11 +41,13 @@
 
 extern void kmap_init(void);
 
+#define arch_kmap_local_post_unmap(vaddr)  \
+   local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE)
+
 static inline void flush_cache_kmaps(void)
 {
flush_cache_all();
 }
-
 #endif
 
 #endif
--- a/arch/arc/include/asm/kmap_types.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-/*
- * We primarily need to define KM_TYPE_NR here but that in turn
- * is a function of PGDIR_SIZE etc.
- * To avoid circular deps issue, put everything in asm/highmem.h
- */
-#endif
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -36,9 +36,8 @@
  *   This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means
  *   2M of kvaddr space for typical config (8K page and 11:8:13 traversal split)
  *
- * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE
- *   slots across NR_CPUS would be more than sufficient (generic code defines
- *   KM_TYPE_NR as 20).
+ * - The fixed KMAP slots for kmap_local/atomic() require KM_MAX_IDX slots per
+ *   CPU. So the number of CPUs sharing a single PTE page is limited.
  *
  * - pkmap being preemptible, in theory could do with more than 256 concurrent
  *   mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse
@@ -47,48 +46,6 @@
  */
 
 extern pte_t * 

[patch V3 13/37] mips/mm/highmem: Switch to generic kmap atomic

2020-11-03 Thread Thomas Gleixner
No reason to have the same code in every architecture.

Signed-off-by: Thomas Gleixner 
Cc: Thomas Bogendoerfer 
Cc: linux-m...@vger.kernel.org
---
V3: Remove the kmap types cruft
---
 arch/mips/Kconfig  |1 
 arch/mips/include/asm/fixmap.h |4 -
 arch/mips/include/asm/highmem.h|6 +-
 arch/mips/include/asm/kmap_types.h |   13 --
 arch/mips/mm/highmem.c |   77 -
 arch/mips/mm/init.c|4 -
 6 files changed, 6 insertions(+), 99 deletions(-)

--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2719,6 +2719,7 @@ config WAR_MIPS34K_MISSED_ITLB
 config HIGHMEM
bool "High Memory Support"
	depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
+   select KMAP_LOCAL
 
 config CPU_SUPPORTS_HIGHMEM
bool
--- a/arch/mips/include/asm/fixmap.h
+++ b/arch/mips/include/asm/fixmap.h
@@ -17,7 +17,7 @@
 #include 
 #ifdef CONFIG_HIGHMEM
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 #endif
 
 /*
@@ -52,7 +52,7 @@ enum fixed_addresses {
 #ifdef CONFIG_HIGHMEM
/* reserved pte's for temporary kernel mappings */
FIX_KMAP_BEGIN = FIX_CMAP_END + 1,
-   FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
 #endif
__end_of_fixed_addresses
 };
--- a/arch/mips/include/asm/highmem.h
+++ b/arch/mips/include/asm/highmem.h
@@ -24,7 +24,7 @@
 #include 
 #include 
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 
 /* declarations for highmem.c */
 extern unsigned long highstart_pfn, highend_pfn;
@@ -48,11 +48,11 @@ extern pte_t *pkmap_page_table;
 
 #define ARCH_HAS_KMAP_FLUSH_TLB
 extern void kmap_flush_tlb(unsigned long addr);
-extern void *kmap_atomic_pfn(unsigned long pfn);
 
 #define flush_cache_kmaps()BUG_ON(cpu_has_dc_aliases)
 
-extern void kmap_init(void);
+#define arch_kmap_local_post_map(vaddr, pteval)	local_flush_tlb_one(vaddr)
+#define arch_kmap_local_post_unmap(vaddr)  local_flush_tlb_one(vaddr)
 
 #endif /* __KERNEL__ */
 
--- a/arch/mips/include/asm/kmap_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define __WITH_KM_FENCE
-#endif
-
-#include <asm-generic/kmap_types.h>
-
-#undef __WITH_KM_FENCE
-
-#endif
--- a/arch/mips/mm/highmem.c
+++ b/arch/mips/mm/highmem.c
@@ -8,8 +8,6 @@
 #include 
 #include 
 
-static pte_t *kmap_pte;
-
 unsigned long highstart_pfn, highend_pfn;
 
 void kmap_flush_tlb(unsigned long addr)
@@ -17,78 +15,3 @@ void kmap_flush_tlb(unsigned long addr)
flush_tlb_one(addr);
 }
 EXPORT_SYMBOL(kmap_flush_tlb);
-
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
-{
-   unsigned long vaddr;
-   int idx, type;
-
-   type = kmap_atomic_idx_push();
-   idx = type + KM_TYPE_NR*smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
-   BUG_ON(!pte_none(*(kmap_pte - idx)));
-#endif
-   set_pte(kmap_pte-idx, mk_pte(page, prot));
-   local_flush_tlb_one((unsigned long)vaddr);
-
-   return (void*) vaddr;
-}
-EXPORT_SYMBOL(kmap_atomic_high_prot);
-
-void kunmap_atomic_high(void *kvaddr)
-{
-   unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-   int type __maybe_unused;
-
-   if (vaddr < FIXADDR_START)
-   return;
-
-   type = kmap_atomic_idx();
-#ifdef CONFIG_DEBUG_HIGHMEM
-   {
-   int idx = type + KM_TYPE_NR * smp_processor_id();
-
-   BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
-
-   /*
-* force other mappings to Oops if they'll try to access
-* this pte without first remap it
-*/
-   pte_clear(&init_mm, vaddr, kmap_pte-idx);
-   local_flush_tlb_one(vaddr);
-   }
-#endif
-   kmap_atomic_idx_pop();
-}
-EXPORT_SYMBOL(kunmap_atomic_high);
-
-/*
- * This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn)
-{
-   unsigned long vaddr;
-   int idx, type;
-
-   preempt_disable();
-   pagefault_disable();
-
-   type = kmap_atomic_idx_push();
-   idx = type + KM_TYPE_NR*smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-   set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL));
-   flush_tlb_one(vaddr);
-
-   return (void*) vaddr;
-}
-
-void __init kmap_init(void)
-{
-   unsigned long kmap_vstart;
-
-   /* cache the first kmap pte */
-   kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
-   kmap_pte = virt_to_kpte(kmap_vstart);
-}
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -36,7 +36,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -402,9 +401,6 @@ void __init paging_init(void)
 
pagetable_init();
 
-#ifdef 

[patch V3 18/37] highmem: Get rid of kmap_types.h

2020-11-03 Thread Thomas Gleixner
The header is no longer used, and on alpha, ia64, openrisc, parisc and um
it was completely unused anyway as these architectures have no highmem
support.

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 arch/alpha/include/asm/kmap_types.h  |   15 ---
 arch/ia64/include/asm/kmap_types.h   |   13 -
 arch/openrisc/mm/init.c  |1 -
 arch/openrisc/mm/ioremap.c   |1 -
 arch/parisc/include/asm/kmap_types.h |   13 -
 arch/um/include/asm/fixmap.h |1 -
 arch/um/include/asm/kmap_types.h |   13 -
 include/asm-generic/Kbuild   |1 -
 include/asm-generic/kmap_types.h |   11 ---
 include/linux/highmem.h  |2 --
 10 files changed, 71 deletions(-)

--- a/arch/alpha/include/asm/kmap_types.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-/* Dummy header just to define km_type. */
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define  __WITH_KM_FENCE
-#endif
-
-#include <asm-generic/kmap_types.h>
-
-#undef __WITH_KM_FENCE
-
-#endif
--- a/arch/ia64/include/asm/kmap_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_IA64_KMAP_TYPES_H
-#define _ASM_IA64_KMAP_TYPES_H
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define  __WITH_KM_FENCE
-#endif
-
-#include <asm-generic/kmap_types.h>
-
-#undef __WITH_KM_FENCE
-
-#endif /* _ASM_IA64_KMAP_TYPES_H */
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -33,7 +33,6 @@
 #include 
 #include 
 #include 
-#include <asm/kmap_types.h>
 #include 
 #include 
 #include 
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -15,7 +15,6 @@
 #include 
 #include 
 #include 
-#include <asm/kmap_types.h>
 #include 
 #include 
 #include 
--- a/arch/parisc/include/asm/kmap_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define  __WITH_KM_FENCE
-#endif
-
-#include <asm-generic/kmap_types.h>
-
-#undef __WITH_KM_FENCE
-
-#endif
--- a/arch/um/include/asm/fixmap.h
+++ b/arch/um/include/asm/fixmap.h
@@ -3,7 +3,6 @@
 #define __UM_FIXMAP_H
 
 #include 
-#include <asm/kmap_types.h>
 #include 
 #include 
 #include 
--- a/arch/um/include/asm/kmap_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* 
- * Copyright (C) 2002 Jeff Dike (jd...@karaya.com)
- */
-
-#ifndef __UM_KMAP_TYPES_H
-#define __UM_KMAP_TYPES_H
-
-/* No more #include "asm/arch/kmap_types.h" ! */
-
-#define KM_TYPE_NR 14
-
-#endif
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -30,7 +30,6 @@ mandatory-y += irq.h
 mandatory-y += irq_regs.h
 mandatory-y += irq_work.h
 mandatory-y += kdebug.h
-mandatory-y += kmap_types.h
 mandatory-y += kmap_size.h
 mandatory-y += kprobes.h
 mandatory-y += linkage.h
--- a/include/asm-generic/kmap_types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_GENERIC_KMAP_TYPES_H
-#define _ASM_GENERIC_KMAP_TYPES_H
-
-#ifdef __WITH_KM_FENCE
-# define KM_TYPE_NR 41
-#else
-# define KM_TYPE_NR 20
-#endif
-
-#endif
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -29,8 +29,6 @@ static inline void invalidate_kernel_vma
 }
 #endif
 
-#include <asm/kmap_types.h>
-
 /*
  * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft.
  */



[patch V3 30/37] highmem: Remove kmap_atomic_pfn()

2020-11-03 Thread Thomas Gleixner
No more users.

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 include/linux/highmem-internal.h |   12 
 1 file changed, 12 deletions(-)

--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -99,13 +99,6 @@ static inline void *kmap_atomic(struct p
return kmap_atomic_prot(page, kmap_prot);
 }
 
-static inline void *kmap_atomic_pfn(unsigned long pfn)
-{
-   preempt_disable();
-   pagefault_disable();
-   return __kmap_local_pfn_prot(pfn, kmap_prot);
-}
-
 static inline void __kunmap_atomic(void *addr)
 {
kunmap_local_indexed(addr);
@@ -193,11 +186,6 @@ static inline void *kmap_atomic_prot(str
return kmap_atomic(page);
 }
 
-static inline void *kmap_atomic_pfn(unsigned long pfn)
-{
-   return kmap_atomic(pfn_to_page(pfn));
-}
-
 static inline void __kunmap_atomic(void *addr)
 {
 #ifdef ARCH_HAS_FLUSH_ON_KUNMAP



[patch V3 21/37] Documentation/io-mapping: Remove outdated blurb

2020-11-03 Thread Thomas Gleixner
The implementation details in the documentation are outdated and not really
helpful. Remove them.

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 Documentation/driver-api/io-mapping.rst |   22 --
 1 file changed, 22 deletions(-)

--- a/Documentation/driver-api/io-mapping.rst
+++ b/Documentation/driver-api/io-mapping.rst
@@ -73,25 +73,3 @@ for pages mapped with io_mapping_map_wc.
 At driver close time, the io_mapping object must be freed::
 
void io_mapping_free(struct io_mapping *mapping)
-
-Current Implementation
-==
-
-The initial implementation of these functions uses existing mapping
-mechanisms and so provides only an abstraction layer and no new
-functionality.
-
-On 64-bit processors, io_mapping_create_wc calls ioremap_wc for the whole
-range, creating a permanent kernel-visible mapping to the resource. The
-map_atomic and map functions add the requested offset to the base of the
-virtual address returned by ioremap_wc.
-
-On 32-bit processors with HIGHMEM defined, io_mapping_map_atomic_wc uses
-kmap_atomic_pfn to map the specified page in an atomic fashion;
-kmap_atomic_pfn isn't really supposed to be used with device pages, but it
-provides an efficient mapping for this usage.
-
-On 32-bit processors without HIGHMEM defined, io_mapping_map_atomic_wc and
-io_mapping_map_wc both use ioremap_wc, a terribly inefficient function which
-performs an IPI to inform all processors about the new mapping. This results
-in a significant performance penalty.



[patch V3 25/37] mm/highmem: Provide kmap_local*

2020-11-03 Thread Thomas Gleixner
Now that the kmap atomic index is stored in the task struct, provide a
preemptible variant. On context switch the maps of an outgoing task are
removed and the maps of the incoming task are restored. That's obviously
slow, but highmem is slow anyway.

The kmap_local.*() functions can be invoked from both preemptible and
atomic context. kmap local sections disable migration to keep the resulting
virtual mapping address correct, but disable neither pagefaults nor
preemption.

A wholesale conversion of kmap_atomic to be fully preemptible is not
possible because some of the usage sites might rely on the preemption
disable for serialization or on the implicit pagefault disable. This needs
to be done on a case-by-case basis.
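
A minimal usage sketch (a hypothetical caller, not part of this patch),
showing the stack based map management which requires unmapping in reverse
order:

	#include <linux/highmem.h>
	#include <linux/string.h>

	static void copy_page_example(struct page *dst, struct page *src)
	{
		void *vfrom = kmap_local_page(src);
		void *vto = kmap_local_page(dst);

		memcpy(vto, vfrom, PAGE_SIZE);

		kunmap_local(vto);	/* last mapped, first unmapped */
		kunmap_local(vfrom);
	}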

Signed-off-by: Thomas Gleixner 
---
V3: Move migrate disable into the actual highmem mapping code so it only
affects real highmem mappings.
   
V2: Make it more consistent and add commentry
---
 include/linux/highmem-internal.h |   48 +++
 include/linux/highmem.h  |   43 +-
 mm/highmem.c |6 
 3 files changed, 81 insertions(+), 16 deletions(-)

--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -69,6 +69,26 @@ static inline void kmap_flush_unused(voi
__kmap_flush_unused();
 }
 
+static inline void *kmap_local_page(struct page *page)
+{
+   return __kmap_local_page_prot(page, kmap_prot);
+}
+
+static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
+{
+   return __kmap_local_page_prot(page, prot);
+}
+
+static inline void *kmap_local_pfn(unsigned long pfn)
+{
+   return __kmap_local_pfn_prot(pfn, kmap_prot);
+}
+
+static inline void __kunmap_local(void *vaddr)
+{
+   kunmap_local_indexed(vaddr);
+}
+
 static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
preempt_disable();
@@ -141,6 +161,28 @@ static inline void kunmap(struct page *p
 #endif
 }
 
+static inline void *kmap_local_page(struct page *page)
+{
+   return page_address(page);
+}
+
+static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
+{
+   return kmap_local_page(page);
+}
+
+static inline void *kmap_local_pfn(unsigned long pfn)
+{
+   return kmap_local_page(pfn_to_page(pfn));
+}
+
+static inline void __kunmap_local(void *addr)
+{
+#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
+   kunmap_flush_on_unmap(addr);
+#endif
+}
+
 static inline void *kmap_atomic(struct page *page)
 {
preempt_disable();
@@ -182,4 +224,10 @@ do {							\
__kunmap_atomic(__addr);\
 } while (0)
 
+#define kunmap_local(__addr)   \
+do {   \
+   BUILD_BUG_ON(__same_type((__addr), struct page *)); \
+   __kunmap_local(__addr); \
+} while (0)
+
 #endif
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -60,24 +60,22 @@ static inline struct page *kmap_to_page(
 static inline void kmap_flush_unused(void);
 
 /**
- * kmap_atomic - Atomically map a page for temporary usage
+ * kmap_local_page - Map a page for temporary usage
  * @page:  Pointer to the page to be mapped
  *
  * Returns: The virtual address of the mapping
  *
- * Side effect: On return pagefaults and preemption are disabled.
- *
  * Can be invoked from any context.
  *
  * Requires careful handling when nesting multiple mappings because the map
  * management is stack based. The unmap has to be in the reverse order of
  * the map operation:
  *
- * addr1 = kmap_atomic(page1);
- * addr2 = kmap_atomic(page2);
+ * addr1 = kmap_local_page(page1);
+ * addr2 = kmap_local_page(page2);
  * ...
- * kunmap_atomic(addr2);
- * kunmap_atomic(addr1);
+ * kunmap_local(addr2);
+ * kunmap_local(addr1);
  *
  * Unmapping addr1 before addr2 is invalid and causes malfunction.
  *
@@ -88,10 +86,26 @@ static inline void kmap_flush_unused(voi
  * virtual address of the direct mapping. Only real highmem pages are
  * temporarily mapped.
  *
- * While it is significantly faster than kmap() it comes with restrictions
- * about the pointer validity and the side effects of disabling page faults
- * and preemption. Use it only when absolutely necessary, e.g. from non
- * preemptible contexts.
+ * While it is significantly faster than kmap() for the highmem case it
+ * comes with restrictions about the pointer validity. Only use when really
+ * necessary.
+ *
+ * On HIGHMEM enabled systems mapping a highmem page has the side effect of
+ * disabling migration in order to keep the virtual address stable across
+ * preemption. No caller of kmap_local_page() can rely on this side effect.
+ */
+static inline void *kmap_local_page(struct page *page);
+
+/**
+ * kmap_atomic - Atomically map a page for temporary usage - Deprecated!
+ * @page:  Pointer to the page to be 

[patch V3 35/37] drm/nouveau/device: Replace io_mapping_map_atomic_wc()

2020-11-03 Thread Thomas Gleixner
Neither fbmem_peek() nor fbmem_poke() requires disabling pagefaults and
preemption as a side effect of io_mapping_map_atomic_wc().

Use io_mapping_map_local_wc() instead.

Signed-off-by: Thomas Gleixner 
Cc: Ben Skeggs 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: dri-de...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
---
V3: New patch
---
 drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h
@@ -60,19 +60,19 @@ fbmem_fini(struct io_mapping *fb)
 static inline u32
 fbmem_peek(struct io_mapping *fb, u32 off)
 {
-   u8 __iomem *p = io_mapping_map_atomic_wc(fb, off & PAGE_MASK);
+   u8 __iomem *p = io_mapping_map_local_wc(fb, off & PAGE_MASK);
u32 val = ioread32(p + (off & ~PAGE_MASK));
-   io_mapping_unmap_atomic(p);
+   io_mapping_unmap_local(p);
return val;
 }
 
 static inline void
 fbmem_poke(struct io_mapping *fb, u32 off, u32 val)
 {
-   u8 __iomem *p = io_mapping_map_atomic_wc(fb, off & PAGE_MASK);
+   u8 __iomem *p = io_mapping_map_local_wc(fb, off & PAGE_MASK);
iowrite32(val, p + (off & ~PAGE_MASK));
wmb();
-   io_mapping_unmap_atomic(p);
+   io_mapping_unmap_local(p);
 }
 
 static inline bool



[patch V3 17/37] xtensa/mm/highmem: Switch to generic kmap atomic

2020-11-03 Thread Thomas Gleixner
No reason to have the same code in every architecture.

Signed-off-by: Thomas Gleixner 
Cc: Chris Zankel 
Cc: Max Filippov 
Cc: linux-xte...@linux-xtensa.org
---
V3: Remove the kmap types cruft
---
 arch/xtensa/Kconfig   |1 
 arch/xtensa/include/asm/fixmap.h  |4 +--
 arch/xtensa/include/asm/highmem.h |   12 -
 arch/xtensa/mm/highmem.c  |   46 --
 4 files changed, 18 insertions(+), 45 deletions(-)

--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -666,6 +666,7 @@ endchoice
 config HIGHMEM
bool "High Memory Support"
depends on MMU
+   select KMAP_LOCAL
help
  Linux can use the full amount of RAM in the system by
  default. However, the default MMUv2 setup only maps the
--- a/arch/xtensa/include/asm/fixmap.h
+++ b/arch/xtensa/include/asm/fixmap.h
@@ -16,7 +16,7 @@
 #ifdef CONFIG_HIGHMEM
 #include 
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 #endif
 
 /*
@@ -39,7 +39,7 @@ enum fixed_addresses {
/* reserved pte's for temporary kernel mappings */
FIX_KMAP_BEGIN,
FIX_KMAP_END = FIX_KMAP_BEGIN +
-   (KM_TYPE_NR * NR_CPUS * DCACHE_N_COLORS) - 1,
+   (KM_MAX_IDX * NR_CPUS * DCACHE_N_COLORS) - 1,
 #endif
__end_of_fixed_addresses
 };
--- a/arch/xtensa/include/asm/highmem.h
+++ b/arch/xtensa/include/asm/highmem.h
@@ -16,9 +16,8 @@
 #include 
 #include 
 #include 
-#include <asm/kmap_types.h>
 
-#define PKMAP_BASE ((FIXADDR_START - \
+#define PKMAP_BASE ((FIXADDR_START -   \
  (LAST_PKMAP + 1) * PAGE_SIZE) & PMD_MASK)
 #define LAST_PKMAP (PTRS_PER_PTE * DCACHE_N_COLORS)
 #define LAST_PKMAP_MASK(LAST_PKMAP - 1)
@@ -68,6 +67,15 @@ static inline void flush_cache_kmaps(voi
flush_cache_all();
 }
 
+enum fixed_addresses kmap_local_map_idx(int type, unsigned long pfn);
+#define arch_kmap_local_map_idxkmap_local_map_idx
+
+enum fixed_addresses kmap_local_unmap_idx(int type, unsigned long addr);
+#define arch_kmap_local_unmap_idx  kmap_local_unmap_idx
+
+#define arch_kmap_local_post_unmap(vaddr)  \
+   local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE)
+
 void kmap_init(void);
 
 #endif
--- a/arch/xtensa/mm/highmem.c
+++ b/arch/xtensa/mm/highmem.c
@@ -12,8 +12,6 @@
 #include 
 #include 
 
-static pte_t *kmap_pte;
-
 #if DCACHE_WAY_SIZE > PAGE_SIZE
 unsigned int last_pkmap_nr_arr[DCACHE_N_COLORS];
 wait_queue_head_t pkmap_map_wait_arr[DCACHE_N_COLORS];
@@ -33,59 +31,25 @@ static inline void kmap_waitqueues_init(
 
 static inline enum fixed_addresses kmap_idx(int type, unsigned long color)
 {
-   return (type + KM_TYPE_NR * smp_processor_id()) * DCACHE_N_COLORS +
+   return (type + KM_MAX_IDX * smp_processor_id()) * DCACHE_N_COLORS +
color;
 }
 
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
+enum fixed_addresses kmap_local_map_idx(int type, unsigned long pfn)
 {
-   enum fixed_addresses idx;
-   unsigned long vaddr;
-
-   idx = kmap_idx(kmap_atomic_idx_push(),
-  DCACHE_ALIAS(page_to_phys(page)));
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
-   BUG_ON(!pte_none(*(kmap_pte + idx)));
-#endif
-   set_pte(kmap_pte + idx, mk_pte(page, prot));
-
-   return (void *)vaddr;
+   return kmap_idx(type, DCACHE_ALIAS(pfn << PAGE_SHIFT));
 }
-EXPORT_SYMBOL(kmap_atomic_high_prot);
 
-void kunmap_atomic_high(void *kvaddr)
+enum fixed_addresses kmap_local_unmap_idx(int type, unsigned long addr)
 {
-   if (kvaddr >= (void *)FIXADDR_START &&
-   kvaddr < (void *)FIXADDR_TOP) {
-   int idx = kmap_idx(kmap_atomic_idx(),
-  DCACHE_ALIAS((unsigned long)kvaddr));
-
-   /*
-* Force other mappings to Oops if they'll try to access this
-* pte without first remap it.  Keeping stale mappings around
-* is a bad idea also, in case the page changes cacheability
-* attributes or becomes a protected page in a hypervisor.
-*/
-   pte_clear(&init_mm, kvaddr, kmap_pte + idx);
-   local_flush_tlb_kernel_range((unsigned long)kvaddr,
-(unsigned long)kvaddr + PAGE_SIZE);
-
-   kmap_atomic_idx_pop();
-   }
+   return kmap_idx(type, DCACHE_ALIAS(addr));
 }
-EXPORT_SYMBOL(kunmap_atomic_high);
 
 void __init kmap_init(void)
 {
-   unsigned long kmap_vstart;
-
/* Check if this memory layout is broken because PKMAP overlaps
 * page table.
 */
BUILD_BUG_ON(PKMAP_BASE < TLBTEMP_BASE_1 + TLBTEMP_SIZE);
-   /* cache the first kmap pte */
-   kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
-   kmap_pte = virt_to_kpte(kmap_vstart);
kmap_waitqueues_init();
 }


[patch V3 28/37] mips/crashdump: Simplify copy_oldmem_page()

2020-11-03 Thread Thomas Gleixner
Replace kmap_atomic_pfn() with kmap_local_pfn(), which is preemptible and
can take page faults.

Remove the indirection of the dump page and the related cruft which is no
longer required.
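
The resulting flow, sketched (context assumed; the matching kunmap_local()
releases the map before the function returns):

	vaddr = kmap_local_pfn(pfn);		/* pagefaults stay enabled */
	if (copy_to_user(buf, vaddr + offset, csize))	/* may fault and sleep */
		csize = -EFAULT;
	kunmap_local(vaddr);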

Signed-off-by: Thomas Gleixner 
Cc: Thomas Bogendoerfer 
Cc: linux-m...@vger.kernel.org
---
V3: New patch
---
 arch/mips/kernel/crash_dump.c |   42 +++---
 1 file changed, 7 insertions(+), 35 deletions(-)

--- a/arch/mips/kernel/crash_dump.c
+++ b/arch/mips/kernel/crash_dump.c
@@ -5,8 +5,6 @@
 #include 
 #include 
 
-static void *kdump_buf_page;
-
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
@@ -17,51 +15,25 @@ static void *kdump_buf_page;
  * @userbuf: if set, @buf is in user address space, use copy_to_user(),
  * otherwise @buf is in kernel address space, use memcpy().
  *
- * Copy a page from "oldmem". For this page, there is no pte mapped
+ * Copy a page from "oldmem". For this page, there might be no pte mapped
  * in the current kernel.
- *
- * Calling copy_to_user() in atomic context is not desirable. Hence first
- * copying the data to a pre-allocated kernel page and then copying to user
- * space in non-atomic context.
  */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-size_t csize, unsigned long offset, int userbuf)
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+unsigned long offset, int userbuf)
 {
void  *vaddr;
 
if (!csize)
return 0;
 
-   vaddr = kmap_atomic_pfn(pfn);
+   vaddr = kmap_local_pfn(pfn);
 
if (!userbuf) {
-   memcpy(buf, (vaddr + offset), csize);
-   kunmap_atomic(vaddr);
+   memcpy(buf, vaddr + offset, csize);
} else {
-   if (!kdump_buf_page) {
-   pr_warn("Kdump: Kdump buffer page not allocated\n");
-
-   return -EFAULT;
-   }
-   copy_page(kdump_buf_page, vaddr);
-   kunmap_atomic(vaddr);
-   if (copy_to_user(buf, (kdump_buf_page + offset), csize))
-   return -EFAULT;
+   if (copy_to_user(buf, vaddr + offset, csize))
+   csize = -EFAULT;
}
 
return csize;
 }
-
-static int __init kdump_buf_page_init(void)
-{
-   int ret = 0;
-
-   kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
-   if (!kdump_buf_page) {
-   pr_warn("Kdump: Failed to allocate kdump buffer page\n");
-   ret = -ENOMEM;
-   }
-
-   return ret;
-}
-arch_initcall(kdump_buf_page_init);



[patch V3 32/37] drm/vmwgfx: Replace kmap_atomic()

2020-11-03 Thread Thomas Gleixner
There is no reason to disable pagefaults and preemption as a side effect of
kmap_atomic_prot().

Use kmap_local_page_prot() instead and document the reasoning for the
mapping usage with the given pgprot.

Remove the NULL pointer check for the map. These functions return a valid
address for valid pages, and the early return was bogus anyway as it would
have left preemption and pagefaults disabled.

Signed-off-by: Thomas Gleixner 
Cc: VMware Graphics 
Cc: Roland Scheidegger 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: dri-de...@lists.freedesktop.org
---
V3: New patch
---
 drivers/gpu/drm/vmwgfx/vmwgfx_blit.c |   30 --
 1 file changed, 12 insertions(+), 18 deletions(-)

--- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
@@ -375,12 +375,12 @@ static int vmw_bo_cpu_blit_line(struct v
copy_size = min_t(u32, copy_size, PAGE_SIZE - src_page_offset);
 
if (unmap_src) {
-   kunmap_atomic(d->src_addr);
+   kunmap_local(d->src_addr);
d->src_addr = NULL;
}
 
if (unmap_dst) {
-   kunmap_atomic(d->dst_addr);
+   kunmap_local(d->dst_addr);
d->dst_addr = NULL;
}
 
@@ -388,12 +388,8 @@ static int vmw_bo_cpu_blit_line(struct v
if (WARN_ON_ONCE(dst_page >= d->dst_num_pages))
return -EINVAL;
 
-   d->dst_addr =
-   kmap_atomic_prot(d->dst_pages[dst_page],
-d->dst_prot);
-   if (!d->dst_addr)
-   return -ENOMEM;
-
+   d->dst_addr = kmap_local_page_prot(d->dst_pages[dst_page],
+  d->dst_prot);
d->mapped_dst = dst_page;
}
 
@@ -401,12 +397,8 @@ static int vmw_bo_cpu_blit_line(struct v
if (WARN_ON_ONCE(src_page >= d->src_num_pages))
return -EINVAL;
 
-   d->src_addr =
-   kmap_atomic_prot(d->src_pages[src_page],
-d->src_prot);
-   if (!d->src_addr)
-   return -ENOMEM;
-
+   d->src_addr = kmap_local_page_prot(d->src_pages[src_page],
+  d->src_prot);
d->mapped_src = src_page;
}
diff->do_cpy(diff, d->dst_addr + dst_page_offset,
@@ -436,8 +428,10 @@ static int vmw_bo_cpu_blit_line(struct v
  *
  * Performs a CPU blit from one buffer object to another avoiding a full
  * bo vmap which may exhaust- or fragment vmalloc space.
- * On supported architectures (x86), we're using kmap_atomic which avoids
- * cross-processor TLB- and cache flushes and may, on non-HIGHMEM systems
+ *
+ * On supported architectures (x86), we're using kmap_local_prot() which
+ * avoids cross-processor TLB- and cache flushes. kmap_local_prot() will
+ * either map a highmem page with the proper pgprot on HIGHMEM=y systems or
  * reference already set-up mappings.
  *
  * Neither of the buffer objects may be placed in PCI memory
@@ -500,9 +494,9 @@ int vmw_bo_cpu_blit(struct ttm_buffer_ob
}
 out:
if (d.src_addr)
-   kunmap_atomic(d.src_addr);
+   kunmap_local(d.src_addr);
if (d.dst_addr)
-   kunmap_atomic(d.dst_addr);
+   kunmap_local(d.dst_addr);
 
return ret;
 }



[patch V3 29/37] ARM: mm: Replace kmap_atomic_pfn()

2020-11-03 Thread Thomas Gleixner
There is no requirement to disable pagefaults and preemption for these
cache management mappings.

Replace kmap_atomic_pfn() with kmap_local_pfn(). This allows removing
kmap_atomic_pfn() in the next step.

Signed-off-by: Thomas Gleixner 
Cc: Russell King 
Cc: linux-arm-ker...@lists.infradead.org
---
V3: New patch
---
 arch/arm/mm/cache-feroceon-l2.c |6 +++---
 arch/arm/mm/cache-xsc3l2.c  |4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -49,9 +49,9 @@ static inline unsigned long l2_get_va(un
 * we simply install a virtual mapping for it only for the
 * TLB lookup to occur, hence no need to flush the untouched
 * memory mapping afterwards (note: a cache flush may happen
-* in some circumstances depending on the path taken in kunmap_atomic).
+* in some circumstances depending on the path taken in kunmap_local).
 */
-   void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT);
+   void *vaddr = kmap_local_pfn(paddr >> PAGE_SHIFT);
return (unsigned long)vaddr + (paddr & ~PAGE_MASK);
 #else
return __phys_to_virt(paddr);
@@ -61,7 +61,7 @@ static inline unsigned long l2_get_va(un
 static inline void l2_put_va(unsigned long vaddr)
 {
 #ifdef CONFIG_HIGHMEM
-   kunmap_atomic((void *)vaddr);
+   kunmap_local((void *)vaddr);
 #endif
 }
 
--- a/arch/arm/mm/cache-xsc3l2.c
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -59,7 +59,7 @@ static inline void l2_unmap_va(unsigned
 {
 #ifdef CONFIG_HIGHMEM
if (va != -1)
-   kunmap_atomic((void *)va);
+   kunmap_local((void *)va);
 #endif
 }
 
@@ -75,7 +75,7 @@ static inline unsigned long l2_map_va(un
 * in place for it.
 */
l2_unmap_va(prev_va);
-   va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT);
+   va = (unsigned long)kmap_local_pfn(pa >> PAGE_SHIFT);
}
return va + (pa_offset >> (32 - PAGE_SHIFT));
 #else



[patch V3 22/37] highmem: Hide implementation details and document API

2020-11-03 Thread Thomas Gleixner
Move the gory details of kmap et al. into a private header and only
document the interfaces which are usable by drivers.

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 include/linux/highmem-internal.h |  174 +
 include/linux/highmem.h  |  270 ++-
 mm/highmem.c |   11 -
 3 files changed, 276 insertions(+), 179 deletions(-)

--- /dev/null
+++ b/include/linux/highmem-internal.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HIGHMEM_INTERNAL_H
+#define _LINUX_HIGHMEM_INTERNAL_H
+
+/*
+ * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft.
+ */
+#ifdef CONFIG_KMAP_LOCAL
+void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
+void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
+void kunmap_local_indexed(void *vaddr);
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#include 
+
+#ifndef ARCH_HAS_KMAP_FLUSH_TLB
+static inline void kmap_flush_tlb(unsigned long addr) { }
+#endif
+
+#ifndef kmap_prot
+#define kmap_prot PAGE_KERNEL
+#endif
+
+void *kmap_high(struct page *page);
+void kunmap_high(struct page *page);
+void __kmap_flush_unused(void);
+struct page *__kmap_to_page(void *addr);
+
+static inline void *kmap(struct page *page)
+{
+   void *addr;
+
+   might_sleep();
+   if (!PageHighMem(page))
+   addr = page_address(page);
+   else
+   addr = kmap_high(page);
+   kmap_flush_tlb((unsigned long)addr);
+   return addr;
+}
+
+static inline void kunmap(struct page *page)
+{
+   might_sleep();
+   if (!PageHighMem(page))
+   return;
+   kunmap_high(page);
+}
+
+static inline struct page *kmap_to_page(void *addr)
+{
+   return __kmap_to_page(addr);
+}
+
+static inline void kmap_flush_unused(void)
+{
+   __kmap_flush_unused();
+}
+
+static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+{
+   preempt_disable();
+   pagefault_disable();
+   return __kmap_local_page_prot(page, prot);
+}
+
+static inline void *kmap_atomic(struct page *page)
+{
+   return kmap_atomic_prot(page, kmap_prot);
+}
+
+static inline void *kmap_atomic_pfn(unsigned long pfn)
+{
+   preempt_disable();
+   pagefault_disable();
+   return __kmap_local_pfn_prot(pfn, kmap_prot);
+}
+
+static inline void __kunmap_atomic(void *addr)
+{
+   kunmap_local_indexed(addr);
+   pagefault_enable();
+   preempt_enable();
+}
+
+unsigned int __nr_free_highpages(void);
+extern atomic_long_t _totalhigh_pages;
+
+static inline unsigned int nr_free_highpages(void)
+{
+   return __nr_free_highpages();
+}
+
+static inline unsigned long totalhigh_pages(void)
+{
+   return (unsigned long)atomic_long_read(&_totalhigh_pages);
+}
+
+static inline void totalhigh_pages_inc(void)
+{
+   atomic_long_inc(&_totalhigh_pages);
+}
+
+static inline void totalhigh_pages_add(long count)
+{
+   atomic_long_add(count, &_totalhigh_pages);
+}
+
+#else /* CONFIG_HIGHMEM */
+
+static inline struct page *kmap_to_page(void *addr)
+{
+   return virt_to_page(addr);
+}
+
+static inline void *kmap(struct page *page)
+{
+   might_sleep();
+   return page_address(page);
+}
+
+static inline void kunmap_high(struct page *page) { }
+static inline void kmap_flush_unused(void) { }
+
+static inline void kunmap(struct page *page)
+{
+#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
+   kunmap_flush_on_unmap(page_address(page));
+#endif
+}
+
+static inline void *kmap_atomic(struct page *page)
+{
+   preempt_disable();
+   pagefault_disable();
+   return page_address(page);
+}
+
+static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+{
+   return kmap_atomic(page);
+}
+
+static inline void *kmap_atomic_pfn(unsigned long pfn)
+{
+   return kmap_atomic(pfn_to_page(pfn));
+}
+
+static inline void __kunmap_atomic(void *addr)
+{
+#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
+   kunmap_flush_on_unmap(addr);
+#endif
+   pagefault_enable();
+   preempt_enable();
+}
+
+static inline unsigned int nr_free_highpages(void) { return 0; }
+static inline unsigned long totalhigh_pages(void) { return 0UL; }
+
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * Prevent people trying to call kunmap_atomic() as if it were kunmap()
+ * kunmap_atomic() should get the return value of kmap_atomic, not the page.
+ */
+#define kunmap_atomic(__addr)  \
+do {   \
+   BUILD_BUG_ON(__same_type((__addr), struct page *)); \
+   __kunmap_atomic(__addr);\
+} while (0)
+
+#endif
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -11,199 +11,125 @@
 
 #include 
 
-#ifndef ARCH_HAS_FLUSH_ANON_PAGE
-static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
-{
-}
-#endif
+#include "highmem-internal.h"
 
-#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE

[patch V3 36/37] drm/i915: Replace io_mapping_map_atomic_wc()

2020-11-03 Thread Thomas Gleixner
None of these mappings requires the side effect of disabling pagefaults
and preemption.

Use io_mapping_map_local_wc() instead, and clean up gtt_user_read() and
gtt_user_write() to use plain copy_to_user()/copy_from_user() as the local
maps do not disable pagefaults.

Signed-off-by: Thomas Gleixner 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: intel-...@lists.freedesktop.org
Cc: dri-de...@lists.freedesktop.org
---
V3: New patch
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c |7 +---
 drivers/gpu/drm/i915/i915_gem.c|   40 -
 drivers/gpu/drm/i915/selftests/i915_gem.c  |4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c  |8 ++---
 4 files changed, 22 insertions(+), 37 deletions(-)

--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1081,7 +1081,7 @@ static void reloc_cache_reset(struct rel
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
-   io_mapping_unmap_atomic((void __iomem *)vaddr);
+   io_mapping_unmap_local((void __iomem *)vaddr);
 
	if (drm_mm_node_allocated(&cache->node)) {
		ggtt->vm.clear_range(&ggtt->vm,
@@ -1147,7 +1147,7 @@ static void *reloc_iomap(struct drm_i915
 
if (cache->vaddr) {
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
-   io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
+   io_mapping_unmap_local((void __force __iomem *) unmask_page(cache->vaddr));
} else {
struct i915_vma *vma;
int err;
@@ -1195,8 +1195,7 @@ static void *reloc_iomap(struct drm_i915
offset += page << PAGE_SHIFT;
}
 
-   vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
-						     offset);
+   vaddr = (void __force *)io_mapping_map_local_wc(&ggtt->iomap, offset);
cache->page = page;
cache->vaddr = (unsigned long)vaddr;
 
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -379,22 +379,15 @@ gtt_user_read(struct io_mapping *mapping
  char __user *user_data, int length)
 {
void __iomem *vaddr;
-   unsigned long unwritten;
+   bool fail = false;
 
/* We can use the cpu mem copy function because this is X86. */
-   vaddr = io_mapping_map_atomic_wc(mapping, base);
-   unwritten = __copy_to_user_inatomic(user_data,
-   (void __force *)vaddr + offset,
-   length);
-   io_mapping_unmap_atomic(vaddr);
-   if (unwritten) {
-   vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
-   unwritten = copy_to_user(user_data,
-(void __force *)vaddr + offset,
-length);
-   io_mapping_unmap(vaddr);
-   }
-   return unwritten;
+   vaddr = io_mapping_map_local_wc(mapping, base);
+   if (copy_to_user(user_data, (void __force *)vaddr + offset, length))
+   fail = true;
+   io_mapping_unmap_local(vaddr);
+
+   return fail;
 }
 
 static int
@@ -557,21 +550,14 @@ ggtt_write(struct io_mapping *mapping,
   char __user *user_data, int length)
 {
void __iomem *vaddr;
-   unsigned long unwritten;
+   bool fail = false;
 
/* We can use the cpu mem copy function because this is X86. */
-   vaddr = io_mapping_map_atomic_wc(mapping, base);
-   unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
- user_data, length);
-   io_mapping_unmap_atomic(vaddr);
-   if (unwritten) {
-   vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
-   unwritten = copy_from_user((void __force *)vaddr + offset,
-  user_data, length);
-   io_mapping_unmap(vaddr);
-   }
-
-   return unwritten;
+   vaddr = io_mapping_map_local_wc(mapping, base);
+   if (copy_from_user((void __force *)vaddr + offset, user_data, length))
+   fail = true;
+   io_mapping_unmap_local(vaddr);
+   return fail;
 }
 
 /**
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -57,12 +57,12 @@ static void trash_stolen(struct drm_i915
 
	ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);
 
-   s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
+   s = io_mapping_map_local_wc(&ggtt->iomap, slot);
for (x = 0; x < PAGE_SIZE / sizeof(u32); x++) {
prng = next_pseudo_random32(prng);
		iowrite32(prng, &s[x]);
}
-   

[patch V3 26/37] io-mapping: Provide iomap_local variant

2020-11-03 Thread Thomas Gleixner
Similar to kmap local, provide an iomap local variant which only disables
migration, but disables neither pagefaults nor preemption.
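
A usage sketch (a hypothetical driver snippet, mirroring the fbmem_peek()
conversion later in this series):

	static u32 reg_peek(struct io_mapping *map, unsigned long off)
	{
		u8 __iomem *p = io_mapping_map_local_wc(map, off & PAGE_MASK);
		u32 val = ioread32(p + (off & ~PAGE_MASK));

		io_mapping_unmap_local(p);
		return val;
	}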

Signed-off-by: Thomas Gleixner 
---
V3: Restrict migrate disable to the 32bit mapping case and update documentation.

V2: Split out from the large combo patch and add the !IOMAP_ATOMIC variants
---
 Documentation/driver-api/io-mapping.rst |   76 +++-
 include/linux/io-mapping.h  |   30 +++-
 2 files changed, 74 insertions(+), 32 deletions(-)

--- a/Documentation/driver-api/io-mapping.rst
+++ b/Documentation/driver-api/io-mapping.rst
@@ -20,55 +20,71 @@ as it would consume too much of the kern
 mappable, while 'size' indicates how large a mapping region to
 enable. Both are in bytes.
 
-This _wc variant provides a mapping which may only be used
-with the io_mapping_map_atomic_wc or io_mapping_map_wc.
+This _wc variant provides a mapping which may only be used with
+io_mapping_map_atomic_wc(), io_mapping_map_local_wc() or
+io_mapping_map_wc().
+
+With this mapping object, individual pages can be mapped either temporarily
+or long term, depending on the requirements. Of course, temporary maps are
+more efficient. They come in two flavours::
 
-With this mapping object, individual pages can be mapped either atomically
-or not, depending on the necessary scheduling environment. Of course, atomic
-maps are more efficient::
+   void *io_mapping_map_local_wc(struct io_mapping *mapping,
+ unsigned long offset)
 
void *io_mapping_map_atomic_wc(struct io_mapping *mapping,
   unsigned long offset)
 
-'offset' is the offset within the defined mapping region.
-Accessing addresses beyond the region specified in the
-creation function yields undefined results. Using an offset
-which is not page aligned yields an undefined result. The
-return value points to a single page in CPU address space.
-
-This _wc variant returns a write-combining map to the
-page and may only be used with mappings created by
-io_mapping_create_wc
+'offset' is the offset within the defined mapping region.  Accessing
+addresses beyond the region specified in the creation function yields
+undefined results. Using an offset which is not page aligned yields an
+undefined result. The return value points to a single page in CPU address
+space.
 
-Note that the task may not sleep while holding this page
-mapped.
+This _wc variant returns a write-combining map to the page and may only be
+used with mappings created by io_mapping_create_wc()
 
-::
+Temporary mappings are only valid in the context of the caller. The mapping
+is not guaranteed to be globally visible.
 
-   void io_mapping_unmap_atomic(void *vaddr)
+io_mapping_map_local_wc() has a side effect on X86 32bit as it disables
+migration to make the mapping code work. No caller can rely on this side
+effect.
+
+io_mapping_map_atomic_wc() has the side effect of disabling preemption and
+pagefaults. Don't use in new code. Use io_mapping_map_local_wc() instead.
 
-'vaddr' must be the value returned by the last
-io_mapping_map_atomic_wc call. This unmaps the specified
-page and allows the task to sleep once again.
+Nested mappings need to be undone in reverse order because the mapping
+code uses a stack for keeping track of them::
 
-If you need to sleep while holding the lock, you can use the non-atomic
-variant, although they may be significantly slower.
+ addr1 = io_mapping_map_local_wc(map1, offset1);
+ addr2 = io_mapping_map_local_wc(map2, offset2);
+ ...
+ io_mapping_unmap_local(addr2);
+ io_mapping_unmap_local(addr1);
 
-::
+The mappings are released with::
+
+   void io_mapping_unmap_local(void *vaddr)
+   void io_mapping_unmap_atomic(void *vaddr)
+
+'vaddr' must be the value returned by the last io_mapping_map_local_wc() or
+io_mapping_map_atomic_wc() call. This unmaps the specified mapping and
+undoes the side effects of the mapping functions.
+
+If you need to sleep while holding a mapping, you can use the regular
+variant, although this may be significantly slower::
 
void *io_mapping_map_wc(struct io_mapping *mapping,
unsigned long offset)
 
-This works like io_mapping_map_atomic_wc except it allows
-the task to sleep while holding the page mapped.
-
+This works like io_mapping_map_atomic/local_wc() except it has no side
+effects and the pointer is globally visible.
 
-::
+The mappings are released with::
 
void io_mapping_unmap(void *vaddr)
 
-This works like io_mapping_unmap_atomic, except it is used
-for pages mapped with io_mapping_map_wc.
+Use for pages mapped with io_mapping_map_wc().
 
 At driver close time, the io_mapping object must be freed::
 
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -83,6 +83,21 @@ io_mapping_unmap_atomic(void __iomem *va
 }
 
 static inline void __iomem *
+io_mapping_map_local_wc(struct io_mapping *mapping, unsigned 

[patch V3 33/37] highmem: Remove kmap_atomic_prot()

2020-11-03 Thread Thomas Gleixner
No more users.

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 include/linux/highmem-internal.h |   14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -87,16 +87,11 @@ static inline void __kunmap_local(void *
kunmap_local_indexed(vaddr);
 }
 
-static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+static inline void *kmap_atomic(struct page *page)
 {
preempt_disable();
pagefault_disable();
-   return __kmap_local_page_prot(page, prot);
-}
-
-static inline void *kmap_atomic(struct page *page)
-{
-   return kmap_atomic_prot(page, kmap_prot);
+   return __kmap_local_page_prot(page, kmap_prot);
 }
 
 static inline void __kunmap_atomic(void *addr)
@@ -181,11 +176,6 @@ static inline void *kmap_atomic(struct p
return page_address(page);
 }
 
-static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
-{
-   return kmap_atomic(page);
-}
-
 static inline void __kunmap_atomic(void *addr)
 {
 #ifdef ARCH_HAS_FLUSH_ON_KUNMAP



[patch V3 34/37] drm/qxl: Replace io_mapping_map_atomic_wc()

2020-11-03 Thread Thomas Gleixner
None of these mappings requires the side effect of disabling pagefaults
and preemption.

Use io_mapping_map_local_wc() instead, rename the related functions
accordingly and clean up qxl_process_single_command() to use a plain
copy_from_user() as the local maps are not disabling pagefaults.

Signed-off-by: Thomas Gleixner 
Cc: Dave Airlie 
Cc: Gerd Hoffmann 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: virtualization@lists.linux-foundation.org
Cc: spice-de...@lists.freedesktop.org
---
V3: New patch
---
 drivers/gpu/drm/qxl/qxl_image.c   |   18 +-
 drivers/gpu/drm/qxl/qxl_ioctl.c   |   27 +--
 drivers/gpu/drm/qxl/qxl_object.c  |   12 ++--
 drivers/gpu/drm/qxl/qxl_object.h  |4 ++--
 drivers/gpu/drm/qxl/qxl_release.c |4 ++--
 5 files changed, 32 insertions(+), 33 deletions(-)

--- a/drivers/gpu/drm/qxl/qxl_image.c
+++ b/drivers/gpu/drm/qxl/qxl_image.c
@@ -124,12 +124,12 @@ qxl_image_init_helper(struct qxl_device
  wrong (check the bitmaps are sent correctly
  first) */
 
-   ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, 0);
+   ptr = qxl_bo_kmap_local_page(qdev, chunk_bo, 0);
chunk = ptr;
chunk->data_size = height * chunk_stride;
chunk->prev_chunk = 0;
chunk->next_chunk = 0;
-   qxl_bo_kunmap_atomic_page(qdev, chunk_bo, ptr);
+   qxl_bo_kunmap_local_page(qdev, chunk_bo, ptr);
 
{
void *k_data, *i_data;
@@ -143,7 +143,7 @@ qxl_image_init_helper(struct qxl_device
i_data = (void *)data;
 
while (remain > 0) {
-   ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, page << PAGE_SHIFT);
+   ptr = qxl_bo_kmap_local_page(qdev, chunk_bo, page << PAGE_SHIFT);
 
if (page == 0) {
chunk = ptr;
@@ -157,7 +157,7 @@ qxl_image_init_helper(struct qxl_device
 
memcpy(k_data, i_data, size);
 
-   qxl_bo_kunmap_atomic_page(qdev, chunk_bo, ptr);
+   qxl_bo_kunmap_local_page(qdev, chunk_bo, ptr);
i_data += size;
remain -= size;
page++;
@@ -175,10 +175,10 @@ qxl_image_init_helper(struct qxl_device
				page_offset = offset_in_page(out_offset);
				size = min((int)(PAGE_SIZE - page_offset), remain);
 
-   ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, page_base);
+   ptr = qxl_bo_kmap_local_page(qdev, chunk_bo, page_base);
				k_data = ptr + page_offset;
				memcpy(k_data, i_data, size);
-   qxl_bo_kunmap_atomic_page(qdev, chunk_bo, ptr);
+   qxl_bo_kunmap_local_page(qdev, chunk_bo, ptr);
remain -= size;
i_data += size;
out_offset += size;
@@ -189,7 +189,7 @@ qxl_image_init_helper(struct qxl_device
qxl_bo_kunmap(chunk_bo);
 
image_bo = dimage->bo;
-   ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0);
+   ptr = qxl_bo_kmap_local_page(qdev, image_bo, 0);
image = ptr;
 
image->descriptor.id = 0;
@@ -212,7 +212,7 @@ qxl_image_init_helper(struct qxl_device
break;
default:
DRM_ERROR("unsupported image bit depth\n");
-   qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr);
+   qxl_bo_kunmap_local_page(qdev, image_bo, ptr);
return -EINVAL;
}
image->u.bitmap.flags = QXL_BITMAP_TOP_DOWN;
@@ -222,7 +222,7 @@ qxl_image_init_helper(struct qxl_device
image->u.bitmap.palette = 0;
image->u.bitmap.data = qxl_bo_physical_address(qdev, chunk_bo, 0);
 
-   qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr);
+   qxl_bo_kunmap_local_page(qdev, image_bo, ptr);
 
return 0;
 }
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -89,11 +89,11 @@ apply_reloc(struct qxl_device *qdev, str
 {
void *reloc_page;
 
-   reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK);
+   reloc_page = qxl_bo_kmap_local_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK);
	*(uint64_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = qxl_bo_physical_address(qdev,
											       info->src_bo,
											       info->src_offset);
-   

[patch V3 11/37] csky/mm/highmem: Switch to generic kmap atomic

2020-11-03 Thread Thomas Gleixner
No reason to have the same code in every architecture.

Signed-off-by: Thomas Gleixner 
Cc: linux-c...@vger.kernel.org
---
V3: Does not compile with gcc 10
---
 arch/csky/Kconfig   |1 
 arch/csky/include/asm/fixmap.h  |4 +-
 arch/csky/include/asm/highmem.h |6 ++-
 arch/csky/mm/highmem.c  |   75 
 4 files changed, 8 insertions(+), 78 deletions(-)

--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -286,6 +286,7 @@ config NR_CPUS
 config HIGHMEM
bool "High Memory Support"
depends on !CPU_CK610
+   select KMAP_LOCAL
default y
 
 config FORCE_MAX_ZONEORDER
--- a/arch/csky/include/asm/fixmap.h
+++ b/arch/csky/include/asm/fixmap.h
@@ -8,7 +8,7 @@
 #include 
 #ifdef CONFIG_HIGHMEM
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 #endif
 
 enum fixed_addresses {
@@ -17,7 +17,7 @@ enum fixed_addresses {
 #endif
 #ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN,
-   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,
+   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
 #endif
__end_of_fixed_addresses
 };
--- a/arch/csky/include/asm/highmem.h
+++ b/arch/csky/include/asm/highmem.h
@@ -9,7 +9,7 @@
 #include 
 #include 
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 #include 
 
 /* undef for production */
@@ -32,10 +32,12 @@ extern pte_t *pkmap_page_table;
 
 #define ARCH_HAS_KMAP_FLUSH_TLB
 extern void kmap_flush_tlb(unsigned long addr);
-extern void *kmap_atomic_pfn(unsigned long pfn);
 
 #define flush_cache_kmaps() do {} while (0)
 
+#define arch_kmap_local_post_map(vaddr, pteval)	kmap_flush_tlb(vaddr)
+#define arch_kmap_local_post_unmap(vaddr)  kmap_flush_tlb(vaddr)
+
 extern void kmap_init(void);
 
 #endif /* __KERNEL__ */
--- a/arch/csky/mm/highmem.c
+++ b/arch/csky/mm/highmem.c
@@ -9,8 +9,6 @@
 #include 
 #include 
 
-static pte_t *kmap_pte;
-
 unsigned long highstart_pfn, highend_pfn;
 
 void kmap_flush_tlb(unsigned long addr)
@@ -19,67 +17,7 @@ void kmap_flush_tlb(unsigned long addr)
 }
 EXPORT_SYMBOL(kmap_flush_tlb);
 
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
-{
-   unsigned long vaddr;
-   int idx, type;
-
-   type = kmap_atomic_idx_push();
-   idx = type + KM_TYPE_NR*smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
-   BUG_ON(!pte_none(*(kmap_pte - idx)));
-#endif
-   set_pte(kmap_pte-idx, mk_pte(page, prot));
-   flush_tlb_one((unsigned long)vaddr);
-
-   return (void *)vaddr;
-}
-EXPORT_SYMBOL(kmap_atomic_high_prot);
-
-void kunmap_atomic_high(void *kvaddr)
-{
-   unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-   int idx;
-
-   if (vaddr < FIXADDR_START)
-   return;
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-   idx = KM_TYPE_NR*smp_processor_id() + kmap_atomic_idx();
-
-   BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
-
-   pte_clear(&init_mm, vaddr, kmap_pte - idx);
-   flush_tlb_one(vaddr);
-#else
-   (void) idx; /* to kill a warning */
-#endif
-   kmap_atomic_idx_pop();
-}
-EXPORT_SYMBOL(kunmap_atomic_high);
-
-/*
- * This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn)
-{
-   unsigned long vaddr;
-   int idx, type;
-
-   pagefault_disable();
-
-   type = kmap_atomic_idx_push();
-   idx = type + KM_TYPE_NR*smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-   set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL));
-   flush_tlb_one(vaddr);
-
-   return (void *) vaddr;
-}
-
-static void __init kmap_pages_init(void)
+void __init kmap_init(void)
 {
unsigned long vaddr;
pgd_t *pgd;
@@ -96,14 +34,3 @@ static void __init kmap_pages_init(void)
pte = pte_offset_kernel(pmd, vaddr);
pkmap_page_table = pte;
 }
-
-void __init kmap_init(void)
-{
-   unsigned long vaddr;
-
-   kmap_pages_init();
-
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN);
-
-   kmap_pte = pte_offset_kernel((pmd_t *)pgd_offset_k(vaddr), vaddr);
-}



[patch V3 14/37] nds32/mm/highmem: Switch to generic kmap atomic

2020-11-03 Thread Thomas Gleixner
The mapping code is odd and looks broken. See FIXME in the comment.

Also fix the harmless off-by-one in the FIX_KMAP_END define.
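
For illustration, with assumed values KM_MAX_IDX = 16 and NR_CPUS = 2:

	/* 32 slots occupy indices FIX_KMAP_BEGIN .. FIX_KMAP_BEGIN + 31,
	 * so the last valid index is FIX_KMAP_BEGIN + (16 * 2) - 1; the
	 * old define ended one index past the last slot.
	 */
	FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,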

Signed-off-by: Thomas Gleixner 
Cc: Nick Hu 
Cc: Greentime Hu 
Cc: Vincent Chen 
---
V3: Remove the kmap types cruft
---
 arch/nds32/Kconfig.cpu   |1 
 arch/nds32/include/asm/fixmap.h  |4 +--
 arch/nds32/include/asm/highmem.h |   22 +
 arch/nds32/mm/Makefile   |1 
 arch/nds32/mm/highmem.c  |   48 ---
 5 files changed, 19 insertions(+), 57 deletions(-)

--- a/arch/nds32/Kconfig.cpu
+++ b/arch/nds32/Kconfig.cpu
@@ -157,6 +157,7 @@ config HW_SUPPORT_UNALIGNMENT_ACCESS
 config HIGHMEM
bool "High Memory Support"
depends on MMU && !CPU_CACHE_ALIASING
+   select KMAP_LOCAL
help
  The address space of Andes processors is only 4 Gigabytes large
  and it has to accommodate user address space, kernel address
--- a/arch/nds32/include/asm/fixmap.h
+++ b/arch/nds32/include/asm/fixmap.h
@@ -6,7 +6,7 @@
 
 #ifdef CONFIG_HIGHMEM
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 #endif
 
 enum fixed_addresses {
@@ -14,7 +14,7 @@ enum fixed_addresses {
FIX_KMAP_RESERVED,
FIX_KMAP_BEGIN,
 #ifdef CONFIG_HIGHMEM
-   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS),
+   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
 #endif
FIX_EARLYCON_MEM_BASE,
__end_of_fixed_addresses
--- a/arch/nds32/include/asm/highmem.h
+++ b/arch/nds32/include/asm/highmem.h
@@ -5,7 +5,6 @@
 #define _ASM_HIGHMEM_H
 
 #include 
-#include <asm/kmap_types.h>
 #include 
 
 /*
@@ -45,11 +44,22 @@ extern pte_t *pkmap_page_table;
 extern void kmap_init(void);
 
 /*
- * The following functions are already defined by <linux/highmem.h>
- * when CONFIG_HIGHMEM is not set.
+ * FIXME: The below looks broken vs. a kmap_atomic() in task context which
+ * is interrupted and another kmap_atomic() happens in interrupt context.
+ * But what do I know about nds32. -- tglx
  */
-#ifdef CONFIG_HIGHMEM
-extern void *kmap_atomic_pfn(unsigned long pfn);
-#endif
+#define arch_kmap_local_post_map(vaddr, pteval)	\
+   do {\
+   __nds32__tlbop_inv(vaddr);  \
+   __nds32__mtsr_dsb(vaddr, NDS32_SR_TLB_VPN); \
+   __nds32__tlbop_rwr(pteval); \
+   __nds32__isb(); \
+   } while (0)
+
+#define arch_kmap_local_pre_unmap(vaddr)   \
+   do {\
+   __nds32__tlbop_inv(vaddr);  \
+   __nds32__isb(); \
+   } while (0)
 
 #endif
--- a/arch/nds32/mm/Makefile
+++ b/arch/nds32/mm/Makefile
@@ -3,7 +3,6 @@ obj-y   := extable.o tlb.o fault.o init
mm-nds32.o cacheflush.o proc.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
-obj-$(CONFIG_HIGHMEM)   += highmem.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_proc.o = $(CC_FLAGS_FTRACE)
--- a/arch/nds32/mm/highmem.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
-{
-   unsigned int idx;
-   unsigned long vaddr, pte;
-   int type;
-   pte_t *ptep;
-
-   type = kmap_atomic_idx_push();
-
-   idx = type + KM_TYPE_NR * smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-   pte = (page_to_pfn(page) << PAGE_SHIFT) | prot;
-   ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
-   set_pte(ptep, pte);
-
-   __nds32__tlbop_inv(vaddr);
-   __nds32__mtsr_dsb(vaddr, NDS32_SR_TLB_VPN);
-   __nds32__tlbop_rwr(pte);
-   __nds32__isb();
-   return (void *)vaddr;
-}
-EXPORT_SYMBOL(kmap_atomic_high_prot);
-
-void kunmap_atomic_high(void *kvaddr)
-{
-   if (kvaddr >= (void *)FIXADDR_START) {
-   unsigned long vaddr = (unsigned long)kvaddr;
-   pte_t *ptep;
-   kmap_atomic_idx_pop();
-   __nds32__tlbop_inv(vaddr);
-   __nds32__isb();
-   ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
-   set_pte(ptep, 0);
-   }
-}
-EXPORT_SYMBOL(kunmap_atomic_high);



[patch V3 12/37] microblaze/mm/highmem: Switch to generic kmap atomic

2020-11-03 Thread Thomas Gleixner
No reason having the same code in every architecture.

Signed-off-by: Thomas Gleixner 
Cc: Michal Simek 
---
V3: Remove the kmap types cruft
---
 arch/microblaze/Kconfig   |1 
 arch/microblaze/include/asm/fixmap.h  |4 -
 arch/microblaze/include/asm/highmem.h |6 ++
 arch/microblaze/mm/Makefile   |1 
 arch/microblaze/mm/highmem.c  |   78 --
 arch/microblaze/mm/init.c |6 --
 6 files changed, 8 insertions(+), 88 deletions(-)

--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -155,6 +155,7 @@ config XILINX_UNCACHED_SHADOW
 config HIGHMEM
bool "High memory support"
depends on MMU
+   select KMAP_LOCAL
help
  The address space of Microblaze processors is only 4 Gigabytes large
  and it has to accommodate user address space, kernel address
--- a/arch/microblaze/include/asm/fixmap.h
+++ b/arch/microblaze/include/asm/fixmap.h
@@ -20,7 +20,7 @@
 #include 
 #ifdef CONFIG_HIGHMEM
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 #endif
 
 #define FIXADDR_TOP((unsigned long)(-PAGE_SIZE))
@@ -47,7 +47,7 @@ enum fixed_addresses {
FIX_HOLE,
 #ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
-   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * num_possible_cpus()) - 1,
+   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * num_possible_cpus()) - 1,
 #endif
__end_of_fixed_addresses
 };
--- a/arch/microblaze/include/asm/highmem.h
+++ b/arch/microblaze/include/asm/highmem.h
@@ -25,7 +25,6 @@
 #include 
 #include 
 
-extern pte_t *kmap_pte;
 extern pte_t *pkmap_page_table;
 
 /*
@@ -52,6 +51,11 @@ extern pte_t *pkmap_page_table;
 
 #define flush_cache_kmaps()	{ flush_icache(); flush_dcache(); }
 
+#define arch_kmap_local_post_map(vaddr, pteval)	\
+   local_flush_tlb_page(NULL, vaddr);
+#define arch_kmap_local_post_unmap(vaddr)  \
+   local_flush_tlb_page(NULL, vaddr);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_HIGHMEM_H */
--- a/arch/microblaze/mm/Makefile
+++ b/arch/microblaze/mm/Makefile
@@ -6,4 +6,3 @@
 obj-y := consistent.o init.o
 
 obj-$(CONFIG_MMU) += pgtable.o mmu_context.o fault.o
-obj-$(CONFIG_HIGHMEM) += highmem.o
--- a/arch/microblaze/mm/highmem.c
+++ /dev/null
@@ -1,78 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * highmem.c: virtual kernel memory mappings for high memory
- *
- * PowerPC version, stolen from the i386 version.
- *
- * Used in CONFIG_HIGHMEM systems for memory pages which
- * are not addressable by direct kernel virtual addresses.
- *
- * Copyright (C) 1999 Gerhard Wichert, Siemens AG
- *   gerhard.wich...@pdb.siemens.de
- *
- *
- * Redesigned the x86 32-bit VM architecture to deal with
- * up to 16 Terrabyte physical memory. With current x86 CPUs
- * we now support up to 64 Gigabytes physical RAM.
- *
- * Copyright (C) 1999 Ingo Molnar 
- *
- * Reworked for PowerPC by various contributors. Moved from
- * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp.
- */
-
-#include 
-#include 
-
-/*
- * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
- * gives a more generic (and caching) interface. But kmap_atomic can
- * be used in IRQ contexts, so in some (very limited) cases we need
- * it.
- */
-#include 
-
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
-{
-
-   unsigned long vaddr;
-   int idx, type;
-
-   type = kmap_atomic_idx_push();
-   idx = type + KM_TYPE_NR*smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
-   BUG_ON(!pte_none(*(kmap_pte-idx)));
-#endif
-   set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
-   local_flush_tlb_page(NULL, vaddr);
-
-   return (void *) vaddr;
-}
-EXPORT_SYMBOL(kmap_atomic_high_prot);
-
-void kunmap_atomic_high(void *kvaddr)
-{
-   unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-   int type;
-   unsigned int idx;
-
-   if (vaddr < __fix_to_virt(FIX_KMAP_END))
-   return;
-
-   type = kmap_atomic_idx();
-
-   idx = type + KM_TYPE_NR * smp_processor_id();
-#ifdef CONFIG_DEBUG_HIGHMEM
-   BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
-#endif
-   /*
-* force other mappings to Oops if they'll try to access
-* this pte without first remap it
-*/
-   pte_clear(&init_mm, vaddr, kmap_pte-idx);
-   local_flush_tlb_page(NULL, vaddr);
-
-   kmap_atomic_idx_pop();
-}
-EXPORT_SYMBOL(kunmap_atomic_high);
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -49,17 +49,11 @@ unsigned long lowmem_size;
 EXPORT_SYMBOL(min_low_pfn);
 EXPORT_SYMBOL(max_low_pfn);
 
-#ifdef CONFIG_HIGHMEM
-pte_t *kmap_pte;
-EXPORT_SYMBOL(kmap_pte);
-
 static void __init highmem_init(void)
 {
pr_debug("%x\n", (u32)PKMAP_BASE);
map_page(PKMAP_BASE, 0, 0); /* XXX gross */
pkmap_page_table = 

[patch V3 16/37] sparc/mm/highmem: Switch to generic kmap atomic

2020-11-03 Thread Thomas Gleixner
No reason having the same code in every architecture

Signed-off-by: Thomas Gleixner 
Cc: "David S. Miller" 
Cc: sparcli...@vger.kernel.org
---
V3: Remove the kmap types cruft
---
 arch/sparc/Kconfig  |1 
 arch/sparc/include/asm/highmem.h|8 +-
 arch/sparc/include/asm/kmap_types.h |   11 ---
 arch/sparc/include/asm/vaddrs.h |4 -
 arch/sparc/mm/Makefile  |3 
 arch/sparc/mm/highmem.c |  115 
 arch/sparc/mm/srmmu.c   |2 
 7 files changed, 8 insertions(+), 136 deletions(-)

--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -139,6 +139,7 @@ config MMU
 config HIGHMEM
bool
default y if SPARC32
+   select KMAP_LOCAL
 
 config ZONE_DMA
bool
--- a/arch/sparc/include/asm/highmem.h
+++ b/arch/sparc/include/asm/highmem.h
@@ -24,7 +24,6 @@
 #include 
 #include 
 #include 
-#include <asm/kmap_types.h>
 #include 
 
 /* declarations for highmem.c */
@@ -33,8 +32,6 @@ extern unsigned long highstart_pfn, high
 #define kmap_prot __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE)
 extern pte_t *pkmap_page_table;
 
-void kmap_init(void) __init;
-
 /*
  * Right now we initialize only a single pte table. It can be extended
  * easily, subsequent pte tables have to be allocated in one physical
@@ -53,6 +50,11 @@ void kmap_init(void) __init;
 
 #define flush_cache_kmaps()	flush_cache_all()
 
+/* FIXME: Use __flush_tlb_one(vaddr) instead of flush_cache_all() -- Anton */
+#define arch_kmap_local_post_map(vaddr, pteval)	flush_cache_all()
+#define arch_kmap_local_post_unmap(vaddr)  flush_cache_all()
+
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_HIGHMEM_H */
--- a/arch/sparc/include/asm/kmap_types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-/* Dummy header just to define km_type.  None of this
- * is actually used on sparc.  -DaveM
- */
-
-#include <asm-generic/kmap_types.h>
-
-#endif
--- a/arch/sparc/include/asm/vaddrs.h
+++ b/arch/sparc/include/asm/vaddrs.h
@@ -32,13 +32,13 @@
 #define SRMMU_NOCACHE_ALCRATIO 64  /* 256 pages per 64MB of system RAM */
 
 #ifndef __ASSEMBLY__
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 
 enum fixed_addresses {
FIX_HOLE,
 #ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN,
-   FIX_KMAP_END = (KM_TYPE_NR * NR_CPUS),
+   FIX_KMAP_END = (KM_MAX_IDX * NR_CPUS),
 #endif
__end_of_fixed_addresses
 };
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -15,6 +15,3 @@ obj-$(CONFIG_SPARC32)   += leon_mm.o
 
 # Only used by sparc64
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-
-# Only used by sparc32
-obj-$(CONFIG_HIGHMEM)   += highmem.o
--- a/arch/sparc/mm/highmem.c
+++ /dev/null
@@ -1,115 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  highmem.c: virtual kernel memory mappings for high memory
- *
- *  Provides kernel-static versions of atomic kmap functions originally
- *  found as inlines in include/asm-sparc/highmem.h.  These became
- *  needed as kmap_atomic() and kunmap_atomic() started getting
- *  called from within modules.
- *  -- Tomas Szepe , September 2002
- *
- *  But kmap_atomic() and kunmap_atomic() cannot be inlined in
- *  modules because they are loaded with btfixup-ped functions.
- */
-
-/*
- * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
- * gives a more generic (and caching) interface. But kmap_atomic can
- * be used in IRQ contexts, so in some (very limited) cases we need it.
- *
- * XXX This is an old text. Actually, it's good to use atomic kmaps,
- * provided you remember that they are atomic and not try to sleep
- * with a kmap taken, much like a spinlock. Non-atomic kmaps are
- * shared by CPUs, and so precious, and establishing them requires IPI.
- * Atomic kmaps are lightweight and we may have NCPUS more of them.
- */
-#include 
-#include 
-#include 
-
-#include 
-#include 
-#include 
-
-static pte_t *kmap_pte;
-
-void __init kmap_init(void)
-{
-   unsigned long address = __fix_to_virt(FIX_KMAP_BEGIN);
-
-/* cache the first kmap pte */
-kmap_pte = virt_to_kpte(address);
-}
-
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
-{
-   unsigned long vaddr;
-   long idx, type;
-
-   type = kmap_atomic_idx_push();
-   idx = type + KM_TYPE_NR*smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-
-/* XXX Fix - Anton */
-#if 0
-   __flush_cache_one(vaddr);
-#else
-   flush_cache_all();
-#endif
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-   BUG_ON(!pte_none(*(kmap_pte-idx)));
-#endif
-   set_pte(kmap_pte-idx, mk_pte(page, prot));
-/* XXX Fix - Anton */
-#if 0
-   __flush_tlb_one(vaddr);
-#else
-   flush_tlb_all();
-#endif
-
-   return (void*) vaddr;
-}
-EXPORT_SYMBOL(kmap_atomic_high_prot);
-
-void kunmap_atomic_high(void *kvaddr)
-{
-   unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-   int type;
-
-   if (vaddr < FIXADDR_START)
-   return;

[patch V3 23/37] sched: Make migrate_disable/enable() independent of RT

2020-11-03 Thread Thomas Gleixner
Now that the scheduler can deal with migrate disable properly, there is no
real compelling reason to make it only available for RT.

There are quite a few code paths which needlessly disable preemption in
order to prevent migration and some constructs like kmap_atomic() enforce
it implicitly.

Making it available independent of RT allows providing a preemptible
variant of kmap_atomic() and makes the code more consistent in general.

FIXME: Rework the comment in preempt.h
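
As a hedged illustration of what this enables (not from the patch;
process_item() is a made-up helper, and the smp_processor_id() debug check
is the one this series relaxes in lib/smp_processor_id.c):

static void process_on_this_cpu(void)
{
	migrate_disable();
	/*
	 * Preemption stays enabled, but the task can no longer move
	 * to another CPU, so smp_processor_id() is stable here.
	 */
	pr_info("running on CPU %d\n", smp_processor_id());
	process_item();		/* hypothetical per-CPU work */
	migrate_enable();
}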

Signed-off-by: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Juri Lelli 
Cc: Vincent Guittot 
Cc: Dietmar Eggemann 
Cc: Steven Rostedt 
Cc: Ben Segall 
Cc: Mel Gorman 
Cc: Daniel Bristot de Oliveira 
---
 include/linux/kernel.h  |   21 ++---
 include/linux/preempt.h |   38 +++---
 include/linux/sched.h   |2 +-
 kernel/sched/core.c |   45 +++--
 kernel/sched/sched.h|4 ++--
 lib/smp_processor_id.c  |2 +-
 6 files changed, 56 insertions(+), 56 deletions(-)

--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -204,6 +204,7 @@ extern int _cond_resched(void);
 extern void ___might_sleep(const char *file, int line, int preempt_offset);
 extern void __might_sleep(const char *file, int line, int preempt_offset);
 extern void __cant_sleep(const char *file, int line, int preempt_offset);
+extern void __cant_migrate(const char *file, int line);
 
 /**
  * might_sleep - annotation for functions that can sleep
@@ -227,6 +228,18 @@ extern void __cant_sleep(const char *fil
 # define cant_sleep() \
do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
# define sched_annotate_sleep()	(current->task_state_change = 0)
+
+/**
+ * cant_migrate - annotation for functions that cannot migrate
+ *
+ * Will print a stack trace if executed in code which is migratable
+ */
+# define cant_migrate()					\
+   do {\
+   if (IS_ENABLED(CONFIG_SMP)) \
+   __cant_migrate(__FILE__, __LINE__); \
+   } while (0)
+
 /**
  * non_block_start - annotate the start of section where sleeping is prohibited
  *
@@ -251,6 +264,7 @@ extern void __cant_sleep(const char *fil
   int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
 # define cant_sleep() do { } while (0)
# define cant_migrate()	do { } while (0)
 # define sched_annotate_sleep() do { } while (0)
 # define non_block_start() do { } while (0)
 # define non_block_end() do { } while (0)
@@ -258,13 +272,6 @@ extern void __cant_sleep(const char *fil
 
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
 
-#ifndef CONFIG_PREEMPT_RT
-# define cant_migrate()cant_sleep()
-#else
-  /* Placeholder for now */
-# define cant_migrate()	do { } while (0)
-#endif
-
 /**
  * abs - return absolute value of an argument
  * @x: the value.  If it is unsigned type, it is converted to signed type 
first.
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -322,7 +322,7 @@ static inline void preempt_notifier_init
 
 #endif
 
-#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+#ifdef CONFIG_SMP
 
 /*
  * Migrate-Disable and why it is undesired.
@@ -382,43 +382,11 @@ static inline void preempt_notifier_init
 extern void migrate_disable(void);
 extern void migrate_enable(void);
 
-#elif defined(CONFIG_PREEMPT_RT)
+#else
 
 static inline void migrate_disable(void) { }
 static inline void migrate_enable(void) { }
 
-#else /* !CONFIG_PREEMPT_RT */
-
-/**
- * migrate_disable - Prevent migration of the current task
- *
- * Maps to preempt_disable() which also disables preemption. Use
- * migrate_disable() to annotate that the intent is to prevent migration,
- * but not necessarily preemption.
- *
- * Can be invoked nested like preempt_disable() and needs the corresponding
- * number of migrate_enable() invocations.
- */
-static __always_inline void migrate_disable(void)
-{
-   preempt_disable();
-}
-
-/**
- * migrate_enable - Allow migration of the current task
- *
- * Counterpart to migrate_disable().
- *
- * As migrate_disable() can be invoked nested, only the outermost invocation
- * reenables migration.
- *
- * Currently mapped to preempt_enable().
- */
-static __always_inline void migrate_enable(void)
-{
-   preempt_enable();
-}
-
-#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */
+#endif /* CONFIG_SMP */
 
 #endif /* __LINUX_PREEMPT_H */
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -715,7 +715,7 @@ struct task_struct {
const cpumask_t *cpus_ptr;
cpumask_t   cpus_mask;
void*migration_pending;
-#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+#ifdef CONFIG_SMP

[patch V3 24/37] sched: highmem: Store local kmaps in task struct

2020-11-03 Thread Thomas Gleixner
Instead of storing the map per CPU, provide and use per-task storage. That
prepares for local kmaps which are preemptible.

The context switch code is preparatory and not yet in use because
kmap_atomic() runs with preemption disabled. Will be made usable in the
next step.

The context switch logic is safe even when an interrupt happens after
clearing or before restoring the kmaps. The kmap index in the task struct is
not modified, so any nested kmap in an interrupt will use unused indices,
and on return the counter is the same as before.

Also add an assert into the return to user space code. Going back to user
space with an active kmap local is a no-no.
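
A minimal sketch of the switch-out/in idea (not the patch code; the real
implementation lives in mm/highmem.c, and kmap_local_ptep() is a made-up
stand-in for the fixmap pte lookup):

static void kmap_local_sched_out_sketch(void)
{
	int i;

	/* kmap_ctrl.idx is left untouched, so a nested kmap from an
	 * interrupt during this loop lands in still-unused slots. */
	for (i = 0; i < current->kmap_ctrl.idx; i++) {
		unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + i);

		/* pteval[i] already holds the mapping; just clear the slot */
		pte_clear(&init_mm, vaddr, kmap_local_ptep(vaddr));
	}
}

static void kmap_local_sched_in_sketch(void)
{
	int i;

	for (i = 0; i < current->kmap_ctrl.idx; i++) {
		unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + i);

		set_pte(kmap_local_ptep(vaddr), current->kmap_ctrl.pteval[i]);
	}
}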

Signed-off-by: Thomas Gleixner 
---
V3: Handle the debug case correctly
---
 include/linux/highmem-internal.h |   10 +++
 include/linux/sched.h|9 +++
 kernel/entry/common.c|2 
 kernel/fork.c|1 
 kernel/sched/core.c  |   18 +++
 mm/highmem.c |   99 +++
 6 files changed, 129 insertions(+), 10 deletions(-)

--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -9,6 +9,16 @@
 void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
 void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
 void kunmap_local_indexed(void *vaddr);
+void kmap_local_fork(struct task_struct *tsk);
+void __kmap_local_sched_out(void);
+void __kmap_local_sched_in(void);
+static inline void kmap_assert_nomap(void)
+{
+   DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx);
+}
+#else
+static inline void kmap_local_fork(struct task_struct *tsk) { }
+static inline void kmap_assert_nomap(void) { }
 #endif
 
 #ifdef CONFIG_HIGHMEM
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -629,6 +630,13 @@ struct wake_q_node {
struct wake_q_node *next;
 };
 
+struct kmap_ctrl {
+#ifdef CONFIG_KMAP_LOCAL
+   int idx;
+   pte_t   pteval[KM_TYPE_NR];
+#endif
+};
+
 struct task_struct {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
/*
@@ -1294,6 +1302,7 @@ struct task_struct {
unsigned intsequential_io;
unsigned intsequential_io_avg;
 #endif
+   struct kmap_ctrlkmap_ctrl;
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long   task_state_change;
 #endif
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -2,6 +2,7 @@
 
 #include 
 #include 
+#include <linux/highmem.h>
 #include 
 #include 
 
@@ -194,6 +195,7 @@ static void exit_to_user_mode_prepare(st
 
/* Ensure that the address limit is intact and no locks are held */
addr_limit_user_check();
+   kmap_assert_nomap();
lockdep_assert_irqs_disabled();
lockdep_sys_exit();
 }
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -930,6 +930,7 @@ static struct task_struct *dup_task_stru
account_kernel_stack(tsk, 1);
 
kcov_task_init(tsk);
+   kmap_local_fork(tsk);
 
 #ifdef CONFIG_FAULT_INJECTION
tsk->fail_nth = 0;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4053,6 +4053,22 @@ static inline void finish_lock_switch(st
# define finish_arch_post_lock_switch()	do { } while (0)
 #endif
 
+static inline void kmap_local_sched_out(void)
+{
+#ifdef CONFIG_KMAP_LOCAL
+   if (unlikely(current->kmap_ctrl.idx))
+   __kmap_local_sched_out();
+#endif
+}
+
+static inline void kmap_local_sched_in(void)
+{
+#ifdef CONFIG_KMAP_LOCAL
+   if (unlikely(current->kmap_ctrl.idx))
+   __kmap_local_sched_in();
+#endif
+}
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -4075,6 +4091,7 @@ prepare_task_switch(struct rq *rq, struc
perf_event_task_sched_out(prev, next);
rseq_preempt(prev);
fire_sched_out_preempt_notifiers(prev, next);
+   kmap_local_sched_out();
prepare_task(next);
prepare_arch_switch(next);
 }
@@ -4141,6 +4158,7 @@ static struct rq *finish_task_switch(str
finish_lock_switch(rq);
finish_arch_post_lock_switch();
kcov_finish_switch(current);
+   kmap_local_sched_in();
 
fire_sched_in_preempt_notifiers(current);
/*
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -365,8 +365,6 @@ EXPORT_SYMBOL(kunmap_high);
 
 #include 
 
-static DEFINE_PER_CPU(int, __kmap_local_idx);
-
 /*
  * With DEBUG_HIGHMEM the stack depth is doubled and every second
  * slot is unused which acts as a guard page
@@ -379,23 +377,21 @@ static DEFINE_PER_CPU(int, __kmap_local_
 
 static inline int kmap_local_idx_push(void)
 {
-   int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1;
-
WARN_ON_ONCE(in_irq() && !irqs_disabled());
-   BUG_ON(idx >= KM_MAX_IDX);
-   return idx;
+ 

[patch V3 00/37] mm/highmem: Preemptible variant of kmap_atomic & friends

2020-11-03 Thread Thomas Gleixner
Following up on the discussion in:

  https://lore.kernel.org/r/20200914204209.256266...@linutronix.de

and the second version of this:

  https://lore.kernel.org/r/20201029221806.189523...@linutronix.de

this series provides a preemptible variant of kmap_atomic & related
interfaces.

This is achieved by:

 - Removing the RT dependency from migrate_disable/enable()

 - Consolidating all kmap atomic implementations in generic code including
   a useful version of CONFIG_DEBUG_HIGHMEM which provides guard pages
   between the individual maps instead of just increasing the map size.

 - Switching from per-CPU storage of the kmap index to per-task storage

 - Adding a pteval array to the per task storage which contains the ptevals
   of the currently active temporary kmaps

 - Adding context switch code which checks whether the outgoing or the
   incoming task has active temporary kmaps. If so, the outgoing task's
   kmaps are removed and the incoming task's kmaps are restored.

 - Adding new interfaces k[un]map_local*() which do not disable
   preemption and can be called from any context (except NMI).

   Contrary to kmap() which provides preemptible and "persistent" mappings,
   these interfaces are meant to replace the temporary mappings provided by
   kmap_atomic*() today.

This makes it possible to get rid of conditional mapping choices and to have
preemptible short-term mappings on 64bit, which today are forced to be
non-preemptible due to the highmem constraints. It clearly puts overhead on
the highmem users, but highmem is slow anyway.

This is not a wholesale conversion which makes kmap_atomic magically
preemptible because there might be usage sites which rely on the implicit
preempt disable. So this needs to be done on a case by case basis and the
call sites converted to kmap_local().

Note, that this is only tested on X86 and completely untested on all other
architectures (at least it compiles except on csky which does not compile
with the newest cross tools from kernel.org independent of this change).

The lot is available from

   git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git highmem

It is based on Peter Zijlstra's migrate disable branch which is close to being
merged into the tip tree, but still not finalized:

   git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git 
sched/migrate-disable

The series has the following parts:

Patches  1 - 22: Consolidation work which is independent of the scheduler
 changes

 79 files changed, 595 insertions(+), 1296 deletions(-)

Patch   23:  Needs to be folded back into the sched/migrate-disable

Patches 24 - 26: The preemptible kmap_local() implementation

 9 files changed, 283 insertions(+), 57 deletions(-)

Patches 27 - 37: Cleanup of the less common kmap/io_map_atomic users

 19 files changed, 114 insertions(+), 256 deletions(-)

Vs. merging this pile:

If everyone agrees, I'd like to take the first part (1-22) through tip so
that the preemptible implementation can be sorted in tip once the scheduler
prerequisites are there. The initial cleanups (27-37) might have to wait if
there are conflicts vs. the drm/gpu tree. We'll see.

From what I can tell kmap_atomic() can be removed altogether and
completely replaced by kmap_local(). Most of the usage sites are trivial and
just doing memcpy(), memset() or trivial operations on the temporarily
mapped page. The interesting ones are those which do either conditional
stuff or have copy_.*_user_inatomic() inside. As shown with the crash and
drm/gpu cleanups, this allows the code to be simplified quite a bit.
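
A sketch of the kind of trivial conversion meant here (interface names
follow this series' k[un]map_local*() scheme; treat the exact spellings as
assumptions):

static void copy_highpage_sketch(struct page *dst, struct page *src)
{
	char *vfrom, *vto;

	/* was: vfrom = kmap_atomic(src); ... kunmap_atomic(vfrom); */
	vfrom = kmap_local_page(src);
	vto   = kmap_local_page(dst);
	memcpy(vto, vfrom, PAGE_SIZE);
	kunmap_local(vto);
	kunmap_local(vfrom);
	/*
	 * No implicit preempt_disable() anymore; a caller which relied
	 * on that side effect needs an explicit preempt_disable().
	 */
}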

Changes vs. V2:

  - Remove the migrate disable from kmap_local and only issue that when
    there is an actual highmem mapping. (Linus)
  - Reordered the series so the consolidation is upfront
  - Get rid of kmap_types.h and the associated cruft
  - Fixup documentation and add function documentation for kmap_*
  - Split out the internal implementation into a separate header
  - More cleanups - removal of unused functions
  - Replace a few of the less frequently used kmap_atomic and
io_mapping_map_atomic variants and remove those interfaces.

Thanks,

tglx
---
 arch/alpha/include/asm/kmap_types.h   |   15 
 arch/arc/include/asm/kmap_types.h |   14 
 arch/arm/include/asm/kmap_types.h |   10 
 arch/arm/mm/highmem.c |  121 ---
 arch/ia64/include/asm/kmap_types.h|   13 
 arch/microblaze/mm/highmem.c  |   78 
 arch/mips/include/asm/kmap_types.h|   13 
 arch/nds32/mm/highmem.c   |   48 --
 arch/parisc/include/asm/kmap_types.h  |   13 
 arch/powerpc/include/asm/kmap_types.h |   13 
 arch/powerpc/mm/highmem.c |   67 
 

[patch V3 15/37] powerpc/mm/highmem: Switch to generic kmap atomic

2020-11-03 Thread Thomas Gleixner
No reason having the same code in every architecture

Signed-off-by: Thomas Gleixner 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-...@lists.ozlabs.org
---
V3: Remove the kmap types cruft
---
 arch/powerpc/Kconfig  |1 
 arch/powerpc/include/asm/fixmap.h |4 +-
 arch/powerpc/include/asm/highmem.h|7 ++-
 arch/powerpc/include/asm/kmap_types.h |   13 --
 arch/powerpc/mm/Makefile  |1 
 arch/powerpc/mm/highmem.c |   67 --
 arch/powerpc/mm/mem.c |7 ---
 7 files changed, 8 insertions(+), 92 deletions(-)

--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -409,6 +409,7 @@ menu "Kernel options"
 config HIGHMEM
bool "High memory support"
depends on PPC32
+   select KMAP_LOCAL
 
 source "kernel/Kconfig.hz"
 
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -20,7 +20,7 @@
 #include 
 #ifdef CONFIG_HIGHMEM
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 #endif
 
 #ifdef CONFIG_KASAN
@@ -55,7 +55,7 @@ enum fixed_addresses {
	FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
 #ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
-   FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
 #endif
 #ifdef CONFIG_PPC_8xx
/* For IMMR we need an aligned 512K area */
--- a/arch/powerpc/include/asm/highmem.h
+++ b/arch/powerpc/include/asm/highmem.h
@@ -24,12 +24,10 @@
 #ifdef __KERNEL__
 
 #include 
-#include <asm/kmap_types.h>
 #include 
 #include 
 #include 
 
-extern pte_t *kmap_pte;
 extern pte_t *pkmap_page_table;
 
 /*
@@ -60,6 +58,11 @@ extern pte_t *pkmap_page_table;
 
 #define flush_cache_kmaps()	flush_cache_all()
 
+#define arch_kmap_local_post_map(vaddr, pteval)\
+   local_flush_tlb_page(NULL, vaddr)
+#define arch_kmap_local_post_unmap(vaddr)  \
+   local_flush_tlb_page(NULL, vaddr)
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_HIGHMEM_H */
--- a/arch/powerpc/include/asm/kmap_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_POWERPC_KMAP_TYPES_H
-#define _ASM_POWERPC_KMAP_TYPES_H
-
-#ifdef __KERNEL__
-
-/*
- */
-
-#define KM_TYPE_NR 16
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_KMAP_TYPES_H */
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += num
 obj-$(CONFIG_PPC_MM_SLICES)+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
-obj-$(CONFIG_HIGHMEM)  += highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
 obj-$(CONFIG_PPC_PTDUMP)   += ptdump/
 obj-$(CONFIG_KASAN)+= kasan/
--- a/arch/powerpc/mm/highmem.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * highmem.c: virtual kernel memory mappings for high memory
- *
- * PowerPC version, stolen from the i386 version.
- *
- * Used in CONFIG_HIGHMEM systems for memory pages which
- * are not addressable by direct kernel virtual addresses.
- *
- * Copyright (C) 1999 Gerhard Wichert, Siemens AG
- *   gerhard.wich...@pdb.siemens.de
- *
- *
- * Redesigned the x86 32-bit VM architecture to deal with
- * up to 16 Terrabyte physical memory. With current x86 CPUs
- * we now support up to 64 Gigabytes physical RAM.
- *
- * Copyright (C) 1999 Ingo Molnar 
- *
- * Reworked for PowerPC by various contributors. Moved from
- * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp.
- */
-
-#include 
-#include 
-
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
-{
-   unsigned long vaddr;
-   int idx, type;
-
-   type = kmap_atomic_idx_push();
-   idx = type + KM_TYPE_NR*smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-   WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx)));
-   __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
-   local_flush_tlb_page(NULL, vaddr);
-
-   return (void*) vaddr;
-}
-EXPORT_SYMBOL(kmap_atomic_high_prot);
-
-void kunmap_atomic_high(void *kvaddr)
-{
-   unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-
-   if (vaddr < __fix_to_virt(FIX_KMAP_END))
-   return;
-
-   if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) {
-   int type = kmap_atomic_idx();
-   unsigned int idx;
-
-   idx = type + KM_TYPE_NR * smp_processor_id();
-   WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
-
-   /*
-* force other mappings to Oops if they'll try to access
-* this pte without first remap it
-*/
-   pte_clear(&init_mm, vaddr, kmap_pte-idx);
-   local_flush_tlb_page(NULL, vaddr);
-   }
-
-   

[patch V3 04/37] sh/highmem: Remove all traces of unused cruft

2020-11-03 Thread Thomas Gleixner
For whatever reason, SH has highmem bits all over the place but does
not enable it via Kconfig. Remove the bitrot.

Signed-off-by: Thomas Gleixner 
---
 arch/sh/include/asm/fixmap.h |8 
 arch/sh/include/asm/kmap_types.h |   15 ---
 arch/sh/mm/init.c|8 
 3 files changed, 31 deletions(-)

--- a/arch/sh/include/asm/fixmap.h
+++ b/arch/sh/include/asm/fixmap.h
@@ -13,9 +13,6 @@
 #include 
 #include 
 #include 
-#ifdef CONFIG_HIGHMEM
-#include <asm/kmap_types.h>
-#endif
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -53,11 +50,6 @@ enum fixed_addresses {
FIX_CMAP_BEGIN,
FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS) - 1,
 
-#ifdef CONFIG_HIGHMEM
-   FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
-   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,
-#endif
-
 #ifdef CONFIG_IOREMAP_FIXED
/*
 * FIX_IOREMAP entries are useful for mapping physical address
--- a/arch/sh/include/asm/kmap_types.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __SH_KMAP_TYPES_H
-#define __SH_KMAP_TYPES_H
-
-/* Dummy header just to define km_type. */
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define  __WITH_KM_FENCE
-#endif
-
-#include <asm-generic/kmap_types.h>
-
-#undef __WITH_KM_FENCE
-
-#endif
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -362,9 +362,6 @@ void __init mem_init(void)
mem_init_print_info(NULL);
pr_info("virtual kernel memory layout:\n"
"fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#ifdef CONFIG_HIGHMEM
-   "pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#endif
"vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
"lowmem  : 0x%08lx - 0x%08lx   (%4ld MB) (cached)\n"
 #ifdef CONFIG_UNCACHED_MAPPING
@@ -376,11 +373,6 @@ void __init mem_init(void)
FIXADDR_START, FIXADDR_TOP,
(FIXADDR_TOP - FIXADDR_START) >> 10,
 
-#ifdef CONFIG_HIGHMEM
-   PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
-   (LAST_PKMAP*PAGE_SIZE) >> 10,
-#endif
-
(unsigned long)VMALLOC_START, VMALLOC_END,
(VMALLOC_END - VMALLOC_START) >> 20,
 



[patch V3 01/37] mm/highmem: Un-EXPORT __kmap_atomic_idx()

2020-11-03 Thread Thomas Gleixner
Nothing in modules can use that.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Christoph Hellwig 
Cc: Andrew Morton 
Cc: linux...@kvack.org
---
 mm/highmem.c |2 --
 1 file changed, 2 deletions(-)

--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -108,8 +108,6 @@ static inline wait_queue_head_t *get_pkm
 atomic_long_t _totalhigh_pages __read_mostly;
 EXPORT_SYMBOL(_totalhigh_pages);
 
-EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
-
 unsigned int nr_free_highpages (void)
 {
struct zone *zone;



[patch V3 03/37] fs: Remove asm/kmap_types.h includes

2020-11-03 Thread Thomas Gleixner
Historical leftovers from the time where kmap() had fixed slots.

Signed-off-by: Thomas Gleixner 
Cc: Alexander Viro 
Cc: Benjamin LaHaise 
Cc: linux-fsde...@vger.kernel.org
Cc: linux-...@kvack.org
Cc: Chris Mason 
Cc: Josef Bacik 
Cc: David Sterba 
Cc: linux-bt...@vger.kernel.org
---
 fs/aio.c |1 -
 fs/btrfs/ctree.h |1 -
 2 files changed, 2 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -43,7 +43,6 @@
 #include 
 #include 
 
-#include <asm/kmap_types.h>
 #include 
 #include 
 
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -17,7 +17,6 @@
 #include 
 #include 
 #include 
-#include <asm/kmap_types.h>
 #include 
 #include 
 #include 



[patch V3 05/37] asm-generic: Provide kmap_size.h

2020-11-03 Thread Thomas Gleixner
kmap_types.h is a misnomer because the old atomic MAP based array does not
exist anymore and the whole indirection of architectures including
kmap_types.h is inconsistent and does not allow providing guard page
debugging for this misfeature.

Add a common header file which defines the mapping stack size for all
architectures. Will be used when converting architectures over to a
generic kmap_local/atomic implementation.

The array size is chosen with the following constraints in mind:

- The deepest nest level in one context is 3 according to code
  inspection.

- The worst case nesting for the upcoming preemptible version would be:

  2 maps in task context and a fault inside
  2 maps in the fault handler
  3 maps in softirq
  2 maps in interrupt

So a total of 16 is sufficient and probably overestimated (the worst case
above sums to 2 + 2 + 3 + 2 = 9 simultaneous maps).

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 include/asm-generic/Kbuild  |1 +
 include/asm-generic/kmap_size.h |   12 
 2 files changed, 13 insertions(+)

--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -31,6 +31,7 @@ mandatory-y += irq_regs.h
 mandatory-y += irq_work.h
 mandatory-y += kdebug.h
 mandatory-y += kmap_types.h
+mandatory-y += kmap_size.h
 mandatory-y += kprobes.h
 mandatory-y += linkage.h
 mandatory-y += local.h
--- /dev/null
+++ b/include/asm-generic/kmap_size.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_KMAP_SIZE_H
+#define _ASM_GENERIC_KMAP_SIZE_H
+
+/* For debug this provides guard pages between the maps */
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define KM_MAX_IDX	33
+#else
+# define KM_MAX_IDX	16
+#endif
+
+#endif



[patch V3 08/37] x86/mm/highmem: Use generic kmap atomic implementation

2020-11-03 Thread Thomas Gleixner
Convert X86 to the generic kmap atomic implementation and make the
iomap_atomic() naming convention consistent while at it.

Signed-off-by: Thomas Gleixner 
Cc: x...@kernel.org
---
V3: Remove the kmap_types cruft
---
 arch/x86/Kconfig  |3 +
 arch/x86/include/asm/fixmap.h |5 +-
 arch/x86/include/asm/highmem.h|   13 +--
 arch/x86/include/asm/iomap.h  |   18 +-
 arch/x86/include/asm/kmap_types.h |   13 ---
 arch/x86/include/asm/paravirt_types.h |1 
 arch/x86/mm/highmem_32.c  |   59 --
 arch/x86/mm/init_32.c |   15 
 arch/x86/mm/iomap_32.c|   59 ++
 include/linux/highmem.h   |2 -
 include/linux/io-mapping.h|2 -
 mm/highmem.c  |2 -
 12 files changed, 31 insertions(+), 161 deletions(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -14,10 +14,11 @@ config X86_32
select ARCH_WANT_IPC_PARSE_VERSION
select CLKSRC_I8253
select CLONE_BACKWARDS
+   select GENERIC_VDSO_32
select HAVE_DEBUG_STACKOVERFLOW
+   select KMAP_LOCAL
select MODULES_USE_ELF_REL
select OLD_SIGACTION
-   select GENERIC_VDSO_32
 
 config X86_64
def_bool y
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -31,7 +31,7 @@
 #include 
 #ifdef CONFIG_X86_32
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 #else
 #include 
 #endif
@@ -94,7 +94,7 @@ enum fixed_addresses {
 #endif
 #ifdef CONFIG_X86_32
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
-   FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
 #ifdef CONFIG_PCI_MMCONFIG
FIX_PCIE_MCFG,
 #endif
@@ -151,7 +151,6 @@ extern void reserve_top_address(unsigned
 
 extern int fixmaps_set;
 
-extern pte_t *kmap_pte;
 extern pte_t *pkmap_page_table;
 
 void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -23,7 +23,6 @@
 
 #include 
 #include 
-#include <asm/kmap_types.h>
 #include 
 #include 
 #include 
@@ -58,11 +57,17 @@ extern unsigned long highstart_pfn, high
 #define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
-void *kmap_atomic_pfn(unsigned long pfn);
-void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
-
 #define flush_cache_kmaps()	do { } while (0)
 
+#define arch_kmap_local_post_map(vaddr, pteval) \
+   arch_flush_lazy_mmu_mode()
+
+#define arch_kmap_local_post_unmap(vaddr)   \
+   do {\
+   flush_tlb_one_kernel((vaddr));  \
+   arch_flush_lazy_mmu_mode(); \
+   } while (0)
+
 extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
unsigned long end_pfn);
 
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -9,19 +9,21 @@
 #include 
 #include 
 #include 
+#include <linux/highmem.h>
 #include 
 #include 
 
-void __iomem *
-iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
+void __iomem *iomap_atomic_pfn_prot(unsigned long pfn, pgprot_t prot);
 
-void
-iounmap_atomic(void __iomem *kvaddr);
+static inline void iounmap_atomic(void __iomem *vaddr)
+{
+   kunmap_local_indexed((void __force *)vaddr);
+   pagefault_enable();
+   preempt_enable();
+}
 
-int
-iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
+int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
 
-void
-iomap_free(resource_size_t base, unsigned long size);
+void iomap_free(resource_size_t base, unsigned long size);
 
 #endif /* _ASM_X86_IOMAP_H */
--- a/arch/x86/include/asm/kmap_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_KMAP_TYPES_H
-#define _ASM_X86_KMAP_TYPES_H
-
-#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
-#define  __WITH_KM_FENCE
-#endif
-
-#include <asm-generic/kmap_types.h>
-
-#undef __WITH_KM_FENCE
-
-#endif /* _ASM_X86_KMAP_TYPES_H */
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -41,7 +41,6 @@
 #ifndef __ASSEMBLY__
 
 #include 
-#include <asm/kmap_types.h>
 #include 
 #include 
 
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -4,65 +4,6 @@
 #include  /* for totalram_pages */
 #include 
 
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
-{
-   unsigned long vaddr;
-   int idx, type;
-
-   type = kmap_atomic_idx_push();
-   idx = type + KM_TYPE_NR*smp_processor_id();
-   vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-   BUG_ON(!pte_none(*(kmap_pte-idx)));
-   set_pte(kmap_pte-idx, mk_pte(page, prot));
-   arch_flush_lazy_mmu_mode();
-
-   return 

[patch V3 07/37] highmem: Make DEBUG_HIGHMEM functional

2020-11-03 Thread Thomas Gleixner
For some obscure reason when CONFIG_DEBUG_HIGHMEM is enabled the stack
depth is increased from 20 to 41. But the only thing DEBUG_HIGHMEM does is
to enable a few BUG_ON()'s in the mapping code.

That's a leftover from the historical mapping code which had fixed entries
for various purposes. DEBUG_HIGHMEM inserted guard mappings between the map
types. But that got all ditched when kmap_atomic() switched to a stack
based map management. Though the WITH_KM_FENCE magic survived without being
functional. All it does today is increase the stack depth.

Add a working implementation to the generic kmap_local* implementation.

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 mm/highmem.c |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -374,9 +374,19 @@ EXPORT_SYMBOL(kunmap_high);
 
 static DEFINE_PER_CPU(int, __kmap_local_idx);
 
+/*
+ * With DEBUG_HIGHMEM the stack depth is doubled and every second
+ * slot is unused which acts as a guard page
+ */
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define KM_INCR   2
+#else
+# define KM_INCR   1
+#endif
+
 static inline int kmap_local_idx_push(void)
 {
-   int idx = __this_cpu_inc_return(__kmap_local_idx) - 1;
+   int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1;
 
WARN_ON_ONCE(in_irq() && !irqs_disabled());
BUG_ON(idx >= KM_MAX_IDX);
@@ -390,7 +400,7 @@ static inline int kmap_local_idx(void)
 
 static inline void kmap_local_idx_pop(void)
 {
-   int idx = __this_cpu_dec_return(__kmap_local_idx);
+   int idx = __this_cpu_sub_return(__kmap_local_idx, KM_INCR);
 
BUG_ON(idx < 0);
 }
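
To make the guard slots concrete, a small illustration of the index
arithmetic above (conceptual only; these helpers are static to
mm/highmem.c, so this is not callable from elsewhere):

static void guard_slot_illustration(void)
{
	int a, b;

	a = kmap_local_idx_push();	/* counter 0 -> 2, returns idx 1 */
	b = kmap_local_idx_push();	/* counter 2 -> 4, returns idx 3 */
	/*
	 * Slots 0 and 2 are never mapped, so an access running past
	 * the end of a map hits an unmapped guard slot and faults.
	 */
	kmap_local_idx_pop();		/* counter 4 -> 2 */
	kmap_local_idx_pop();		/* counter 2 -> 0 */
	(void)a; (void)b;
}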



[patch V3 10/37] ARM: highmem: Switch to generic kmap atomic

2020-11-03 Thread Thomas Gleixner
No reason having the same code in every architecture.

Signed-off-by: Thomas Gleixner 
Cc: Russell King 
Cc: Arnd Bergmann 
Cc: linux-arm-ker...@lists.infradead.org
---
V3: Remove the kmap types cruft
---
 arch/arm/Kconfig  |1 
 arch/arm/include/asm/fixmap.h |4 -
 arch/arm/include/asm/highmem.h|   33 +++---
 arch/arm/include/asm/kmap_types.h |   10 ---
 arch/arm/mm/Makefile  |1 
 arch/arm/mm/highmem.c |  121 --
 6 files changed, 26 insertions(+), 144 deletions(-)

--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1498,6 +1498,7 @@ config HAVE_ARCH_PFN_VALID
 config HIGHMEM
bool "High Memory Support"
depends on MMU
+   select KMAP_LOCAL
help
  The address space of ARM processors is only 4 Gigabytes large
  and it has to accommodate user address space, kernel address
--- a/arch/arm/include/asm/fixmap.h
+++ b/arch/arm/include/asm/fixmap.h
@@ -7,14 +7,14 @@
 #define FIXADDR_TOP(FIXADDR_END - PAGE_SIZE)
 
 #include 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 
 enum fixed_addresses {
FIX_EARLYCON_MEM_BASE,
__end_of_permanent_fixed_addresses,
 
FIX_KMAP_BEGIN = __end_of_permanent_fixed_addresses,
-   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,
+   FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
 
/* Support writing RO kernel text via kprobes, jump labels, etc. */
FIX_TEXT_POKE0,
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_HIGHMEM_H
 #define _ASM_HIGHMEM_H
 
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
 
 #define PKMAP_BASE (PAGE_OFFSET - PMD_SIZE)
 #define LAST_PKMAP PTRS_PER_PTE
@@ -46,19 +46,32 @@ extern pte_t *pkmap_page_table;
 
 #ifdef ARCH_NEEDS_KMAP_HIGH_GET
 extern void *kmap_high_get(struct page *page);
-#else
+
+static inline void *arch_kmap_local_high_get(struct page *page)
+{
+   if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !cache_is_vivt())
+   return NULL;
+   return kmap_high_get(page);
+}
+#define arch_kmap_local_high_get arch_kmap_local_high_get
+
+#else /* ARCH_NEEDS_KMAP_HIGH_GET */
 static inline void *kmap_high_get(struct page *page)
 {
return NULL;
 }
-#endif
+#endif /* !ARCH_NEEDS_KMAP_HIGH_GET */
 
-/*
- * The following functions are already defined by <linux/highmem.h>
- * when CONFIG_HIGHMEM is not set.
- */
-#ifdef CONFIG_HIGHMEM
-extern void *kmap_atomic_pfn(unsigned long pfn);
-#endif
+#define arch_kmap_local_post_map(vaddr, pteval)	\
+   local_flush_tlb_kernel_page(vaddr)
+
+#define arch_kmap_local_pre_unmap(vaddr)   \
+do {   \
+   if (cache_is_vivt())\
+   __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); \
+} while (0)
+
+#define arch_kmap_local_post_unmap(vaddr)  \
+   local_flush_tlb_kernel_page(vaddr)
 
 #endif
--- a/arch/arm/include/asm/kmap_types.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ARM_KMAP_TYPES_H
-#define __ARM_KMAP_TYPES_H
-
-/*
- * This is the "bare minimum".  AIO seems to require this.
- */
-#define KM_TYPE_NR 16
-
-#endif
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_MODULES) += proc-syms.o
 obj-$(CONFIG_DEBUG_VIRTUAL)+= physaddr.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
-obj-$(CONFIG_HIGHMEM)  += highmem.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o
 
--- a/arch/arm/mm/highmem.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mm/highmem.c -- ARM highmem support
- *
- * Author: Nicolas Pitre
- * Created:september 8, 2008
- * Copyright:  Marvell Semiconductors Inc.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include "mm.h"
-
-static inline void set_fixmap_pte(int idx, pte_t pte)
-{
-   unsigned long vaddr = __fix_to_virt(idx);
-   pte_t *ptep = virt_to_kpte(vaddr);
-
-   set_pte_ext(ptep, pte, 0);
-   local_flush_tlb_kernel_page(vaddr);
-}
-
-static inline pte_t get_fixmap_pte(unsigned long vaddr)
-{
-   pte_t *ptep = virt_to_kpte(vaddr);
-
-   return *ptep;
-}
-
-void *kmap_atomic_high_prot(struct page *page, pgprot_t prot)
-{
-   unsigned int idx;
-   unsigned long vaddr;
-   void *kmap;
-   int type;
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-   /*
-* There is no cache coherency issue when non VIVT, so force the
-* dedicated kmap usage for better debugging purposes in that case.
-*/
-   if (!cache_is_vivt())
-   kmap = NULL;
-   else
-#endif
-   kmap = kmap_high_get(page);
-   if (kmap)
-   return kmap;
-
-   type = 

[patch V3 06/37] highmem: Provide generic variant of kmap_atomic*

2020-11-03 Thread Thomas Gleixner
The kmap_atomic* interfaces in all architectures are pretty much the same
except for post-map operations (flush) and pre- and post-unmap operations.

Provide a generic variant for that.
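
The pattern is that the generic code supplies no-op defaults which an
architecture may override; roughly (a sketch of the pattern, not the exact
mm/highmem.c code):

/* Architectures only define the hooks they need. */
#ifndef arch_kmap_local_post_map
# define arch_kmap_local_post_map(vaddr, pteval)	do { } while (0)
#endif
#ifndef arch_kmap_local_pre_unmap
# define arch_kmap_local_pre_unmap(vaddr)		do { } while (0)
#endif
#ifndef arch_kmap_local_post_unmap
# define arch_kmap_local_post_unmap(vaddr)		do { } while (0)
#endif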

Signed-off-by: Thomas Gleixner 
Cc: Andrew Morton 
Cc: linux...@kvack.org
---
V3: Do not reuse the kmap_atomic_idx pile and use kmap_size.h right away
V2: Address review comments from Christoph (style and EXPORT variant)
---
 include/linux/highmem.h |   82 ++-
 mm/Kconfig  |3 +
 mm/highmem.c|  144 +++-
 3 files changed, 211 insertions(+), 18 deletions(-)

--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -31,9 +31,16 @@ static inline void invalidate_kernel_vma
 
 #include 
 
+/*
+ * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft.
+ */
+#ifdef CONFIG_KMAP_LOCAL
+void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
+void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
+void kunmap_local_indexed(void *vaddr);
+#endif
+
 #ifdef CONFIG_HIGHMEM
-extern void *kmap_atomic_high_prot(struct page *page, pgprot_t prot);
-extern void kunmap_atomic_high(void *kvaddr);
 #include 
 
 #ifndef ARCH_HAS_KMAP_FLUSH_TLB
@@ -81,6 +88,11 @@ static inline void kunmap(struct page *p
  * be used in IRQ contexts, so in some (very limited) cases we need
  * it.
  */
+
+#ifndef CONFIG_KMAP_LOCAL
+void *kmap_atomic_high_prot(struct page *page, pgprot_t prot);
+void kunmap_atomic_high(void *kvaddr);
+
 static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
preempt_disable();
@@ -89,7 +101,38 @@ static inline void *kmap_atomic_prot(str
return page_address(page);
return kmap_atomic_high_prot(page, prot);
 }
-#define kmap_atomic(page)  kmap_atomic_prot(page, kmap_prot)
+
+static inline void __kunmap_atomic(void *vaddr)
+{
+   kunmap_atomic_high(vaddr);
+}
+#else /* !CONFIG_KMAP_LOCAL */
+
+static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+{
+   preempt_disable();
+   pagefault_disable();
+   return __kmap_local_page_prot(page, prot);
+}
+
+static inline void *kmap_atomic_pfn(unsigned long pfn)
+{
+   preempt_disable();
+   pagefault_disable();
+   return __kmap_local_pfn_prot(pfn, kmap_prot);
+}
+
+static inline void __kunmap_atomic(void *addr)
+{
+   kunmap_local_indexed(addr);
+}
+
+#endif /* CONFIG_KMAP_LOCAL */
+
+static inline void *kmap_atomic(struct page *page)
+{
+   return kmap_atomic_prot(page, kmap_prot);
+}
 
 /* declarations for linux/mm/highmem.c */
 unsigned int nr_free_highpages(void);
@@ -147,25 +190,33 @@ static inline void *kmap_atomic(struct p
pagefault_disable();
return page_address(page);
 }
-#define kmap_atomic_prot(page, prot)   kmap_atomic(page)
 
-static inline void kunmap_atomic_high(void *addr)
+static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+{
+   return kmap_atomic(page);
+}
+
+static inline void *kmap_atomic_pfn(unsigned long pfn)
+{
+   return kmap_atomic(pfn_to_page(pfn));
+}
+
+static inline void __kunmap_atomic(void *addr)
 {
/*
 * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic()
-* handles re-enabling faults + preemption
+* handles re-enabling faults and preemption
 */
 #ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(addr);
 #endif
 }
 
-#define kmap_atomic_pfn(pfn)   kmap_atomic(pfn_to_page(pfn))
-
 #define kmap_flush_unused()	do {} while(0)
 
 #endif /* CONFIG_HIGHMEM */
 
+#if !defined(CONFIG_KMAP_LOCAL)
 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
 
 DECLARE_PER_CPU(int, __kmap_atomic_idx);
@@ -196,22 +247,21 @@ static inline void kmap_atomic_idx_pop(v
__this_cpu_dec(__kmap_atomic_idx);
 #endif
 }
-
+#endif
 #endif
 
 /*
  * Prevent people trying to call kunmap_atomic() as if it were kunmap()
  * kunmap_atomic() should get the return value of kmap_atomic, not the page.
  */
-#define kunmap_atomic(addr) \
-do {\
-   BUILD_BUG_ON(__same_type((addr), struct page *));   \
-   kunmap_atomic_high(addr);  \
-   pagefault_enable(); \
-   preempt_enable();   \
+#define kunmap_atomic(__addr)  \
+do {   \
+   BUILD_BUG_ON(__same_type((__addr), struct page *)); \
+   __kunmap_atomic(__addr);\
+   pagefault_enable(); \
+   preempt_enable();   \
 } while (0)
 
-
 /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
 #ifndef clear_user_highpage
 static inline void 

[patch V3 02/37] highmem: Remove unused functions

2020-11-03 Thread Thomas Gleixner
Nothing uses totalhigh_pages_dec() and totalhigh_pages_set().

Signed-off-by: Thomas Gleixner 
---
V3: New patch
---
 include/linux/highmem.h |   10 --
 1 file changed, 10 deletions(-)

--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -104,21 +104,11 @@ static inline void totalhigh_pages_inc(v
atomic_long_inc(&_totalhigh_pages);
 }
 
-static inline void totalhigh_pages_dec(void)
-{
-   atomic_long_dec(&_totalhigh_pages);
-}
-
 static inline void totalhigh_pages_add(long count)
 {
atomic_long_add(count, &_totalhigh_pages);
 }
 
-static inline void totalhigh_pages_set(long val)
-{
-   atomic_long_set(&_totalhigh_pages, val);
-}
-
 void kmap_flush_unused(void);
 
 struct page *kmap_to_page(void *addr);



Re: [PATCH 04/17] vhost: prep vhost_dev_init users to handle failures

2020-11-03 Thread Dan Carpenter
Hi Mike,

url:
https://github.com/0day-ci/linux/commits/Mike-Christie/vhost-fix-scsi-cmd-handling-and-cgroup-support/20201022-083844
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: i386-randconfig-m021-20201101 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 
Reported-by: Dan Carpenter 

smatch warnings:
drivers/vhost/vsock.c:648 vhost_vsock_dev_open() error: uninitialized symbol 'ret'.

vim +/ret +648 drivers/vhost/vsock.c

433fc58e6bf2c8b Asias He        2016-07-28  605  static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
433fc58e6bf2c8b Asias He        2016-07-28  606  {
433fc58e6bf2c8b Asias He        2016-07-28  607  	struct vhost_virtqueue **vqs;
433fc58e6bf2c8b Asias He        2016-07-28  608  	struct vhost_vsock *vsock;
433fc58e6bf2c8b Asias He        2016-07-28  609  	int ret;
433fc58e6bf2c8b Asias He        2016-07-28  610  
433fc58e6bf2c8b Asias He        2016-07-28  611  	/* This struct is large and allocation could fail, fall back to vmalloc
433fc58e6bf2c8b Asias He        2016-07-28  612  	 * if there is no other way.
433fc58e6bf2c8b Asias He        2016-07-28  613  	 */
dcda9b04713c3f6 Michal Hocko    2017-07-12  614  	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
433fc58e6bf2c8b Asias He        2016-07-28  615  	if (!vsock)
433fc58e6bf2c8b Asias He        2016-07-28  616  		return -ENOMEM;
433fc58e6bf2c8b Asias He        2016-07-28  617  
433fc58e6bf2c8b Asias He        2016-07-28  618  	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
433fc58e6bf2c8b Asias He        2016-07-28  619  	if (!vqs) {
433fc58e6bf2c8b Asias He        2016-07-28  620  		ret = -ENOMEM;
433fc58e6bf2c8b Asias He        2016-07-28  621  		goto out;
433fc58e6bf2c8b Asias He        2016-07-28  622  	}
433fc58e6bf2c8b Asias He        2016-07-28  623  
a72b69dc083a931 Stefan Hajnoczi 2017-11-09  624  	vsock->guest_cid = 0; /* no CID assigned yet */
a72b69dc083a931 Stefan Hajnoczi 2017-11-09  625  
433fc58e6bf2c8b Asias He        2016-07-28  626  	atomic_set(&vsock->queued_replies, 0);
433fc58e6bf2c8b Asias He        2016-07-28  627  
433fc58e6bf2c8b Asias He        2016-07-28  628  	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
433fc58e6bf2c8b Asias He        2016-07-28  629  	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
433fc58e6bf2c8b Asias He        2016-07-28  630  	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
433fc58e6bf2c8b Asias He        2016-07-28  631  	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
433fc58e6bf2c8b Asias He        2016-07-28  632  
6e1629548d318c2 Mike Christie   2020-10-21  633  	if (vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
e82b9b0727ff6d6 Jason Wang      2019-05-17  634  			   UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
6e1629548d318c2 Mike Christie   2020-10-21  635  			   VHOST_VSOCK_WEIGHT, true, NULL))
6e1629548d318c2 Mike Christie   2020-10-21  636  		goto err_dev_init;
                                                 	^^^^^^^^^^^^^^^^^^
"ret" needs to be set here.

433fc58e6bf2c8b Asias He        2016-07-28  637  
433fc58e6bf2c8b Asias He        2016-07-28  638  	file->private_data = vsock;
433fc58e6bf2c8b Asias He        2016-07-28  639  	spin_lock_init(&vsock->send_pkt_list_lock);
433fc58e6bf2c8b Asias He        2016-07-28  640  	INIT_LIST_HEAD(&vsock->send_pkt_list);
433fc58e6bf2c8b Asias He        2016-07-28  641  	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
433fc58e6bf2c8b Asias He        2016-07-28  642  	return 0;
433fc58e6bf2c8b Asias He        2016-07-28  643  
6e1629548d318c2 Mike Christie   2020-10-21  644  err_dev_init:
6e1629548d318c2 Mike Christie   2020-10-21  645  	kfree(vqs);
433fc58e6bf2c8b Asias He        2016-07-28  646  out:
433fc58e6bf2c8b Asias He        2016-07-28  647  	vhost_vsock_free(vsock);
433fc58e6bf2c8b Asias He        2016-07-28 @648  	return ret;
433fc58e6bf2c8b Asias He        2016-07-28  649  }
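A minimal sketch of a possible fix, assuming vhost_dev_init() in this series returns a negative errno on failure (the call currently discards that value, leaving "ret" uninitialized):

	ret = vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
			     UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
			     VHOST_VSOCK_WEIGHT, true, NULL);
	if (ret)
		goto err_dev_init;

With this, every path reaching the "out" label has "ret" set.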

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org



[PATCH v7 09/10] dma-buf-map: Add memcpy and pointer-increment interfaces

2020-11-03 Thread Thomas Zimmermann
To do framebuffer updates, one needs memcpy from system memory and a
pointer-increment function. Add both interfaces with documentation.
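For illustration, a sketch of how the two helpers compose, e.g. to copy a shadow buffer into a mapped framebuffer one scanline at a time ('pitch' and 'height' are made-up geometry, not part of this patch):

	/* Copy 'height' scanlines of 'pitch' bytes each from system
	 * memory into a mapped framebuffer, advancing both addresses.
	 */
	static void blit_lines(struct dma_buf_map *map, const void *src,
			       size_t pitch, unsigned int height)
	{
		unsigned int y;

		for (y = 0; y < height; y++) {
			dma_buf_map_memcpy_to(map, src, pitch); /* one scanline */
			dma_buf_map_incr(map, pitch);           /* advance dst */
			src += pitch;                           /* advance src */
		}
	}

Since dma_buf_map_incr() modifies the mapping in place, callers would operate on a copy of the stored struct dma_buf_map and leave the original untouched.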

v5:
* include <linux/string.h> to build on sparc64 (Sam)

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Sam Ravnborg 
Tested-by: Sam Ravnborg 
---
 include/linux/dma-buf-map.h | 73 -
 1 file changed, 63 insertions(+), 10 deletions(-)

diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h
index 2e8bbecb5091..583a3a1f9447 100644
--- a/include/linux/dma-buf-map.h
+++ b/include/linux/dma-buf-map.h
@@ -7,6 +7,7 @@
 #define __DMA_BUF_MAP_H__
 
 #include <linux/io.h>
+#include <linux/string.h>
 
 /**
  * DOC: overview
@@ -32,6 +33,14 @@
  * accessing the buffer. Use the returned instance and the helper functions
  * to access the buffer's memory in the correct way.
  *
+ * The type :c:type:`struct dma_buf_map <dma_buf_map>` and its helpers are
+ * actually independent from the dma-buf infrastructure. When sharing buffers
+ * among devices, drivers have to know the location of the memory to access
+ * the buffers in a safe way. :c:type:`struct dma_buf_map <dma_buf_map>`
+ * solves this problem for dma-buf and its users. If other drivers or
+ * sub-systems require similar functionality, the type could be generalized
+ * and moved to a more prominent header file.
+ *
 * Open-coding access to :c:type:`struct dma_buf_map <dma_buf_map>` is
 * considered bad style. Rather than accessing its fields directly, use one
  * of the provided helper functions, or implement your own. For example,
@@ -51,6 +60,14 @@
  *
 * dma_buf_map_set_vaddr_iomem(&map, 0xdeadbeaf);
  *
+ * Instances of struct dma_buf_map do not have to be cleaned up, but
+ * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings
+ * always refer to system memory.
+ *
+ * .. code-block:: c
+ *
+ * dma_buf_map_clear(&map);
+ *
  * Test if a mapping is valid with either dma_buf_map_is_set() or
  * dma_buf_map_is_null().
  *
@@ -73,17 +90,19 @@
 * if (dma_buf_map_is_equal(&sys_map, &io_map))
  * // always false
  *
- * Instances of struct dma_buf_map do not have to be cleaned up, but
- * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings
- * always refer to system memory.
+ * A set up instance of struct dma_buf_map can be used to access or manipulate
+ * the buffer memory. Depending on the location of the memory, the provided
+ * helpers will pick the correct operations. Data can be copied into the memory
+ * with dma_buf_map_memcpy_to(). The address can be manipulated with
+ * dma_buf_map_incr().
  *
- * The type :c:type:`struct dma_buf_map <dma_buf_map>` and its helpers are
- * actually independent from the dma-buf infrastructure. When sharing buffers
- * among devices, drivers have to know the location of the memory to access
- * the buffers in a safe way. :c:type:`struct dma_buf_map <dma_buf_map>`
- * solves this problem for dma-buf and its users. If other drivers or
- * sub-systems require similar functionality, the type could be generalized
- * and moved to a more prominent header file.
+ * .. code-block:: c
+ *
+ * const void *src = ...; // source buffer
+ * size_t len = ...; // length of src
+ *
+ * dma_buf_map_memcpy_to(&map, src, len);
+ * dma_buf_map_incr(&map, len); // go to first byte after the memcpy
  */
 
 /**
@@ -210,4 +229,38 @@ static inline void dma_buf_map_clear(struct dma_buf_map *map)
}
 }
 
+/**
+ * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping
+ * @dst:   The dma-buf mapping structure
+ * @src:   The source buffer
+ * @len:   The number of bytes in src
+ *
+ * Copies data into a dma-buf mapping. The source buffer is in system
+ * memory. Depending on the buffer's location, the helper picks the correct
+ * method of accessing the memory.
+ */
+static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len)
+{
+   if (dst->is_iomem)
+   memcpy_toio(dst->vaddr_iomem, src, len);
+   else
+   memcpy(dst->vaddr, src, len);
+}
+
+/**
+ * dma_buf_map_incr - Increments the address stored in a dma-buf mapping
+ * @map:   The dma-buf mapping structure
+ * @incr:  The number of bytes to increment
+ *
+ * Increments the address stored in a dma-buf mapping. Depending on the
+ * buffer's location, the correct value will be updated.
+ */
+static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr)
+{
+   if (map->is_iomem)
+   map->vaddr_iomem += incr;
+   else
+   map->vaddr += incr;
+}
+
 #endif /* __DMA_BUF_MAP_H__ */
-- 
2.29.0



[PATCH v7 10/10] drm/fb_helper: Support framebuffers in I/O memory

2020-11-03 Thread Thomas Zimmermann
At least sparc64 requires I/O-specific access to framebuffers. This
patch updates the fbdev console accordingly.

For drivers with direct access to the framebuffer memory, the callback
functions in struct fb_ops test for the type of memory and call the rsp
fb_sys_ or fb_cfb_ functions. Read and write operations are implemented
internally by DRM's fbdev helper.

For drivers that employ a shadow buffer, fbdev's blit function retrieves
the framebuffer address as struct dma_buf_map, and uses dma_buf_map
interfaces to access the buffer.

The bochs driver on sparc64 uses a workaround to flag the framebuffer as
I/O memory and avoid a HW exception. With the introduction of struct
dma_buf_map, this is not required any longer. The patch removes the rsp
code from both bochs and fbdev.

v7:
* use min_t(size_t,) (kernel test robot)
* return the number of bytes read/written, if any (fbdev testcase)
v5:
* implement fb_read/fb_write internally (Daniel, Sam)
v4:
* move dma_buf_map changes into separate patch (Daniel)
* TODO list: comment on fbdev updates (Daniel)

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Daniel Vetter 
Reviewed-by: Sam Ravnborg 
Tested-by: Sam Ravnborg 
---
 Documentation/gpu/todo.rst|  19 ++-
 drivers/gpu/drm/bochs/bochs_kms.c |   1 -
 drivers/gpu/drm/drm_fb_helper.c   | 220 --
 include/drm/drm_mode_config.h |  12 --
 4 files changed, 223 insertions(+), 29 deletions(-)

diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 59f63f1d7680..acca232b025b 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -201,13 +201,28 @@ Convert drivers to use drm_fbdev_generic_setup()
 
 
 Most drivers can use drm_fbdev_generic_setup(). Driver have to implement
-atomic modesetting and GEM vmap support. Current generic fbdev emulation
-expects the framebuffer in system memory (or system-like memory).
+atomic modesetting and GEM vmap support. Historically, generic fbdev emulation
+expected the framebuffer in system memory or system-like memory. By employing
+struct dma_buf_map, drivers with framebuffers in I/O memory can be supported
+as well.
 
 Contact: Maintainer of the driver you plan to convert
 
 Level: Intermediate
 
+Reimplement functions in drm_fbdev_fb_ops without fbdev
+-------------------------------------------------------
+
+A number of callback functions in drm_fbdev_fb_ops could benefit from
+being rewritten without dependencies on the fbdev module. Some of the
+helpers could further benefit from using struct dma_buf_map instead of
+raw pointers.
+
+Contact: Thomas Zimmermann , Daniel Vetter
+
+Level: Advanced
+
+
 drm_framebuffer_funcs and drm_mode_config_funcs.fb_create cleanup
 ------------------------------------------------------------------
 
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 13d0d04c4457..853081d186d5 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -151,7 +151,6 @@ int bochs_kms_init(struct bochs_device *bochs)
bochs->dev->mode_config.preferred_depth = 24;
bochs->dev->mode_config.prefer_shadow = 0;
bochs->dev->mode_config.prefer_shadow_fbdev = 1;
-   bochs->dev->mode_config.fbdev_use_iomem = true;
bochs->dev->mode_config.quirk_addfb_prefer_host_byte_order = true;
 
	bochs->dev->mode_config.funcs = &bochs_mode_funcs;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index a0d88130fedb..01ba1da28511 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -372,24 +372,22 @@ static void drm_fb_helper_resume_worker(struct work_struct *work)
 }
 
 static void drm_fb_helper_dirty_blit_real(struct drm_fb_helper *fb_helper,
- struct drm_clip_rect *clip)
+ struct drm_clip_rect *clip,
+ struct dma_buf_map *dst)
 {
struct drm_framebuffer *fb = fb_helper->fb;
unsigned int cpp = fb->format->cpp[0];
size_t offset = clip->y1 * fb->pitches[0] + clip->x1 * cpp;
void *src = fb_helper->fbdev->screen_buffer + offset;
-   void *dst = fb_helper->buffer->map.vaddr + offset;
size_t len = (clip->x2 - clip->x1) * cpp;
unsigned int y;
 
-   for (y = clip->y1; y < clip->y2; y++) {
-   if (!fb_helper->dev->mode_config.fbdev_use_iomem)
-   memcpy(dst, src, len);
-   else
-   memcpy_toio((void __iomem *)dst, src, len);
+   dma_buf_map_incr(dst, offset); /* go to first pixel within clip rect */
 
+   for (y = clip->y1; y < clip->y2; y++) {
+   dma_buf_map_memcpy_to(dst, src, len);
+   dma_buf_map_incr(dst, fb->pitches[0]);
src += fb->pitches[0];
-   dst += fb->pitches[0];
  

[PATCH v7 01/10] drm/vram-helper: Remove invariant parameters from internal kmap function

2020-11-03 Thread Thomas Zimmermann
The parameters map and is_iomem always have the same value. Remove them
to prepare the function for conversion to struct dma_buf_map.

v4:
* don't check for !kmap->virtual; will always be false

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Daniel Vetter 
Reviewed-by: Christian König 
Tested-by: Sam Ravnborg 
---
 drivers/gpu/drm/drm_gem_vram_helper.c | 18 --
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index 16d68c04ea5d..e305fadb8bc8 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -378,32 +378,22 @@ int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo)
 }
 EXPORT_SYMBOL(drm_gem_vram_unpin);
 
-static void *drm_gem_vram_kmap_locked(struct drm_gem_vram_object *gbo,
- bool map, bool *is_iomem)
+static void *drm_gem_vram_kmap_locked(struct drm_gem_vram_object *gbo)
 {
int ret;
	struct ttm_bo_kmap_obj *kmap = &gbo->kmap;
+   bool is_iomem;
 
if (gbo->kmap_use_count > 0)
goto out;
 
-   if (kmap->virtual || !map)
-   goto out;
-
	ret = ttm_bo_kmap(&gbo->bo, 0, gbo->bo.num_pages, kmap);
if (ret)
return ERR_PTR(ret);
 
 out:
-   if (!kmap->virtual) {
-   if (is_iomem)
-   *is_iomem = false;
-   return NULL; /* not mapped; don't increment ref */
-   }
++gbo->kmap_use_count;
-   if (is_iomem)
-   return ttm_kmap_obj_virtual(kmap, is_iomem);
-   return kmap->virtual;
+	return ttm_kmap_obj_virtual(kmap, &is_iomem);
 }
 
 static void drm_gem_vram_kunmap_locked(struct drm_gem_vram_object *gbo)
@@ -448,7 +438,7 @@ void *drm_gem_vram_vmap(struct drm_gem_vram_object *gbo)
ret = drm_gem_vram_pin_locked(gbo, 0);
if (ret)
goto err_ttm_bo_unreserve;
-   base = drm_gem_vram_kmap_locked(gbo, true, NULL);
+   base = drm_gem_vram_kmap_locked(gbo);
if (IS_ERR(base)) {
ret = PTR_ERR(base);
goto err_drm_gem_vram_unpin_locked;
-- 
2.29.0


[PATCH v7 05/10] drm/ttm: Add vmap/vunmap to TTM and TTM GEM helpers

2020-11-03 Thread Thomas Zimmermann
The new functions ttm_bo_{vmap,vunmap}() map and unmap a TTM BO in kernel
address space. The mapping's address is returned as struct dma_buf_map.
Each function is a simplified version of TTM's existing kmap code. Both
functions respect the memory's location and/or writecombine flags.

On top of TTM's functions, GEM TTM helpers got drm_gem_ttm_{vmap,vunmap}(),
two helpers that convert a GEM object into the TTM BO and forward the call
to TTM's vmap/vunmap. These helpers can be dropped into the rsp GEM object
callbacks.
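
For illustration, a TTM-based driver could wire the helpers up like this (a sketch; 'mydrv' is a placeholder, and the signatures match the converted vmap/vunmap callbacks introduced in the next patch):

	static const struct drm_gem_object_funcs mydrv_gem_object_funcs = {
		/* vmap/vunmap take the address as struct dma_buf_map */
		.vmap = drm_gem_ttm_vmap,
		.vunmap = drm_gem_ttm_vunmap,
		.mmap = drm_gem_ttm_mmap, /* existing helper, shown for context */
	};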

v5:
* use size_t for storing mapping size (Christian)
* ignore premapped memory areas correctly in ttm_bo_vunmap()
* rebase onto latest TTM interfaces (Christian)
* remove BUG() from ttm_bo_vmap() (Christian)
v4:
* drop ttm_kmap_obj_to_dma_buf() in favor of vmap helpers (Daniel,
  Christian)

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Christian König 
Acked-by: Daniel Vetter 
Tested-by: Sam Ravnborg 
---
 drivers/gpu/drm/drm_gem_ttm_helper.c | 38 +++
 drivers/gpu/drm/ttm/ttm_bo_util.c| 72 
 include/drm/drm_gem_ttm_helper.h |  6 +++
 include/drm/ttm/ttm_bo_api.h | 28 +++
 include/linux/dma-buf-map.h  | 20 
 5 files changed, 164 insertions(+)

diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c
index 0e4fb9ba43ad..db4c14d78a30 100644
--- a/drivers/gpu/drm/drm_gem_ttm_helper.c
+++ b/drivers/gpu/drm/drm_gem_ttm_helper.c
@@ -49,6 +49,44 @@ void drm_gem_ttm_print_info(struct drm_printer *p, unsigned int indent,
 }
 EXPORT_SYMBOL(drm_gem_ttm_print_info);
 
+/**
+ * drm_gem_ttm_vmap() - vmap &ttm_buffer_object
+ * @gem: GEM object.
+ * @map: [out] returns the dma-buf mapping.
+ *
+ * Maps a GEM object with ttm_bo_vmap(). This function can be used as
+ * &drm_gem_object_funcs.vmap callback.
+ *
+ * Returns:
+ * 0 on success, or a negative errno code otherwise.
+ */
+int drm_gem_ttm_vmap(struct drm_gem_object *gem,
+struct dma_buf_map *map)
+{
+   struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+
+   return ttm_bo_vmap(bo, map);
+
+}
+EXPORT_SYMBOL(drm_gem_ttm_vmap);
+
+/**
+ * drm_gem_ttm_vunmap() - vunmap &ttm_buffer_object
+ * @gem: GEM object.
+ * @map: dma-buf mapping.
+ *
+ * Unmaps a GEM object with ttm_bo_vunmap(). This function can be used as
+ * &drm_gem_object_funcs.vunmap callback.
+ */
+void drm_gem_ttm_vunmap(struct drm_gem_object *gem,
+   struct dma_buf_map *map)
+{
+   struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+
+   ttm_bo_vunmap(bo, map);
+}
+EXPORT_SYMBOL(drm_gem_ttm_vunmap);
+
 /**
 * drm_gem_ttm_mmap() - mmap &ttm_buffer_object
  * @gem: GEM object.
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index ecb54415d1ca..7ccb2295cac1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -32,6 +32,7 @@
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/drm_vma_manager.h>
+#include <linux/dma-buf-map.h>
 #include <linux/io.h>
 #include <linux/highmem.h>
 #include <linux/wait.h>
@@ -471,6 +472,77 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
 }
 EXPORT_SYMBOL(ttm_bo_kunmap);
 
+int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map)
+{
+	struct ttm_resource *mem = &bo->mem;
+   int ret;
+
+   ret = ttm_mem_io_reserve(bo->bdev, mem);
+   if (ret)
+   return ret;
+
+   if (mem->bus.is_iomem) {
+   void __iomem *vaddr_iomem;
+   size_t size = bo->num_pages << PAGE_SHIFT;
+
+   if (mem->bus.addr)
+   vaddr_iomem = (void __iomem *)mem->bus.addr;
+   else if (mem->bus.caching == ttm_write_combined)
+   vaddr_iomem = ioremap_wc(mem->bus.offset, size);
+   else
+   vaddr_iomem = ioremap(mem->bus.offset, size);
+
+   if (!vaddr_iomem)
+   return -ENOMEM;
+
+   dma_buf_map_set_vaddr_iomem(map, vaddr_iomem);
+
+   } else {
+   struct ttm_operation_ctx ctx = {
+   .interruptible = false,
+   .no_wait_gpu = false
+   };
+   struct ttm_tt *ttm = bo->ttm;
+   pgprot_t prot;
+   void *vaddr;
+
+	ret = ttm_tt_populate(bo->bdev, ttm, &ctx);
+   if (ret)
+   return ret;
+
+   /*
+* We need to use vmap to get the desired page protection
+* or to make the buffer object look contiguous.
+*/
+   prot = ttm_io_prot(bo, mem, PAGE_KERNEL);
+   vaddr = vmap(ttm->pages, bo->num_pages, 0, prot);
+   if (!vaddr)
+   return -ENOMEM;
+
+   dma_buf_map_set_vaddr(map, vaddr);
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vmap);
+
+void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct dma_buf_map *map)
+{
+   struct ttm_resource *mem = 

[PATCH v7 06/10] drm/gem: Use struct dma_buf_map in GEM vmap ops and convert GEM backends

2020-11-03 Thread Thomas Zimmermann
This patch replaces the vmap/vunmap's use of raw pointers in GEM object
functions with instances of struct dma_buf_map. GEM backends are
converted as well. For most of them, this simply changes the returned type.

TTM-based drivers now return information about the location of the memory,
either system or I/O memory. GEM VRAM helpers and qxl now use ttm_bo_vmap()
et al. Amdgpu, nouveau and radeon use drm_gem_ttm_vmap() et al instead of
implementing their own vmap callbacks.
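
Schematically, a converted callback fills in the caller-provided map instead of returning a raw pointer (a sketch; the 'mydrv' names and the mydrv_map_pages() helper are hypothetical):

	/* before: void *mydrv_gem_vmap(struct drm_gem_object *obj); */

	static int mydrv_gem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
	{
		void *vaddr = mydrv_map_pages(obj); /* hypothetical helper */

		if (!vaddr)
			return -ENOMEM;

		dma_buf_map_set_vaddr(map, vaddr); /* buffer is in system memory */

		return 0;
	}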

v7:
* init QXL cursor to mapped BO buffer (kernel test robot)
v5:
* update vkms after switch to shmem
v4:
* use ttm_bo_vmap(), drm_gem_ttm_vmap(), et al. (Daniel, Christian)
* fix a trailing { in drm_gem_vmap()
* remove several empty functions instead of converting them (Daniel)
* comment uses of raw pointers with a TODO (Daniel)
* TODO list: convert more helpers to use struct dma_buf_map

Signed-off-by: Thomas Zimmermann 
Acked-by: Christian König 
Tested-by: Sam Ravnborg 
---
 Documentation/gpu/todo.rst  |  18 
 drivers/gpu/drm/Kconfig |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  36 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |   1 -
 drivers/gpu/drm/ast/ast_cursor.c|  27 +++--
 drivers/gpu/drm/ast/ast_drv.h   |   7 +-
 drivers/gpu/drm/drm_gem.c   |  23 +++--
 drivers/gpu/drm/drm_gem_cma_helper.c|  10 +-
 drivers/gpu/drm/drm_gem_shmem_helper.c  |  48 +
 drivers/gpu/drm/drm_gem_vram_helper.c   | 107 ++--
 drivers/gpu/drm/etnaviv/etnaviv_drv.h   |   2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c |   9 +-
 drivers/gpu/drm/lima/lima_gem.c |   6 +-
 drivers/gpu/drm/lima/lima_sched.c   |  11 +-
 drivers/gpu/drm/mgag200/mgag200_mode.c  |  10 +-
 drivers/gpu/drm/nouveau/Kconfig |   1 +
 drivers/gpu/drm/nouveau/nouveau_bo.h|   2 -
 drivers/gpu/drm/nouveau/nouveau_gem.c   |   6 +-
 drivers/gpu/drm/nouveau/nouveau_gem.h   |   2 -
 drivers/gpu/drm/nouveau/nouveau_prime.c |  20 
 drivers/gpu/drm/panfrost/panfrost_perfcnt.c |  14 +--
 drivers/gpu/drm/qxl/qxl_display.c   |  15 ++-
 drivers/gpu/drm/qxl/qxl_draw.c  |  14 ++-
 drivers/gpu/drm/qxl/qxl_drv.h   |  11 +-
 drivers/gpu/drm/qxl/qxl_object.c|  31 +++---
 drivers/gpu/drm/qxl/qxl_object.h|   2 +-
 drivers/gpu/drm/qxl/qxl_prime.c |  12 +--
 drivers/gpu/drm/radeon/radeon.h |   1 -
 drivers/gpu/drm/radeon/radeon_gem.c |   7 +-
 drivers/gpu/drm/radeon/radeon_prime.c   |  20 
 drivers/gpu/drm/rockchip/rockchip_drm_gem.c |  22 ++--
 drivers/gpu/drm/rockchip/rockchip_drm_gem.h |   4 +-
 drivers/gpu/drm/tiny/cirrus.c   |  10 +-
 drivers/gpu/drm/tiny/gm12u320.c |  10 +-
 drivers/gpu/drm/udl/udl_modeset.c   |   8 +-
 drivers/gpu/drm/vboxvideo/vbox_mode.c   |  11 +-
 drivers/gpu/drm/vc4/vc4_bo.c|   6 +-
 drivers/gpu/drm/vc4/vc4_drv.h   |   2 +-
 drivers/gpu/drm/vgem/vgem_drv.c |  16 ++-
 drivers/gpu/drm/vkms/vkms_plane.c   |  15 ++-
 drivers/gpu/drm/vkms/vkms_writeback.c   |  22 ++--
 drivers/gpu/drm/xen/xen_drm_front_gem.c |  18 ++--
 drivers/gpu/drm/xen/xen_drm_front_gem.h |   6 +-
 include/drm/drm_gem.h   |   5 +-
 include/drm/drm_gem_cma_helper.h|   2 +-
 include/drm/drm_gem_shmem_helper.h  |   4 +-
 include/drm/drm_gem_vram_helper.h   |  14 +--
 49 files changed, 349 insertions(+), 308 deletions(-)

diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 6b224ef14455..59f63f1d7680 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -450,6 +450,24 @@ Contact: Ville Syrjälä, Daniel Vetter
 
 Level: Intermediate
 
+Use struct dma_buf_map throughout codebase
+------------------------------------------
+
+Pointers to shared device memory are stored in struct dma_buf_map. Each
+instance knows whether it refers to system or I/O memory. Most of the DRM-wide
+interface have been converted to use struct dma_buf_map, but implementations
+often still use raw pointers.
+
+The task is to use struct dma_buf_map where it makes sense.
+
+* Memory managers should use struct dma_buf_map for dma-buf-imported buffers.
+* TTM might benefit from using struct dma_buf_map internally.
+* Framebuffer copying and blitting helpers should operate on struct dma_buf_map.
+
+Contact: Thomas Zimmermann , Christian König, Daniel Vetter
+
+Level: Intermediate
+
 
 Core refactorings
=================
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 64376dd298ed..f5c7aa7894d5 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -232,6 +232,7 @@ config 

[PATCH v7 08/10] drm/gem: Store client buffer mappings as struct dma_buf_map

2020-11-03 Thread Thomas Zimmermann
Kernel DRM clients now store their framebuffer address in an instance
of struct dma_buf_map. Depending on the buffer's location, the address
refers to system or I/O memory.

Callers of drm_client_buffer_vmap() receive a copy of the value in
the call's supplied arguments. It can be accessed and modified with
dma_buf_map interfaces.
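
In client code the pattern would look roughly as follows (a sketch; 'buffer', 'offset', 'src' and 'len' are assumed to exist):

	struct dma_buf_map map;
	int ret;

	ret = drm_client_buffer_vmap(buffer, &map);
	if (ret)
		return ret;

	dma_buf_map_incr(&map, offset);        /* 'map' is a copy; safe to modify */
	dma_buf_map_memcpy_to(&map, src, len);

	drm_client_buffer_vunmap(buffer);      /* no address needs to be passed back */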

v6:
* don't call page_to_phys() on framebuffers in I/O memory;
  warn instead (Daniel)

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Daniel Vetter 
Tested-by: Sam Ravnborg 
---
 drivers/gpu/drm/drm_client.c| 34 +++--
 drivers/gpu/drm/drm_fb_helper.c | 32 ---
 include/drm/drm_client.h|  7 ---
 3 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index ac0082bed966..fe573acf1067 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -235,7 +235,7 @@ static void drm_client_buffer_delete(struct drm_client_buffer *buffer)
 {
struct drm_device *dev = buffer->client->dev;
 
-   drm_gem_vunmap(buffer->gem, buffer->vaddr);
+	drm_gem_vunmap(buffer->gem, &buffer->map);
 
if (buffer->gem)
drm_gem_object_put(buffer->gem);
@@ -291,25 +291,31 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u
 /**
  * drm_client_buffer_vmap - Map DRM client buffer into address space
  * @buffer: DRM client buffer
+ * @map_copy: Returns the mapped memory's address
  *
  * This function maps a client buffer into kernel address space. If the
- * buffer is already mapped, it returns the mapping's address.
+ * buffer is already mapped, it returns the existing mapping's address.
  *
  * Client buffer mappings are not ref'counted. Each call to
  * drm_client_buffer_vmap() should be followed by a call to
  * drm_client_buffer_vunmap(); or the client buffer should be mapped
  * throughout its lifetime.
  *
+ * The returned address is a copy of the internal value. In contrast to
+ * other vmap interfaces, you don't need it for the client's vunmap
+ * function. So you can modify it at will during blit and draw operations.
+ *
  * Returns:
- * The mapped memory's address
+ * 0 on success, or a negative errno code otherwise.
  */
-void *drm_client_buffer_vmap(struct drm_client_buffer *buffer)
+int
+drm_client_buffer_vmap(struct drm_client_buffer *buffer, struct dma_buf_map *map_copy)
 {
-   struct dma_buf_map map;
+	struct dma_buf_map *map = &buffer->map;
int ret;
 
-   if (buffer->vaddr)
-   return buffer->vaddr;
+   if (dma_buf_map_is_set(map))
+   goto out;
 
/*
 * FIXME: The dependency on GEM here isn't required, we could
@@ -319,13 +325,14 @@ void *drm_client_buffer_vmap(struct drm_client_buffer *buffer)
 * fd_install step out of the driver backend hooks, to make that
 * final step optional for internal users.
 */
-	ret = drm_gem_vmap(buffer->gem, &map);
+   ret = drm_gem_vmap(buffer->gem, map);
if (ret)
-   return ERR_PTR(ret);
+   return ret;
 
-   buffer->vaddr = map.vaddr;
+out:
+   *map_copy = *map;
 
-   return map.vaddr;
+   return 0;
 }
 EXPORT_SYMBOL(drm_client_buffer_vmap);
 
@@ -339,10 +346,9 @@ EXPORT_SYMBOL(drm_client_buffer_vmap);
  */
 void drm_client_buffer_vunmap(struct drm_client_buffer *buffer)
 {
-   struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(buffer->vaddr);
+	struct dma_buf_map *map = &buffer->map;
 
-	drm_gem_vunmap(buffer->gem, &map);
-   buffer->vaddr = NULL;
+   drm_gem_vunmap(buffer->gem, map);
 }
 EXPORT_SYMBOL(drm_client_buffer_vunmap);
 
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 92e0db30fdf7..a0d88130fedb 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -378,7 +378,7 @@ static void drm_fb_helper_dirty_blit_real(struct drm_fb_helper *fb_helper,
unsigned int cpp = fb->format->cpp[0];
size_t offset = clip->y1 * fb->pitches[0] + clip->x1 * cpp;
void *src = fb_helper->fbdev->screen_buffer + offset;
-   void *dst = fb_helper->buffer->vaddr + offset;
+   void *dst = fb_helper->buffer->map.vaddr + offset;
size_t len = (clip->x2 - clip->x1) * cpp;
unsigned int y;
 
@@ -400,7 +400,8 @@ static void drm_fb_helper_dirty_work(struct work_struct *work)
	struct drm_clip_rect *clip = &helper->dirty_clip;
struct drm_clip_rect clip_copy;
unsigned long flags;
-   void *vaddr;
+   struct dma_buf_map map;
+   int ret;
 
	spin_lock_irqsave(&helper->dirty_lock, flags);
clip_copy = *clip;
@@ -413,8 +414,8 @@ static void drm_fb_helper_dirty_work(struct work_struct *work)
 
/* Generic fbdev uses a shadow buffer */
if (helper->buffer) {
-   vaddr = drm_client_buffer_vmap(helper->buffer);
-

[PATCH v7 02/10] drm/cma-helper: Remove empty drm_gem_cma_prime_vunmap()

2020-11-03 Thread Thomas Zimmermann
The function drm_gem_cma_prime_vunmap() is empty. Remove it before
changing the interface to use struct dma_buf_map.

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Christian König 
Tested-by: Sam Ravnborg 
---
 drivers/gpu/drm/drm_gem_cma_helper.c | 17 -
 drivers/gpu/drm/vc4/vc4_bo.c |  1 -
 include/drm/drm_gem_cma_helper.h |  1 -
 3 files changed, 19 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index 2165633c9b9e..d527485ea0b7 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -537,23 +537,6 @@ void *drm_gem_cma_prime_vmap(struct drm_gem_object *obj)
 }
 EXPORT_SYMBOL_GPL(drm_gem_cma_prime_vmap);
 
-/**
- * drm_gem_cma_prime_vunmap - unmap a CMA GEM object from the kernel's virtual
- * address space
- * @obj: GEM object
- * @vaddr: kernel virtual address where the CMA GEM object was mapped
- *
- * This function removes a buffer exported via DRM PRIME from the kernel's
- * virtual address space. This is a no-op because CMA buffers cannot be
- * unmapped from kernel space. Drivers using the CMA helpers should set this
- * as their &drm_gem_object_funcs.vunmap callback.
- */
-void drm_gem_cma_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
-{
-   /* Nothing to do */
-}
-EXPORT_SYMBOL_GPL(drm_gem_cma_prime_vunmap);
-
 static const struct drm_gem_object_funcs drm_gem_cma_default_funcs = {
.free = drm_gem_cma_free_object,
.print_info = drm_gem_cma_print_info,
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index f432278173cd..557f0d1e6437 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -387,7 +387,6 @@ static const struct drm_gem_object_funcs vc4_gem_object_funcs = {
.export = vc4_prime_export,
.get_sg_table = drm_gem_cma_prime_get_sg_table,
.vmap = vc4_prime_vmap,
-   .vunmap = drm_gem_cma_prime_vunmap,
	.vm_ops = &vc4_vm_ops,
 };
 
diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
index 2bfa2502607a..a064b0d1c480 100644
--- a/include/drm/drm_gem_cma_helper.h
+++ b/include/drm/drm_gem_cma_helper.h
@@ -104,7 +104,6 @@ drm_gem_cma_prime_import_sg_table(struct drm_device *dev,
 int drm_gem_cma_prime_mmap(struct drm_gem_object *obj,
   struct vm_area_struct *vma);
 void *drm_gem_cma_prime_vmap(struct drm_gem_object *obj);
-void drm_gem_cma_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 
 struct drm_gem_object *
 drm_gem_cma_create_object_default_funcs(struct drm_device *dev, size_t size);
-- 
2.29.0


[PATCH v7 03/10] drm/etnaviv: Remove empty etnaviv_gem_prime_vunmap()

2020-11-03 Thread Thomas Zimmermann
The function etnaviv_gem_prime_vunmap() is empty. Remove it before
changing the interface to use struct dma_buf_map.

Signed-off-by: Thomas Zimmermann 
Acked-by: Christian König 
Tested-by: Sam Ravnborg 
---
 drivers/gpu/drm/etnaviv/etnaviv_drv.h   | 1 -
 drivers/gpu/drm/etnaviv/etnaviv_gem.c   | 1 -
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 5 -
 3 files changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index 914f0867ff71..9682c26d89bb 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -52,7 +52,6 @@ int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset);
 struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj);
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj);
-void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int etnaviv_gem_prime_mmap(struct drm_gem_object *obj,
   struct vm_area_struct *vma);
 struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device 
*dev,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 67d9a2b9ea6a..bbd235473645 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -571,7 +571,6 @@ static const struct drm_gem_object_funcs etnaviv_gem_object_funcs = {
.unpin = etnaviv_gem_prime_unpin,
.get_sg_table = etnaviv_gem_prime_get_sg_table,
.vmap = etnaviv_gem_prime_vmap,
-   .vunmap = etnaviv_gem_prime_vunmap,
	.vm_ops = &vm_ops,
 };
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index 135fbff6fecf..a6d9932a32ae 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -27,11 +27,6 @@ void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
return etnaviv_gem_vmap(obj);
 }
 
-void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
-{
-   /* TODO msm_gem_vunmap() */
-}
-
 int etnaviv_gem_prime_mmap(struct drm_gem_object *obj,
   struct vm_area_struct *vma)
 {
-- 
2.29.0


[PATCH v7 04/10] drm/exynos: Remove empty exynos_drm_gem_prime_{vmap, vunmap}()

2020-11-03 Thread Thomas Zimmermann
The functions exynos_drm_gem_prime_{vmap,vunmap}() are empty. Remove
them before changing the interface to use struct dma_buf_map. As a side
effect of removing drm_gem_prime_vmap(), the error code changes from
ENOMEM to EOPNOTSUPP.

Signed-off-by: Thomas Zimmermann 
Acked-by: Christian König 
Tested-by: Sam Ravnborg 
---
 drivers/gpu/drm/exynos/exynos_drm_gem.c | 12 
 drivers/gpu/drm/exynos/exynos_drm_gem.h |  2 --
 2 files changed, 14 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 4afbf5109cbf..4396224227d1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -135,8 +135,6 @@ static const struct vm_operations_struct exynos_drm_gem_vm_ops = {
 static const struct drm_gem_object_funcs exynos_drm_gem_object_funcs = {
.free = exynos_drm_gem_free_object,
.get_sg_table = exynos_drm_gem_prime_get_sg_table,
-   .vmap = exynos_drm_gem_prime_vmap,
-   .vunmap = exynos_drm_gem_prime_vunmap,
	.vm_ops = &exynos_drm_gem_vm_ops,
 };
 
@@ -469,16 +467,6 @@ exynos_drm_gem_prime_import_sg_table(struct drm_device *dev,
	return &exynos_gem->base;
 }
 
-void *exynos_drm_gem_prime_vmap(struct drm_gem_object *obj)
-{
-   return NULL;
-}
-
-void exynos_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
-{
-   /* Nothing to do */
-}
-
 int exynos_drm_gem_prime_mmap(struct drm_gem_object *obj,
  struct vm_area_struct *vma)
 {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 74e926abeff0..a23272fb96fb 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -107,8 +107,6 @@ struct drm_gem_object *
 exynos_drm_gem_prime_import_sg_table(struct drm_device *dev,
 struct dma_buf_attachment *attach,
 struct sg_table *sgt);
-void *exynos_drm_gem_prime_vmap(struct drm_gem_object *obj);
-void exynos_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int exynos_drm_gem_prime_mmap(struct drm_gem_object *obj,
  struct vm_area_struct *vma);
 
-- 
2.29.0


[PATCH v7 00/10] Support GEM object mappings from I/O memory

2020-11-03 Thread Thomas Zimmermann
DRM's fbdev console uses regular load and store operations to update
framebuffer memory. The bochs driver on sparc64 requires the use of
I/O-specific load and store operations. We have a workaround, but need
a long-term solution to the problem.

This patchset changes GEM's vmap/vunmap interfaces to forward pointers
of type struct dma_buf_map and updates the generic fbdev emulation to
use them correctly. This enables I/O-memory operations on all framebuffers
that require and support them.

Patches #1 to #4 prepare VRAM helpers and drivers.

Next is the update of the GEM vmap functions. Patch #5 adds vmap and vunmap
that is usable with TTM-based GEM drivers, and patch #6 updates GEM's
vmap/vunmap callback to forward instances of type struct dma_buf_map. While
the patch touches many files throughout the DRM modules, the applied changes
are mostly trivial interface fixes. Several TTM-based GEM drivers now use
the new vmap code. Patch #7 updates GEM's internal vmap/vunmap functions to
forward struct dma_buf_map.

With struct dma_buf_map propagated through the layers, patches #8 to #10
convert DRM clients and generic fbdev emulation to use it. Updating the
fbdev framebuffer will select the correct functions, either for system or
I/O memory.

There is also a set of IGT testcases for fbdev at [1]. Reading and writing
fbdev device files have several corner cases near the EOF that the tests cover
as well. The original fbdev code has different semantics with the different
implementations (sys, cfb). Patch #10 and the testcases intend to harmonize
the behaviour and serve as a reference.

v7:
* return number of read/written bytes in fbdev code, if any
* init QXL cursor from BO buffer (kernel test robot)
* use min_t(size_t,) (kernel test robot)
v6:
* don't call page_to_phys() on fbdev framebuffers in I/O memory;
  warn instead (Daniel)
v5:
* rebase onto latest TTM changes (Christian)
* support TTM premapped memory correctly (Christian)
* implement fb_read/fb_write internally (Sam, Daniel)
* cleanups
v4:
* provide TTM vmap/vunmap plus GEM helpers and convert drivers
  over (Christian, Daniel)
* remove several empty functions
* more TODOs and documentation (Daniel)
v3:
* recreate the whole patchset on top of struct dma_buf_map
v2:
* RFC patchset

[1] https://gitlab.freedesktop.org/tzimmermann/igt-gpu-tools/-/merge_requests/1

Thomas Zimmermann (10):
  drm/vram-helper: Remove invariant parameters from internal kmap
function
  drm/cma-helper: Remove empty drm_gem_cma_prime_vunmap()
  drm/etnaviv: Remove empty etnaviv_gem_prime_vunmap()
  drm/exynos: Remove empty exynos_drm_gem_prime_{vmap,vunmap}()
  drm/ttm: Add vmap/vunmap to TTM and TTM GEM helpers
  drm/gem: Use struct dma_buf_map in GEM vmap ops and convert GEM
backends
  drm/gem: Update internal GEM vmap/vunmap interfaces to use struct
dma_buf_map
  drm/gem: Store client buffer mappings as struct dma_buf_map
  dma-buf-map: Add memcpy and pointer-increment interfaces
  drm/fb_helper: Support framebuffers in I/O memory

 Documentation/gpu/todo.rst  |  37 ++-
 drivers/gpu/drm/Kconfig |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  36 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |   1 -
 drivers/gpu/drm/ast/ast_cursor.c|  27 +--
 drivers/gpu/drm/ast/ast_drv.h   |   7 +-
 drivers/gpu/drm/bochs/bochs_kms.c   |   1 -
 drivers/gpu/drm/drm_client.c|  38 +--
 drivers/gpu/drm/drm_fb_helper.c | 250 ++--
 drivers/gpu/drm/drm_gem.c   |  29 ++-
 drivers/gpu/drm/drm_gem_cma_helper.c|  27 +--
 drivers/gpu/drm/drm_gem_shmem_helper.c  |  48 ++--
 drivers/gpu/drm/drm_gem_ttm_helper.c|  38 +++
 drivers/gpu/drm/drm_gem_vram_helper.c   | 117 +
 drivers/gpu/drm/drm_internal.h  |   5 +-
 drivers/gpu/drm/drm_prime.c |  14 +-
 drivers/gpu/drm/etnaviv/etnaviv_drv.h   |   3 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c   |   1 -
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c |  12 +-
 drivers/gpu/drm/exynos/exynos_drm_gem.c |  12 -
 drivers/gpu/drm/exynos/exynos_drm_gem.h |   2 -
 drivers/gpu/drm/lima/lima_gem.c |   6 +-
 drivers/gpu/drm/lima/lima_sched.c   |  11 +-
 drivers/gpu/drm/mgag200/mgag200_mode.c  |  10 +-
 drivers/gpu/drm/nouveau/Kconfig |   1 +
 drivers/gpu/drm/nouveau/nouveau_bo.h|   2 -
 drivers/gpu/drm/nouveau/nouveau_gem.c   |   6 +-
 drivers/gpu/drm/nouveau/nouveau_gem.h   |   2 -
 drivers/gpu/drm/nouveau/nouveau_prime.c |  20 --
 drivers/gpu/drm/panfrost/panfrost_perfcnt.c |  14 +-
 drivers/gpu/drm/qxl/qxl_display.c   |  15 +-
 drivers/gpu/drm/qxl/qxl_draw.c  |  

[PATCH v7 07/10] drm/gem: Update internal GEM vmap/vunmap interfaces to use struct dma_buf_map

2020-11-03 Thread Thomas Zimmermann
GEM's vmap and vunmap interfaces now wrap memory pointers in struct
dma_buf_map.
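
For DRM-internal callers the pattern becomes (a sketch):

	struct dma_buf_map map;
	int ret;

	ret = drm_gem_vmap(obj, &map);
	if (ret)
		return ret;

	/* ... access the buffer through the dma_buf_map helpers ... */

	drm_gem_vunmap(obj, &map); /* clears 'map'; callers may rely on this */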

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Daniel Vetter 
Tested-by: Sam Ravnborg 
---
 drivers/gpu/drm/drm_client.c   | 18 +++---
 drivers/gpu/drm/drm_gem.c  | 26 +-
 drivers/gpu/drm/drm_internal.h |  5 +++--
 drivers/gpu/drm/drm_prime.c| 14 --
 4 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index 495f47d23d87..ac0082bed966 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -3,6 +3,7 @@
  * Copyright 2018 Noralf Trønnes
  */
 
+#include <linux/dma-buf-map.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -304,7 +305,8 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u
  */
 void *drm_client_buffer_vmap(struct drm_client_buffer *buffer)
 {
-   void *vaddr;
+   struct dma_buf_map map;
+   int ret;
 
if (buffer->vaddr)
return buffer->vaddr;
@@ -317,13 +319,13 @@ void *drm_client_buffer_vmap(struct drm_client_buffer *buffer)
 * fd_install step out of the driver backend hooks, to make that
 * final step optional for internal users.
 */
-   vaddr = drm_gem_vmap(buffer->gem);
-   if (IS_ERR(vaddr))
-   return vaddr;
-	ret = drm_gem_vmap(buffer->gem, &map);
+   if (ret)
+   return ERR_PTR(ret);
 
-   buffer->vaddr = vaddr;
+   buffer->vaddr = map.vaddr;
 
-   return vaddr;
+   return map.vaddr;
 }
 EXPORT_SYMBOL(drm_client_buffer_vmap);
 
@@ -337,7 +339,9 @@ EXPORT_SYMBOL(drm_client_buffer_vmap);
  */
 void drm_client_buffer_vunmap(struct drm_client_buffer *buffer)
 {
-   drm_gem_vunmap(buffer->gem, buffer->vaddr);
+   struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(buffer->vaddr);
+
-	drm_gem_vunmap(buffer->gem, &map);
buffer->vaddr = NULL;
 }
 EXPORT_SYMBOL(drm_client_buffer_vunmap);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 4231fda26e70..eb2d23e04be9 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1206,32 +1206,32 @@ void drm_gem_unpin(struct drm_gem_object *obj)
obj->funcs->unpin(obj);
 }
 
-void *drm_gem_vmap(struct drm_gem_object *obj)
+int drm_gem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
 {
-   struct dma_buf_map map;
int ret;
 
if (!obj->funcs->vmap)
-   return ERR_PTR(-EOPNOTSUPP);
+   return -EOPNOTSUPP;
 
-	ret = obj->funcs->vmap(obj, &map);
+   ret = obj->funcs->vmap(obj, map);
if (ret)
-   return ERR_PTR(ret);
-	else if (dma_buf_map_is_null(&map))
-   return ERR_PTR(-ENOMEM);
+   return ret;
+   else if (dma_buf_map_is_null(map))
+   return -ENOMEM;
 
-   return map.vaddr;
+   return 0;
 }
 
-void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr)
+void drm_gem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map)
 {
-   struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(vaddr);
-
-   if (!vaddr)
+   if (dma_buf_map_is_null(map))
return;
 
if (obj->funcs->vunmap)
-   obj->funcs->vunmap(obj, );
+   obj->funcs->vunmap(obj, map);
+
+   /* Always set the mapping to NULL. Callers may rely on this. */
+   dma_buf_map_clear(map);
 }
 
 /**
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 2bdac3557765..81d386b5b92a 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -33,6 +33,7 @@
 
 struct dentry;
 struct dma_buf;
+struct dma_buf_map;
 struct drm_connector;
 struct drm_crtc;
 struct drm_framebuffer;
@@ -187,8 +188,8 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
 
 int drm_gem_pin(struct drm_gem_object *obj);
 void drm_gem_unpin(struct drm_gem_object *obj);
-void *drm_gem_vmap(struct drm_gem_object *obj);
-void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr);
+int drm_gem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
+void drm_gem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map);
 
 /* drm_debugfs.c drm_debugfs_crc.c */
 #if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 187b55ede62e..302e2bb3dfff 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -667,21 +667,15 @@ EXPORT_SYMBOL(drm_gem_unmap_dma_buf);
  *
 * Sets up a kernel virtual mapping. This can be used as the &dma_buf_ops.vmap
 * callback. Calls into &drm_gem_object_funcs.vmap for device specific handling.
+ * The kernel virtual address is returned in map.
  *
- * Returns the kernel virtual address or NULL on failure.
+ * Returns 0 on success or a negative errno code otherwise.
  */
 int drm_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map)
 {
struct drm_gem_object *obj 

Re: [PATCH] vhost/vsock: add IOTLB API support

2020-11-03 Thread Jason Wang


On 2020/11/3 1:11 AM, Stefano Garzarella wrote:

On Fri, Oct 30, 2020 at 07:44:43PM +0800, Jason Wang wrote:


On 2020/10/30 6:54 PM, Stefano Garzarella wrote:

On Fri, Oct 30, 2020 at 06:02:18PM +0800, Jason Wang wrote:


On 2020/10/30 1:43 AM, Stefano Garzarella wrote:

This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.

These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
  device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
  VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
  metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
  chardev; they are used by the userspace to exchange IOTLB messages

This patch was tested with QEMU and a patch applied [1] to fix a
simple issue:
    $ qemu -M q35,accel=kvm,kernel-irqchip=split \
   -drive file=fedora.qcow2,format=qcow2,if=virtio \
   -device intel-iommu,intremap=on \
   -device vhost-vsock-pci,guest-cid=3,iommu_platform=on



Patch looks good, but a question:

It looks to me that you don't enable ATS, which means vhost won't get any invalidation requests. Or did I miss anything?




You're right, I didn't see invalidation requests, only misses and updates.
Now I have tried to enable 'ats' and 'device-iotlb', but I still don't see any invalidation.


How can I test it? (Sorry, but I don't have much experience with vIOMMU yet.)



I guess it's because of the batched unmap. Maybe you can try "intel_iommu=strict" on the guest kernel command line to see if it works.


Btw, make sure the qemu contains the patch [1]. Otherwise ATS won't be enabled for recent Linux kernels in the guest.


The problem was my kernel; it was built with a tiny configuration.
Using the Fedora stock kernel I can see the 'invalidate' requests, but I also had the following issues.

Does this ring any bells?

$ ./qemu -m 4G -smp 4 -M q35,accel=kvm,kernel-irqchip=split \
    -drive file=fedora.qcow2,format=qcow2,if=virtio \
    -device intel-iommu,intremap=on,device-iotlb=on \
    -device vhost-vsock-pci,guest-cid=6,iommu_platform=on,ats=on,id=v1

    qemu-system-x86_64: vtd_iova_to_slpte: detected IOVA overflow (iova=0x1d4030c0)



It's a hint that the IOVA exceeds the AW (address width). It might be worth checking whether the missed IOVA reported from the IOTLB is legal.


Thanks


qemu-system-x86_64: vtd_iommu_translate: detected translation failure (dev=00:03:00, iova=0x1d4030c0)
    qemu-system-x86_64: New fault is not recorded due to compression of faults


Guest kernel messages:
    [   44.940872] DMAR: DRHD: handling fault status reg 2
    [   44.941989] DMAR: [DMA Read] Request device [00:03.0] PASID fault addr 88W

    [   49.785884] DMAR: DRHD: handling fault status reg 2
    [   49.788874] DMAR: [DMA Read] Request device [00:03.0] PASID fault addr 88W



QEMU: b149dea55c Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20201102' into staging


Linux guest: 5.8.16-200.fc32.x86_64


Thanks,
Stefano




Re: [PATCH 1/2] Revert "vhost-vdpa: fix page pinning leakage in error path"

2020-11-03 Thread Jason Wang


On 2020/10/30 3:45 PM, Si-Wei Liu wrote:

This reverts commit 7ed9e3d97c32d969caded2dfb6e67c1a2cc5a0b1.

Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 119 +--
  1 file changed, 48 insertions(+), 71 deletions(-)



I saw this has been reverted there:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/vhost?id=5e1a3149eec8675c2767cc465903f5e4829de5b0


:)

Thanks




diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index a2dbc85..b6d9016 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -588,19 +588,21 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
	struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb *iotlb = dev->iotlb;
struct page **page_list;
-   struct vm_area_struct **vmas;
+   unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM;
-   unsigned long map_pfn, last_pfn = 0;
-   unsigned long npages, lock_limit;
-   unsigned long i, nmap = 0;
+   unsigned long npages, cur_base, map_pfn, last_pfn = 0;
+   unsigned long locked, lock_limit, pinned, i;
u64 iova = msg->iova;
-   long pinned;
int ret = 0;
  
 	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 				    msg->iova + msg->size - 1))
return -EEXIST;
  
+	page_list = (struct page **) __get_free_page(GFP_KERNEL);
+   if (!page_list)
+   return -ENOMEM;
+
if (msg->perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE;
  
@@ -608,86 +610,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 	if (!npages)
return -EINVAL;
  
-	page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
-   vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
- GFP_KERNEL);
-   if (!page_list || !vmas) {
-   ret = -ENOMEM;
-   goto free;
-   }
-
mmap_read_lock(dev->mm);
  
+	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
-   ret = -ENOMEM;
-   goto unlock;
-   }
  
-	pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
-				page_list, vmas);
-   if (npages != pinned) {
-   if (pinned < 0) {
-   ret = pinned;
-   } else {
-   unpin_user_pages(page_list, pinned);
-   ret = -ENOMEM;
-   }
-   goto unlock;
+   if (locked > lock_limit) {
+   ret = -ENOMEM;
+   goto out;
}
  
+	cur_base = msg->uaddr & PAGE_MASK;
iova &= PAGE_MASK;
-   map_pfn = page_to_pfn(page_list[0]);
-
-   /* One more iteration to avoid extra vdpa_map() call out of loop. */
-   for (i = 0; i <= npages; i++) {
-   unsigned long this_pfn;
-   u64 csize;
-
-   /* The last chunk may have no valid PFN next to it */
-   this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
-
-   if (last_pfn && (this_pfn == -1UL ||
-this_pfn != last_pfn + 1)) {
-   /* Pin a contiguous chunk of memory */
-   csize = last_pfn - map_pfn + 1;
-   ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
-map_pfn << PAGE_SHIFT,
-msg->perm);
-   if (ret) {
-   /*
-* Unpin the rest chunks of memory on the
-* flight with no corresponding vdpa_map()
-* calls having been made yet. On the other
-* hand, vdpa_unmap() in the failure path
-* is in charge of accounting the number of
-* pinned pages for its own.
-* This asymmetrical pattern of accounting
-* is for efficiency to pin all pages at
-* once, while there is no other callsite
-* of vdpa_map() than here above.
-*/
-	unpin_user_pages(&page_list[nmap],
-npages - nmap);
-   goto out;
+
+   while (npages) {
+   pinned = min_t(unsigned long, npages, list_size);
+   ret = pin_user_pages(cur_base, pinned,
+gup_flags, page_list, NULL);
+   if (ret != pinned)
+