Re: [PATCH 14/19] mm: Add user_landing in mm_struct

2020-11-08 Thread Dmitry Safonov
On 11/8/20 7:04 PM, Andy Lutomirski wrote:
> On Sat, Nov 7, 2020 at 9:18 PM Dmitry Safonov  wrote:
>>
>> Instead of having every architecture define vdso_base/vdso_addr etc.,
>> provide a generic mechanism to track landing in userspace.
>> It'll minimize per-architecture differences and the number of callbacks
>> to provide.
>>
>> Originally, it started from thread [1] where the need for a .close()
>> callback on vm_special_mapping was pointed out. This generic code, besides
>> removing duplicated .mremap() callbacks, provides a cheaper way to
>> support munmap() on vdso mappings without introducing .close() callbacks
>> for every architecture (which would bring even more code duplication).
> 
> I find the naming odd.  It's called "user_landing", which is
> presumably a hard-to-understand shorthand for "user mode landing pad
> for return from a signal handler if SA_RESTORER is not set".  But,
> looking at the actual code, it's not this at all -- it's just the vDSO
> base address.

Agreed. Originally, I tried to track the actual landing address in the
vdso, but .mremap() seemed simpler when tracking the vma base.

> So how about just calling it vdso_base?  I'm very much in favor of
> consolidating and cleaning up, and improving the vdso remap/unmap
> code, but I'm not convinced that we should call it anything other than
> the vdso base.

Sure.

Thanks,
 Dmitry


Re: [PATCH 14/19] mm: Add user_landing in mm_struct

2020-11-08 Thread Andy Lutomirski
On Sat, Nov 7, 2020 at 9:18 PM Dmitry Safonov  wrote:
>
> Instead of having every architecture define vdso_base/vdso_addr etc.,
> provide a generic mechanism to track landing in userspace.
> It'll minimize per-architecture differences and the number of callbacks
> to provide.
>
> Originally, it started from thread [1] where the need for a .close()
> callback on vm_special_mapping was pointed out. This generic code, besides
> removing duplicated .mremap() callbacks, provides a cheaper way to
> support munmap() on vdso mappings without introducing .close() callbacks
> for every architecture (which would bring even more code duplication).

I find the naming odd.  It's called "user_landing", which is
presumably a hard-to-understand shorthand for "user mode landing pad
for return from a signal handler if SA_RESTORER is not set".  But,
looking at the actual code, it's not this at all -- it's just the vDSO
base address.

So how about just calling it vdso_base?  I'm very much in favor of
consolidating and cleaning up, and improving the vdso remap/unmap
code, but I'm not convinced that we should call it anything other than
the vdso base.

--Andy


[PATCH 14/19] mm: Add user_landing in mm_struct

2020-11-07 Thread Dmitry Safonov
Instead of having every architecture define vdso_base/vdso_addr etc.,
provide a generic mechanism to track landing in userspace.
It'll minimize per-architecture differences and the number of callbacks
to provide.

Originally, it started from thread [1] where the need for a .close()
callback on vm_special_mapping was pointed out. This generic code, besides
removing duplicated .mremap() callbacks, provides a cheaper way to
support munmap() on vdso mappings without introducing .close() callbacks
for every architecture (which would bring even more code duplication).

[1]: https://lore.kernel.org/linux-arch/cajwjo6zanqykshbq+3b+fi_vt80mtrzev5yreqawx-l8j8x...@mail.gmail.com/
Cc: Thomas Bogendoerfer 
Cc: linux-m...@vger.kernel.org
Signed-off-by: Dmitry Safonov 
---
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c |  3 ++-
 fs/aio.c  |  3 ++-
 include/linux/mm.h|  3 ++-
 include/linux/mm_types.h  | 10 ++
 mm/Kconfig|  3 +++
 mm/mmap.c | 19 ++-
 mm/mremap.c   |  2 +-
 7 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c 
b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index e916646adc69..786c97203bf6 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1458,7 +1458,8 @@ static int pseudo_lock_dev_release(struct inode *inode, 
struct file *filp)
return 0;
 }
 
-static int pseudo_lock_dev_mremap(struct vm_area_struct *area, unsigned long 
flags)
+static int pseudo_lock_dev_mremap(struct vm_area_struct *old_vma,
+   struct vm_area_struct *new_vma, unsigned long flags)
 {
/* Not supported */
return -EINVAL;
diff --git a/fs/aio.c b/fs/aio.c
index d1dad4cd860f..2695dc9ed46f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -324,7 +324,8 @@ static void aio_free_ring(struct kioctx *ctx)
}
 }
 
-static int aio_ring_mremap(struct vm_area_struct *vma, unsigned long flags)
+static int aio_ring_mremap(struct vm_area_struct *old_vma,
+  struct vm_area_struct *vma, unsigned long flags)
 {
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 427911d2c83e..4b0f97a289b3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -559,7 +559,8 @@ struct vm_operations_struct {
void (*close)(struct vm_area_struct * area);
/* Called any time before splitting to check if it's allowed */
int (*may_split)(struct vm_area_struct *area, unsigned long addr);
-   int (*mremap)(struct vm_area_struct *area, unsigned long flags);
+   int (*mremap)(struct vm_area_struct *old_vma,
+   struct vm_area_struct *new_vma, unsigned long flags);
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf,
enum page_entry_size pe_size);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b035caff6abe..f888257e973a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -488,6 +488,16 @@ struct mm_struct {
 
/* Architecture-specific MM context */
mm_context_t context;
+#ifdef CONFIG_ARCH_HAS_USER_LANDING
+   /*
+* Address of special mapping VMA to land after processing
+* a signal. Reads are unprotected: if a thread unmaps or
+* mremaps the mapping while another thread is processing
+* a signal, it can segfault while landing.
+*/
+   void __user *user_landing;
+#endif
+#define UNMAPPED_USER_LANDING TASK_SIZE_MAX
 
unsigned long flags; /* Must use atomic bitops to access */
 
diff --git a/mm/Kconfig b/mm/Kconfig
index 01b0ae0cd9d3..d43b61a21be8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -883,4 +883,7 @@ config ARCH_HAS_HUGEPD
 config MAPPING_DIRTY_HELPERS
 bool
 
+config ARCH_HAS_USER_LANDING
+   bool
+
 endmenu
diff --git a/mm/mmap.c b/mm/mmap.c
index 2376f3972f13..8a17ffdedacb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3410,11 +3410,25 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t 
flags, long npages)
 
 static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
 
+static void update_user_landing(struct vm_area_struct *old_vma,
+   unsigned long new_addr)
+{
+#ifdef CONFIG_ARCH_HAS_USER_LANDING
+   struct mm_struct *mm = old_vma->vm_mm;
+
+   if (WARN_ON_ONCE(!mm))
+   return;
+   if (old_vma->vm_start == (unsigned long)mm->user_landing)
+   mm->user_landing = (void __user *)new_addr;
+#endif
+}
+
 /*
  * Having a close hook prevents vma merging regardless of flags.
  */
 static void special_mapping_close(struct vm_area_struct *vma)
 {
+