On Tue, 2024-01-30 at 17:07 -0800, Charlie Jenkins wrote:
> On riscv it is guaranteed that the address returned by mmap is less
> than
> the hint address. Allow mmap to return an address all the way up to
> addr, if provided, rather than just up to the lower address space.
> 
> This provides a performance benefit as well, allowing mmap to exit
> after
> checking that the address is in range rather than searching for a
> valid
> address.
> 
> It is possible to provide an address that uses at most the same
> number
> of bits, however it is significantly more computationally expensive
> to
> provide that number rather than setting the max to be the hint
> address.
> There is the instruction clz/clzw in Zbb that returns the highest set
> bit
> which could be used to performantly implement this, but it would
> still
> be slower than the current implementation. At worst case, half of the
> address would not be able to be allocated when a hint address is
> provided.
> 
> Signed-off-by: Charlie Jenkins <[email protected]>
> ---
>  arch/riscv/include/asm/processor.h | 27 +++++++++++----------------
>  1 file changed, 11 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/processor.h
> b/arch/riscv/include/asm/processor.h
> index f19f861cda54..8ece7a8f0e18 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -14,22 +14,16 @@
>  
>  #include <asm/ptrace.h>
>  
> -#ifdef CONFIG_64BIT
> -#define DEFAULT_MAP_WINDOW   (UL(1) << (MMAP_VA_BITS - 1))
> -#define STACK_TOP_MAX                TASK_SIZE_64
> -
>  #define arch_get_mmap_end(addr, len, flags)                  \
>  ({                                                           \
>       unsigned long
> mmap_end;                                     \
>       typeof(addr) _addr = (addr);                            \
> -     if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) &&
> is_compat_task())) \
> +     if ((_addr) == 0 ||                                     \
> +         (IS_ENABLED(CONFIG_COMPAT) && is_compat_task()) ||  \
> +         ((_addr + len) > BIT(VA_BITS -
> 1)))                  \
>               mmap_end = STACK_TOP_MAX;                       \
> -     else if ((_addr) >= VA_USER_SV57)                       \
> -             mmap_end = STACK_TOP_MAX;                       \
> -     else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >=
> VA_BITS_SV48)) \
> -             mmap_end = VA_USER_SV48;                        \
>       else                                                    \
> -             mmap_end = VA_USER_SV39;                        \
> +             mmap_end = (_addr + len);                       \
>       mmap_end;                                               \
>  })
>  
> @@ -39,17 +33,18 @@
>       typeof(addr) _addr = (addr);                            \
>       typeof(base) _base = (base);                            \
>       unsigned long rnd_gap = DEFAULT_MAP_WINDOW - (_base);   \
> -     if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) &&
> is_compat_task())) \
> +     if ((_addr) == 0 ||                                     \
> +         (IS_ENABLED(CONFIG_COMPAT) && is_compat_task()) ||  \
> +         ((_addr + len) > BIT(VA_BITS -
> 1)))                  \
>               mmap_base = (_base);                            \
> -     else if (((_addr) >= VA_USER_SV57) && (VA_BITS >=
> VA_BITS_SV57)) \
> -             mmap_base = VA_USER_SV57 - rnd_gap;             \
> -     else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >=
> VA_BITS_SV48)) \
> -             mmap_base = VA_USER_SV48 - rnd_gap;             \
>       else                                                    \
> -             mmap_base = VA_USER_SV39 - rnd_gap;             \
> +             mmap_base = (_addr + len) - rnd_gap;            \
>       mmap_base;                                              \
>  })
>  
> +#ifdef CONFIG_64BIT
> +#define DEFAULT_MAP_WINDOW   (UL(1) << (MMAP_VA_BITS - 1))
> +#define STACK_TOP_MAX                TASK_SIZE_64
>  #else
>  #define DEFAULT_MAP_WINDOW   TASK_SIZE
>  #define STACK_TOP_MAX                TASK_SIZE
> 

I have carefully tested your patch on qemu with sv57. One bug that needs
to be fixed: a second mmap call with the same hint address, without
MAP_FIXED set, fails instead of falling back to another address.

Userspace code to reproduce the bug:

#include <sys/mman.h>
#include <stdio.h>
#include <stdint.h>

/*
 * Request one anonymous, private, read/write mapping of 4096 bytes with
 * addr as the placement hint (MAP_FIXED is not set), then print both the
 * hint and the address mmap returned.
 */
void test(char *addr) {
    const int prot = PROT_READ | PROT_WRITE;
    const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
    char *mapped = mmap(addr, 4096, prot, flags, -1, 0);

    /* The result is printed unchecked so a failed call is visible in the output. */
    printf("hint %p got %p.\n", addr, mapped);
}

/*
 * Ask for a mapping at the same 1 GiB hint address three times in a row.
 * The mappings are never released, so the second and third calls cannot be
 * placed at the hint itself.
 */
int main (void) {
    /*
     * Convert explicitly: passing a plain int where test() expects a char *
     * is a constraint violation in C, and newer compilers reject it.
     * 1UL keeps the shift in unsigned long.
     */
    char *hint = (char *)(1UL << 30);

    test(hint);
    test(hint);
    test(hint);
    return 0;
}

output with this patch applied (riscv, qemu with sv57):

hint 0x40000000 got 0x40000000.
hint 0x40000000 got 0xffffffffffffffff.
hint 0x40000000 got 0xffffffffffffffff.

output on x86:

hint 0x40000000 got 0x40000000.
hint 0x40000000 got 0x7f9171363000.
hint 0x40000000 got 0x7f9171362000.

Fixing this may require implementing riscv-specific
arch_get_unmapped_area and arch_get_unmapped_area_topdown functions.


Reply via email to