Re: [PATCH v3 1/3] riscv: Move kernel mapping to vmalloc zone

2020-05-28 Thread Alex Ghiti

Hi Zong,

Le 5/27/20 à 3:29 AM, Alex Ghiti a écrit :

Le 5/27/20 à 2:05 AM, Zong Li a écrit :

On Wed, May 27, 2020 at 1:06 AM Alex Ghiti  wrote:

Hi Zong,

Le 5/26/20 à 5:43 AM, Zong Li a écrit :

On Sun, May 24, 2020 at 4:54 PM Alexandre Ghiti  wrote:

This is a preparatory patch for relocatable kernel.

The kernel used to be linked at PAGE_OFFSET address and used to be 
loaded
physically at the beginning of the main memory. Therefore, we 
could use

the linear mapping for the kernel mapping.

But the relocated kernel base address will be different from 
PAGE_OFFSET
and since in the linear mapping, two different virtual addresses 
cannot
point to the same physical address, the kernel mapping needs to 
lie outside

the linear mapping.

In addition, because modules and BPF must be close to the kernel 
(inside
+-2GB window), the kernel is placed at the end of the vmalloc zone 
minus

2GB, which leaves room for modules and BPF. The kernel could not be
placed at the beginning of the vmalloc zone since other vmalloc
allocations from the kernel could get all the +-2GB window around the
kernel which would prevent new modules and BPF programs to be loaded.

Signed-off-by: Alexandre Ghiti 
---
   arch/riscv/boot/loader.lds.S |  3 +-
   arch/riscv/include/asm/page.h    | 10 +-
   arch/riscv/include/asm/pgtable.h | 37 +---
   arch/riscv/kernel/head.S |  3 +-
   arch/riscv/kernel/module.c   |  4 +--
   arch/riscv/kernel/vmlinux.lds.S  |  3 +-
   arch/riscv/mm/init.c | 58 
+---

   arch/riscv/mm/physaddr.c |  2 +-
   8 files changed, 87 insertions(+), 33 deletions(-)

diff --git a/arch/riscv/boot/loader.lds.S 
b/arch/riscv/boot/loader.lds.S

index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
   /* SPDX-License-Identifier: GPL-2.0 */

   #include 
+#include 

   OUTPUT_ARCH(riscv)
   ENTRY(_start)

   SECTIONS
   {
-   . = PAGE_OFFSET;
+   . = KERNEL_LINK_ADDR;

  .payload : {
  *(.payload)
diff --git a/arch/riscv/include/asm/page.h 
b/arch/riscv/include/asm/page.h

index 2d50f76efe48..48bb09b6a9b7 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -90,18 +90,26 @@ typedef struct page *pgtable_t;

   #ifdef CONFIG_MMU
   extern unsigned long va_pa_offset;
+extern unsigned long va_kernel_pa_offset;
   extern unsigned long pfn_base;
   #define ARCH_PFN_OFFSET    (pfn_base)
   #else
   #define va_pa_offset   0
+#define va_kernel_pa_offset    0
   #define ARCH_PFN_OFFSET    (PAGE_OFFSET >> PAGE_SHIFT)
   #endif /* CONFIG_MMU */

   extern unsigned long max_low_pfn;
   extern unsigned long min_low_pfn;
+extern unsigned long kernel_virt_addr;

   #define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + 
va_pa_offset))

-#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - va_pa_offset)
+#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - 
va_pa_offset)

+#define kernel_mapping_va_to_pa(x) \
+   ((unsigned long)(x) - va_kernel_pa_offset)
+#define __va_to_pa_nodebug(x)  \
+   (((x) >= PAGE_OFFSET) ? \
+   linear_mapping_va_to_pa(x) : 
kernel_mapping_va_to_pa(x))


   #ifdef CONFIG_DEBUG_VIRTUAL
   extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/pgtable.h 
b/arch/riscv/include/asm/pgtable.h

index 35b60035b6b0..25213cfaf680 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,23 +11,29 @@

   #include 

-#ifndef __ASSEMBLY__
-
-/* Page Upper Directory not used in RISC-V */
-#include 
-#include 
-#include 
-#include 
-
-#ifdef CONFIG_MMU
+#ifndef CONFIG_MMU
+#define KERNEL_VIRT_ADDR   PAGE_OFFSET
+#define KERNEL_LINK_ADDR   PAGE_OFFSET
+#else
+/*
+ * Leave 2GB for modules and BPF that must lie within a 2GB range 
around

+ * the kernel.
+ */
+#define KERNEL_VIRT_ADDR   (VMALLOC_END - SZ_2G + 1)
+#define KERNEL_LINK_ADDR   KERNEL_VIRT_ADDR

   #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
   #define VMALLOC_END  (PAGE_OFFSET - 1)
   #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)

   #define BPF_JIT_REGION_SIZE    (SZ_128M)
-#define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END (VMALLOC_END)
+#define BPF_JIT_REGION_START   (kernel_virt_addr)
+#define BPF_JIT_REGION_END (kernel_virt_addr + 
BPF_JIT_REGION_SIZE)

It seems to have a potential risk here, the region of bpf is
overlapping with kernel mapping, so if kernel size is bigger than
128MB, bpf region would be occupied and run out by kernel mapping.

Is there the risk as I mentioned?



Sorry I forgot to answer this one: I was confident that 128MB was 
large enough for kernel
and BPF. But I see no reason to leave this risk so I'll change 
kernel_virt_addr for _end so

BPF will have its 128MB reserved.

Thanks !

Alex



Re: [PATCH v3 1/3] riscv: Move kernel mapping to vmalloc zone

2020-05-27 Thread kbuild test robot
Hi Alexandre,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on powerpc/next]
[also build test WARNING on linus/master v5.7-rc7 next-20200526]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Alexandre-Ghiti/vmalloc-kernel-mapping-and-relocatable-kernel/20200524-170109
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: riscv-allyesconfig (attached as .config)
compiler: riscv64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All warnings (new ones prefixed by >>, old ones prefixed by <<):

>> arch/riscv/mm/init.c:383:6: warning: no previous prototype for 
>> 'create_kernel_page_table' [-Wmissing-prototypes]
383 | void create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size)
|  ^~~~

vim +/create_kernel_page_table +383 arch/riscv/mm/init.c

   382  
 > 383  void create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size)
   384  {
   385  uintptr_t va, end_va;
   386  
   387  end_va = kernel_virt_addr + load_sz;
   388  for (va = kernel_virt_addr; va < end_va; va += map_size)
   389  create_pgd_mapping(pgdir, va,
   390 load_pa + (va - kernel_virt_addr),
   391 map_size, PAGE_KERNEL_EXEC);
   392  }
   393  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH v3 1/3] riscv: Move kernel mapping to vmalloc zone

2020-05-27 Thread Alex Ghiti

Le 5/27/20 à 2:05 AM, Zong Li a écrit :

On Wed, May 27, 2020 at 1:06 AM Alex Ghiti  wrote:

Hi Zong,

Le 5/26/20 à 5:43 AM, Zong Li a écrit :

On Sun, May 24, 2020 at 4:54 PM Alexandre Ghiti  wrote:

This is a preparatory patch for relocatable kernel.

The kernel used to be linked at PAGE_OFFSET address and used to be loaded
physically at the beginning of the main memory. Therefore, we could use
the linear mapping for the kernel mapping.

But the relocated kernel base address will be different from PAGE_OFFSET
and since in the linear mapping, two different virtual addresses cannot
point to the same physical address, the kernel mapping needs to lie outside
the linear mapping.

In addition, because modules and BPF must be close to the kernel (inside
+-2GB window), the kernel is placed at the end of the vmalloc zone minus
2GB, which leaves room for modules and BPF. The kernel could not be
placed at the beginning of the vmalloc zone since other vmalloc
allocations from the kernel could get all the +-2GB window around the
kernel which would prevent new modules and BPF programs to be loaded.

Signed-off-by: Alexandre Ghiti 
---
   arch/riscv/boot/loader.lds.S |  3 +-
   arch/riscv/include/asm/page.h| 10 +-
   arch/riscv/include/asm/pgtable.h | 37 +---
   arch/riscv/kernel/head.S |  3 +-
   arch/riscv/kernel/module.c   |  4 +--
   arch/riscv/kernel/vmlinux.lds.S  |  3 +-
   arch/riscv/mm/init.c | 58 +---
   arch/riscv/mm/physaddr.c |  2 +-
   8 files changed, 87 insertions(+), 33 deletions(-)

diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
   /* SPDX-License-Identifier: GPL-2.0 */

   #include 
+#include 

   OUTPUT_ARCH(riscv)
   ENTRY(_start)

   SECTIONS
   {
-   . = PAGE_OFFSET;
+   . = KERNEL_LINK_ADDR;

  .payload : {
  *(.payload)
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 2d50f76efe48..48bb09b6a9b7 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -90,18 +90,26 @@ typedef struct page *pgtable_t;

   #ifdef CONFIG_MMU
   extern unsigned long va_pa_offset;
+extern unsigned long va_kernel_pa_offset;
   extern unsigned long pfn_base;
   #define ARCH_PFN_OFFSET(pfn_base)
   #else
   #define va_pa_offset   0
+#define va_kernel_pa_offset0
   #define ARCH_PFN_OFFSET(PAGE_OFFSET >> PAGE_SHIFT)
   #endif /* CONFIG_MMU */

   extern unsigned long max_low_pfn;
   extern unsigned long min_low_pfn;
+extern unsigned long kernel_virt_addr;

   #define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + va_pa_offset))
-#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - va_pa_offset)
+#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
+#define kernel_mapping_va_to_pa(x) \
+   ((unsigned long)(x) - va_kernel_pa_offset)
+#define __va_to_pa_nodebug(x)  \
+   (((x) >= PAGE_OFFSET) ? \
+   linear_mapping_va_to_pa(x) : kernel_mapping_va_to_pa(x))

   #ifdef CONFIG_DEBUG_VIRTUAL
   extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 35b60035b6b0..25213cfaf680 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,23 +11,29 @@

   #include 

-#ifndef __ASSEMBLY__
-
-/* Page Upper Directory not used in RISC-V */
-#include 
-#include 
-#include 
-#include 
-
-#ifdef CONFIG_MMU
+#ifndef CONFIG_MMU
+#define KERNEL_VIRT_ADDR   PAGE_OFFSET
+#define KERNEL_LINK_ADDR   PAGE_OFFSET
+#else
+/*
+ * Leave 2GB for modules and BPF that must lie within a 2GB range around
+ * the kernel.
+ */
+#define KERNEL_VIRT_ADDR   (VMALLOC_END - SZ_2G + 1)
+#define KERNEL_LINK_ADDR   KERNEL_VIRT_ADDR

   #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
   #define VMALLOC_END  (PAGE_OFFSET - 1)
   #define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)

   #define BPF_JIT_REGION_SIZE(SZ_128M)
-#define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END (VMALLOC_END)
+#define BPF_JIT_REGION_START   (kernel_virt_addr)
+#define BPF_JIT_REGION_END (kernel_virt_addr + BPF_JIT_REGION_SIZE)

It seems to have a potential risk here, the region of bpf is
overlapping with kernel mapping, so if kernel size is bigger than
128MB, bpf region would be occupied and run out by kernel mapping.

Is there the risk as I mentioned?



Sorry I forgot to answer this one: I was confident that 128MB was large 
enough for kernel
and BPF. But I see no reason to leave this risk so I'll change 
kernel_virt_addr for _end so

BPF will have its 128MB reserved.

Thanks !

Alex





+
+#ifdef CONFIG_64BIT
+#define VMALLOC_MODULE_START   BPF_JIT_REGION_END

Re: [PATCH v3 1/3] riscv: Move kernel mapping to vmalloc zone

2020-05-27 Thread Zong Li
On Wed, May 27, 2020 at 1:06 AM Alex Ghiti  wrote:
>
> Hi Zong,
>
> Le 5/26/20 à 5:43 AM, Zong Li a écrit :
> > On Sun, May 24, 2020 at 4:54 PM Alexandre Ghiti  wrote:
> >> This is a preparatory patch for relocatable kernel.
> >>
> >> The kernel used to be linked at PAGE_OFFSET address and used to be loaded
> >> physically at the beginning of the main memory. Therefore, we could use
> >> the linear mapping for the kernel mapping.
> >>
> >> But the relocated kernel base address will be different from PAGE_OFFSET
> >> and since in the linear mapping, two different virtual addresses cannot
> >> point to the same physical address, the kernel mapping needs to lie outside
> >> the linear mapping.
> >>
> >> In addition, because modules and BPF must be close to the kernel (inside
> >> +-2GB window), the kernel is placed at the end of the vmalloc zone minus
> >> 2GB, which leaves room for modules and BPF. The kernel could not be
> >> placed at the beginning of the vmalloc zone since other vmalloc
> >> allocations from the kernel could get all the +-2GB window around the
> >> kernel which would prevent new modules and BPF programs to be loaded.
> >>
> >> Signed-off-by: Alexandre Ghiti 
> >> ---
> >>   arch/riscv/boot/loader.lds.S |  3 +-
> >>   arch/riscv/include/asm/page.h| 10 +-
> >>   arch/riscv/include/asm/pgtable.h | 37 +---
> >>   arch/riscv/kernel/head.S |  3 +-
> >>   arch/riscv/kernel/module.c   |  4 +--
> >>   arch/riscv/kernel/vmlinux.lds.S  |  3 +-
> >>   arch/riscv/mm/init.c | 58 +---
> >>   arch/riscv/mm/physaddr.c |  2 +-
> >>   8 files changed, 87 insertions(+), 33 deletions(-)
> >>
> >> diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
> >> index 47a5003c2e28..62d94696a19c 100644
> >> --- a/arch/riscv/boot/loader.lds.S
> >> +++ b/arch/riscv/boot/loader.lds.S
> >> @@ -1,13 +1,14 @@
> >>   /* SPDX-License-Identifier: GPL-2.0 */
> >>
> >>   #include 
> >> +#include 
> >>
> >>   OUTPUT_ARCH(riscv)
> >>   ENTRY(_start)
> >>
> >>   SECTIONS
> >>   {
> >> -   . = PAGE_OFFSET;
> >> +   . = KERNEL_LINK_ADDR;
> >>
> >>  .payload : {
> >>  *(.payload)
> >> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> >> index 2d50f76efe48..48bb09b6a9b7 100644
> >> --- a/arch/riscv/include/asm/page.h
> >> +++ b/arch/riscv/include/asm/page.h
> >> @@ -90,18 +90,26 @@ typedef struct page *pgtable_t;
> >>
> >>   #ifdef CONFIG_MMU
> >>   extern unsigned long va_pa_offset;
> >> +extern unsigned long va_kernel_pa_offset;
> >>   extern unsigned long pfn_base;
> >>   #define ARCH_PFN_OFFSET(pfn_base)
> >>   #else
> >>   #define va_pa_offset   0
> >> +#define va_kernel_pa_offset0
> >>   #define ARCH_PFN_OFFSET(PAGE_OFFSET >> PAGE_SHIFT)
> >>   #endif /* CONFIG_MMU */
> >>
> >>   extern unsigned long max_low_pfn;
> >>   extern unsigned long min_low_pfn;
> >> +extern unsigned long kernel_virt_addr;
> >>
> >>   #define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + 
> >> va_pa_offset))
> >> -#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - va_pa_offset)
> >> +#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
> >> +#define kernel_mapping_va_to_pa(x) \
> >> +   ((unsigned long)(x) - va_kernel_pa_offset)
> >> +#define __va_to_pa_nodebug(x)  \
> >> +   (((x) >= PAGE_OFFSET) ? \
> >> +   linear_mapping_va_to_pa(x) : kernel_mapping_va_to_pa(x))
> >>
> >>   #ifdef CONFIG_DEBUG_VIRTUAL
> >>   extern phys_addr_t __virt_to_phys(unsigned long x);
> >> diff --git a/arch/riscv/include/asm/pgtable.h 
> >> b/arch/riscv/include/asm/pgtable.h
> >> index 35b60035b6b0..25213cfaf680 100644
> >> --- a/arch/riscv/include/asm/pgtable.h
> >> +++ b/arch/riscv/include/asm/pgtable.h
> >> @@ -11,23 +11,29 @@
> >>
> >>   #include 
> >>
> >> -#ifndef __ASSEMBLY__
> >> -
> >> -/* Page Upper Directory not used in RISC-V */
> >> -#include 
> >> -#include 
> >> -#include 
> >> -#include 
> >> -
> >> -#ifdef CONFIG_MMU
> >> +#ifndef CONFIG_MMU
> >> +#define KERNEL_VIRT_ADDR   PAGE_OFFSET
> >> +#define KERNEL_LINK_ADDR   PAGE_OFFSET
> >> +#else
> >> +/*
> >> + * Leave 2GB for modules and BPF that must lie within a 2GB range around
> >> + * the kernel.
> >> + */
> >> +#define KERNEL_VIRT_ADDR   (VMALLOC_END - SZ_2G + 1)
> >> +#define KERNEL_LINK_ADDR   KERNEL_VIRT_ADDR
> >>
> >>   #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
> >>   #define VMALLOC_END  (PAGE_OFFSET - 1)
> >>   #define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
> >>
> >>   #define BPF_JIT_REGION_SIZE(SZ_128M)
> >> -#define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
> >> -#define BPF_JIT_REGION_END (VMALLOC_END)
> >> +#define BPF_JIT_REGION_START   (kernel_virt_addr)
> >> +#define BPF_JIT_REGION_END (kernel_virt_addr + BPF_JIT_REGION_SIZE)
> > It seems to have a 

Re: [PATCH v3 1/3] riscv: Move kernel mapping to vmalloc zone

2020-05-26 Thread Alex Ghiti

Hi Zong,

Le 5/26/20 à 5:43 AM, Zong Li a écrit :

On Sun, May 24, 2020 at 4:54 PM Alexandre Ghiti  wrote:

This is a preparatory patch for relocatable kernel.

The kernel used to be linked at PAGE_OFFSET address and used to be loaded
physically at the beginning of the main memory. Therefore, we could use
the linear mapping for the kernel mapping.

But the relocated kernel base address will be different from PAGE_OFFSET
and since in the linear mapping, two different virtual addresses cannot
point to the same physical address, the kernel mapping needs to lie outside
the linear mapping.

In addition, because modules and BPF must be close to the kernel (inside
+-2GB window), the kernel is placed at the end of the vmalloc zone minus
2GB, which leaves room for modules and BPF. The kernel could not be
placed at the beginning of the vmalloc zone since other vmalloc
allocations from the kernel could get all the +-2GB window around the
kernel which would prevent new modules and BPF programs to be loaded.

Signed-off-by: Alexandre Ghiti 
---
  arch/riscv/boot/loader.lds.S |  3 +-
  arch/riscv/include/asm/page.h| 10 +-
  arch/riscv/include/asm/pgtable.h | 37 +---
  arch/riscv/kernel/head.S |  3 +-
  arch/riscv/kernel/module.c   |  4 +--
  arch/riscv/kernel/vmlinux.lds.S  |  3 +-
  arch/riscv/mm/init.c | 58 +---
  arch/riscv/mm/physaddr.c |  2 +-
  8 files changed, 87 insertions(+), 33 deletions(-)

diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
  /* SPDX-License-Identifier: GPL-2.0 */

  #include 
+#include 

  OUTPUT_ARCH(riscv)
  ENTRY(_start)

  SECTIONS
  {
-   . = PAGE_OFFSET;
+   . = KERNEL_LINK_ADDR;

 .payload : {
 *(.payload)
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 2d50f76efe48..48bb09b6a9b7 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -90,18 +90,26 @@ typedef struct page *pgtable_t;

  #ifdef CONFIG_MMU
  extern unsigned long va_pa_offset;
+extern unsigned long va_kernel_pa_offset;
  extern unsigned long pfn_base;
  #define ARCH_PFN_OFFSET(pfn_base)
  #else
  #define va_pa_offset   0
+#define va_kernel_pa_offset0
  #define ARCH_PFN_OFFSET(PAGE_OFFSET >> PAGE_SHIFT)
  #endif /* CONFIG_MMU */

  extern unsigned long max_low_pfn;
  extern unsigned long min_low_pfn;
+extern unsigned long kernel_virt_addr;

  #define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + va_pa_offset))
-#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - va_pa_offset)
+#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
+#define kernel_mapping_va_to_pa(x) \
+   ((unsigned long)(x) - va_kernel_pa_offset)
+#define __va_to_pa_nodebug(x)  \
+   (((x) >= PAGE_OFFSET) ? \
+   linear_mapping_va_to_pa(x) : kernel_mapping_va_to_pa(x))

  #ifdef CONFIG_DEBUG_VIRTUAL
  extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 35b60035b6b0..25213cfaf680 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,23 +11,29 @@

  #include 

-#ifndef __ASSEMBLY__
-
-/* Page Upper Directory not used in RISC-V */
-#include 
-#include 
-#include 
-#include 
-
-#ifdef CONFIG_MMU
+#ifndef CONFIG_MMU
+#define KERNEL_VIRT_ADDR   PAGE_OFFSET
+#define KERNEL_LINK_ADDR   PAGE_OFFSET
+#else
+/*
+ * Leave 2GB for modules and BPF that must lie within a 2GB range around
+ * the kernel.
+ */
+#define KERNEL_VIRT_ADDR   (VMALLOC_END - SZ_2G + 1)
+#define KERNEL_LINK_ADDR   KERNEL_VIRT_ADDR

  #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
  #define VMALLOC_END  (PAGE_OFFSET - 1)
  #define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)

  #define BPF_JIT_REGION_SIZE(SZ_128M)
-#define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END (VMALLOC_END)
+#define BPF_JIT_REGION_START   (kernel_virt_addr)
+#define BPF_JIT_REGION_END (kernel_virt_addr + BPF_JIT_REGION_SIZE)

It seems to have a potential risk here, the region of bpf is
overlapping with kernel mapping, so if kernel size is bigger than
128MB, bpf region would be occupied and run out by kernel mapping.


+
+#ifdef CONFIG_64BIT
+#define VMALLOC_MODULE_START   BPF_JIT_REGION_END
+#define VMALLOC_MODULE_END VMALLOC_END
+#endif


Although kernel_virt_addr is a fixed address now, I think it could be
changed for the purpose of relocatable or KASLR, so if
kernel_virt_addr is moved to far from VMALLOC_END than 2G, the region
of module would be too big.



Yes you're right, that's wrong to allow modules to lie outside
the 2G window, thanks for noticing.



In addition, the region 

Re: [PATCH v3 1/3] riscv: Move kernel mapping to vmalloc zone

2020-05-26 Thread Zong Li
On Sun, May 24, 2020 at 4:54 PM Alexandre Ghiti  wrote:
>
> This is a preparatory patch for relocatable kernel.
>
> The kernel used to be linked at PAGE_OFFSET address and used to be loaded
> physically at the beginning of the main memory. Therefore, we could use
> the linear mapping for the kernel mapping.
>
> But the relocated kernel base address will be different from PAGE_OFFSET
> and since in the linear mapping, two different virtual addresses cannot
> point to the same physical address, the kernel mapping needs to lie outside
> the linear mapping.
>
> In addition, because modules and BPF must be close to the kernel (inside
> +-2GB window), the kernel is placed at the end of the vmalloc zone minus
> 2GB, which leaves room for modules and BPF. The kernel could not be
> placed at the beginning of the vmalloc zone since other vmalloc
> allocations from the kernel could get all the +-2GB window around the
> kernel which would prevent new modules and BPF programs to be loaded.
>
> Signed-off-by: Alexandre Ghiti 
> ---
>  arch/riscv/boot/loader.lds.S |  3 +-
>  arch/riscv/include/asm/page.h| 10 +-
>  arch/riscv/include/asm/pgtable.h | 37 +---
>  arch/riscv/kernel/head.S |  3 +-
>  arch/riscv/kernel/module.c   |  4 +--
>  arch/riscv/kernel/vmlinux.lds.S  |  3 +-
>  arch/riscv/mm/init.c | 58 +---
>  arch/riscv/mm/physaddr.c |  2 +-
>  8 files changed, 87 insertions(+), 33 deletions(-)
>
> diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
> index 47a5003c2e28..62d94696a19c 100644
> --- a/arch/riscv/boot/loader.lds.S
> +++ b/arch/riscv/boot/loader.lds.S
> @@ -1,13 +1,14 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
>
>  #include 
> +#include 
>
>  OUTPUT_ARCH(riscv)
>  ENTRY(_start)
>
>  SECTIONS
>  {
> -   . = PAGE_OFFSET;
> +   . = KERNEL_LINK_ADDR;
>
> .payload : {
> *(.payload)
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 2d50f76efe48..48bb09b6a9b7 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -90,18 +90,26 @@ typedef struct page *pgtable_t;
>
>  #ifdef CONFIG_MMU
>  extern unsigned long va_pa_offset;
> +extern unsigned long va_kernel_pa_offset;
>  extern unsigned long pfn_base;
>  #define ARCH_PFN_OFFSET(pfn_base)
>  #else
>  #define va_pa_offset   0
> +#define va_kernel_pa_offset0
>  #define ARCH_PFN_OFFSET(PAGE_OFFSET >> PAGE_SHIFT)
>  #endif /* CONFIG_MMU */
>
>  extern unsigned long max_low_pfn;
>  extern unsigned long min_low_pfn;
> +extern unsigned long kernel_virt_addr;
>
>  #define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + va_pa_offset))
> -#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - va_pa_offset)
> +#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
> +#define kernel_mapping_va_to_pa(x) \
> +   ((unsigned long)(x) - va_kernel_pa_offset)
> +#define __va_to_pa_nodebug(x)  \
> +   (((x) >= PAGE_OFFSET) ? \
> +   linear_mapping_va_to_pa(x) : kernel_mapping_va_to_pa(x))
>
>  #ifdef CONFIG_DEBUG_VIRTUAL
>  extern phys_addr_t __virt_to_phys(unsigned long x);
> diff --git a/arch/riscv/include/asm/pgtable.h 
> b/arch/riscv/include/asm/pgtable.h
> index 35b60035b6b0..25213cfaf680 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -11,23 +11,29 @@
>
>  #include 
>
> -#ifndef __ASSEMBLY__
> -
> -/* Page Upper Directory not used in RISC-V */
> -#include 
> -#include 
> -#include 
> -#include 
> -
> -#ifdef CONFIG_MMU
> +#ifndef CONFIG_MMU
> +#define KERNEL_VIRT_ADDR   PAGE_OFFSET
> +#define KERNEL_LINK_ADDR   PAGE_OFFSET
> +#else
> +/*
> + * Leave 2GB for modules and BPF that must lie within a 2GB range around
> + * the kernel.
> + */
> +#define KERNEL_VIRT_ADDR   (VMALLOC_END - SZ_2G + 1)
> +#define KERNEL_LINK_ADDR   KERNEL_VIRT_ADDR
>
>  #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
>  #define VMALLOC_END  (PAGE_OFFSET - 1)
>  #define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
>
>  #define BPF_JIT_REGION_SIZE(SZ_128M)
> -#define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
> -#define BPF_JIT_REGION_END (VMALLOC_END)
> +#define BPF_JIT_REGION_START   (kernel_virt_addr)
> +#define BPF_JIT_REGION_END (kernel_virt_addr + BPF_JIT_REGION_SIZE)

It seems to have a potential risk here, the region of bpf is
overlapping with kernel mapping, so if kernel size is bigger than
128MB, bpf region would be occupied and run out by kernel mapping.

> +
> +#ifdef CONFIG_64BIT
> +#define VMALLOC_MODULE_START   BPF_JIT_REGION_END
> +#define VMALLOC_MODULE_END VMALLOC_END
> +#endif
>

Although kernel_virt_addr is a fixed address now, I think it could be
changed for the purpose of relocatable or KASLR, so if
kernel_virt_addr is moved to far from VMALLOC_END than 2G, the region
of module 

[PATCH v3 1/3] riscv: Move kernel mapping to vmalloc zone

2020-05-24 Thread Alexandre Ghiti
This is a preparatory patch for relocatable kernel.

The kernel used to be linked at PAGE_OFFSET address and used to be loaded
physically at the beginning of the main memory. Therefore, we could use
the linear mapping for the kernel mapping.

But the relocated kernel base address will be different from PAGE_OFFSET
and since in the linear mapping, two different virtual addresses cannot
point to the same physical address, the kernel mapping needs to lie outside
the linear mapping.

In addition, because modules and BPF must be close to the kernel (inside
+-2GB window), the kernel is placed at the end of the vmalloc zone minus
2GB, which leaves room for modules and BPF. The kernel could not be
placed at the beginning of the vmalloc zone since other vmalloc
allocations from the kernel could get all the +-2GB window around the
kernel which would prevent new modules and BPF programs to be loaded.

Signed-off-by: Alexandre Ghiti 
---
 arch/riscv/boot/loader.lds.S |  3 +-
 arch/riscv/include/asm/page.h| 10 +-
 arch/riscv/include/asm/pgtable.h | 37 +---
 arch/riscv/kernel/head.S |  3 +-
 arch/riscv/kernel/module.c   |  4 +--
 arch/riscv/kernel/vmlinux.lds.S  |  3 +-
 arch/riscv/mm/init.c | 58 +---
 arch/riscv/mm/physaddr.c |  2 +-
 8 files changed, 87 insertions(+), 33 deletions(-)

diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 #include 
+#include 
 
 OUTPUT_ARCH(riscv)
 ENTRY(_start)
 
 SECTIONS
 {
-   . = PAGE_OFFSET;
+   . = KERNEL_LINK_ADDR;
 
.payload : {
*(.payload)
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 2d50f76efe48..48bb09b6a9b7 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -90,18 +90,26 @@ typedef struct page *pgtable_t;
 
 #ifdef CONFIG_MMU
 extern unsigned long va_pa_offset;
+extern unsigned long va_kernel_pa_offset;
 extern unsigned long pfn_base;
 #define ARCH_PFN_OFFSET(pfn_base)
 #else
 #define va_pa_offset   0
+#define va_kernel_pa_offset0
 #define ARCH_PFN_OFFSET(PAGE_OFFSET >> PAGE_SHIFT)
 #endif /* CONFIG_MMU */
 
 extern unsigned long max_low_pfn;
 extern unsigned long min_low_pfn;
+extern unsigned long kernel_virt_addr;
 
 #define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + va_pa_offset))
-#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - va_pa_offset)
+#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
+#define kernel_mapping_va_to_pa(x) \
+   ((unsigned long)(x) - va_kernel_pa_offset)
+#define __va_to_pa_nodebug(x)  \
+   (((x) >= PAGE_OFFSET) ? \
+   linear_mapping_va_to_pa(x) : kernel_mapping_va_to_pa(x))
 
 #ifdef CONFIG_DEBUG_VIRTUAL
 extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 35b60035b6b0..25213cfaf680 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,23 +11,29 @@
 
 #include 
 
-#ifndef __ASSEMBLY__
-
-/* Page Upper Directory not used in RISC-V */
-#include 
-#include 
-#include 
-#include 
-
-#ifdef CONFIG_MMU
+#ifndef CONFIG_MMU
+#define KERNEL_VIRT_ADDR   PAGE_OFFSET
+#define KERNEL_LINK_ADDR   PAGE_OFFSET
+#else
+/*
+ * Leave 2GB for modules and BPF that must lie within a 2GB range around
+ * the kernel.
+ */
+#define KERNEL_VIRT_ADDR   (VMALLOC_END - SZ_2G + 1)
+#define KERNEL_LINK_ADDR   KERNEL_VIRT_ADDR
 
 #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END  (PAGE_OFFSET - 1)
 #define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
 
 #define BPF_JIT_REGION_SIZE(SZ_128M)
-#define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END (VMALLOC_END)
+#define BPF_JIT_REGION_START   (kernel_virt_addr)
+#define BPF_JIT_REGION_END (kernel_virt_addr + BPF_JIT_REGION_SIZE)
+
+#ifdef CONFIG_64BIT
+#define VMALLOC_MODULE_START   BPF_JIT_REGION_END
+#define VMALLOC_MODULE_END VMALLOC_END
+#endif
 
 /*
  * Roughly size the vmemmap space to be large enough to fit enough
@@ -57,9 +63,16 @@
 #define FIXADDR_SIZE PGDIR_SIZE
 #endif
 #define FIXADDR_START(FIXADDR_TOP - FIXADDR_SIZE)
-
 #endif
 
+#ifndef __ASSEMBLY__
+
+/* Page Upper Directory not used in RISC-V */
+#include 
+#include 
+#include 
+#include 
+
 #ifdef CONFIG_64BIT
 #include 
 #else
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 98a406474e7d..8f5bb7731327 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -49,7 +49,8 @@ ENTRY(_start)
 #ifdef CONFIG_MMU
 relocate:
/* Relocate return address */
-   li a1, PAGE_OFFSET
+   la a1,