Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-15 Thread Greentime Hu
On Fri, Aug 16, 2019 at 12:20 AM Logan Gunthorpe  wrote:
>
>
>
> On 2019-08-15 3:31 a.m., Greentime Hu wrote:
> > Hi Logan,
> >
> > On Thu, Aug 15, 2019 at 6:21 AM Logan Gunthorpe  wrote:
> >>
> >> Hey,
> >>
> >> On 2019-08-14 7:35 a.m., Greentime Hu wrote:
> >>> How about this fix? Not sure if it is good for everyone.
> >>
> >> I applied your fix to the patch and it seems ok. But it doesn't seem to
> >> work on a recent version of the kernel. Have you got it working on v5.3?
> >> It seems the following patch breaks things:
> >>
> >> 671f9a3e2e24 ("RISC-V: Setup initial page tables in two stages")
> >>
> >> I don't really have time right now to debug this any further.
> >>
> >
> > I just tried v5.3-rc4 and it failed. I try to debug this case.
> > I found it failed might be because of an unmapped virtual address is used
> > in memblocks_present() -> memblock_alloc ().
> >
> > In this commit 671f9a3e2e24 ("RISC-V: Setup initial page tables in two
> > stages"), it finishes all the VA/PA mapping after setup_vm_final() is
> > called.
> > So we have to call memblocks_present() and sparse_init() right after
> > setup_vm_final().
> >
> > Here is my patch and I tested with memory-with-hole.
> > It can boot normally in Unleashed board after applying this patch.
>
> Great, thanks! I'll roll this into my patch and send v5 out when I have
> a moment. Can I add your Signed-off-by for the bits you've contributed
> to give you credit for your work?

Sure. Please use my Signed-off-by: Greentime Hu 


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-15 Thread Logan Gunthorpe



On 2019-08-15 3:31 a.m., Greentime Hu wrote:
> Hi Logan,
> 
> On Thu, Aug 15, 2019 at 6:21 AM Logan Gunthorpe  wrote:
>>
>> Hey,
>>
>> On 2019-08-14 7:35 a.m., Greentime Hu wrote:
>>> How about this fix? Not sure if it is good for everyone.
>>
>> I applied your fix to the patch and it seems ok. But it doesn't seem to
>> work on a recent version of the kernel. Have you got it working on v5.3?
>> It seems the following patch breaks things:
>>
>> 671f9a3e2e24 ("RISC-V: Setup initial page tables in two stages")
>>
>> I don't really have time right now to debug this any further.
>>
> 
> I just tried v5.3-rc4 and it failed. I try to debug this case.
> I found it failed might be because of an unmapped virtual address is used
> in memblocks_present() -> memblock_alloc ().
> 
> In this commit 671f9a3e2e24 ("RISC-V: Setup initial page tables in two
> stages"), it finishes all the VA/PA mapping after setup_vm_final() is
> called.
> So we have to call memblocks_present() and sparse_init() right after
> setup_vm_final().
> 
> Here is my patch and I tested with memory-with-hole.
> It can boot normally in Unleashed board after applying this patch.

Great, thanks! I'll roll this into my patch and send v5 out when I have
a moment. Can I add your Signed-off-by for the bits you've contributed
to give you credit for your work?

Logan


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-15 Thread Greentime Hu
Hi Logan,

On Thu, Aug 15, 2019 at 6:21 AM Logan Gunthorpe  wrote:
>
> Hey,
>
> On 2019-08-14 7:35 a.m., Greentime Hu wrote:
> > How about this fix? Not sure if it is good for everyone.
>
> I applied your fix to the patch and it seems ok. But it doesn't seem to
> work on a recent version of the kernel. Have you got it working on v5.3?
> It seems the following patch breaks things:
>
> 671f9a3e2e24 ("RISC-V: Setup initial page tables in two stages")
>
> I don't really have time right now to debug this any further.
>

I just tried v5.3-rc4 and it failed. I try to debug this case.
I found it failed might be because of an unmapped virtual address is used
in memblocks_present() -> memblock_alloc ().

In this commit 671f9a3e2e24 ("RISC-V: Setup initial page tables in two
stages"), it finishes all the VA/PA mapping after setup_vm_final() is
called.
So we have to call memblocks_present() and sparse_init() right after
setup_vm_final().

Here is my patch and I tested with memory-with-hole.
It can boot normally in Unleashed board after applying this patch.

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 27d1d847fb2d..35aacb0c93e5 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -138,8 +138,6 @@ void __init setup_bootmem(void)
  PFN_PHYS(end_pfn - start_pfn),
  , 0);
}
-   memblocks_present();
-   sparse_init();
 }

 unsigned long va_pa_offset;
@@ -452,6 +450,8 @@ static void __init setup_vm_final(void)
 void __init paging_init(void)
 {
setup_vm_final();
+   memblocks_present();
+   sparse_init();
setup_zero_page();
zone_sizes_init();
 }

Test case:
memory@8000 {
device_type = "memory";
reg = <0x0 0x8000 0x0 0x8000>;
};
memory@18000 {
device_type = "memory";
reg = <0x1 0x8000 0x0 0x4000>;
};


# cat /proc/meminfo
MemTotal:3003496 kB
MemFree: 2986584 kB
MemAvailable:2970176 kB
Buffers:   0 kB
Cached: 3540 kB
SwapCached:0 kB
Active: 3920 kB
Inactive: 68 kB
Active(anon):   3920 kB
Inactive(anon):   68 kB
Active(file):  0 kB
Inactive(file):0 kB
Unevictable:   0 kB
Mlocked:   0 kB
SwapTotal: 0 kB
SwapFree:  0 kB
Dirty: 0 kB
Writeback: 0 kB
AnonPages:   528 kB
Mapped: 1984 kB
Shmem:  3540 kB
KReclaimable:688 kB
Slab:   5700 kB
SReclaimable:688 kB
SUnreclaim: 5012 kB
KernelStack: 424 kB
PageTables:   80 kB
NFS_Unstable:  0 kB
Bounce:0 kB
WritebackTmp:  0 kB
CommitLimit: 1501748 kB
Committed_AS:   3200 kB
VmallocTotal:   67108863 kB
VmallocUsed:  12 kB
VmallocChunk:  0 kB
Percpu:  272 kB
# uname -a
Linux buildroot 5.3.0-rc4-1-g44404421c481-dirty #10 SMP Thu Aug 15
16:28:24 DST 2019 riscv64 GNU/Lin[ 2352.443621] random: fast init done
ux
# zcat /proc/config.gz |grep SPARSE
CONFIG_SPARSE_IRQ=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_SPARSEMEM_MANUAL=y
CONFIG_SPARSEMEM=y
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP=y
# CONFIG_INPUT_SPARSEKMAP is not set


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-14 Thread Logan Gunthorpe
Hey,

On 2019-08-14 7:35 a.m., Greentime Hu wrote:
> How about this fix? Not sure if it is good for everyone.

I applied your fix to the patch and it seems ok. But it doesn't seem to
work on a recent version of the kernel. Have you got it working on v5.3?
It seems the following patch breaks things:

671f9a3e2e24 ("RISC-V: Setup initial page tables in two stages")

I don't really have time right now to debug this any further.

Logan


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-14 Thread Paul Walmsley
On Wed, 14 Aug 2019, Logan Gunthorpe wrote:

> On 2019-08-14 11:40 a.m., Paul Walmsley wrote:
> > On Wed, 14 Aug 2019, Logan Gunthorpe wrote:
> > 
> >> On 2019-08-14 7:35 a.m., Greentime Hu wrote:
> >>
> >>> Maybe this commit explains why it used HAVE_ARCH_PFN_VALID instead of 
> >>> SPARSEMEM.
> >>> https://github.com/torvalds/linux/commit/7b7bf499f79de3f6c85a340c8453a78789523f85
> >>>
> >>> BTW, I found another issue here.
> >>> #define FIXADDR_TOP(VMALLOC_START)
> >>> #define FIXADDR_START   (FIXADDR_TOP - FIXADDR_SIZE)
> >>> #define VMEMMAP_END(VMALLOC_START - 1)
> >>> #define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
> >>> These 2 regions are overlapped.
> >>>
> >>> How about this fix? Not sure if it is good for everyone.
> >>
> >> Yes, this looks good to me. I can fold these changes into my patch and
> >> send a v5 to the list.
> > 
> > The change to FIXADDR_TOP should be separated out into its own patch - it 
> > probably needs to go up as a fix.
> 
> I don't think so... VMEMMAP_START doesn't exist until the sparsemem
> patch so it can't be changed until after the sparsemem patch and no
> sense adding a bug in the sparsemem patch...

OK, that's fine then.


- Paul


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-14 Thread Logan Gunthorpe



On 2019-08-14 11:40 a.m., Paul Walmsley wrote:
> On Wed, 14 Aug 2019, Logan Gunthorpe wrote:
> 
>> On 2019-08-14 7:35 a.m., Greentime Hu wrote:
>>
>>> Maybe this commit explains why it used HAVE_ARCH_PFN_VALID instead of 
>>> SPARSEMEM.
>>> https://github.com/torvalds/linux/commit/7b7bf499f79de3f6c85a340c8453a78789523f85
>>>
>>> BTW, I found another issue here.
>>> #define FIXADDR_TOP(VMALLOC_START)
>>> #define FIXADDR_START   (FIXADDR_TOP - FIXADDR_SIZE)
>>> #define VMEMMAP_END(VMALLOC_START - 1)
>>> #define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
>>> These 2 regions are overlapped.
>>>
>>> How about this fix? Not sure if it is good for everyone.
>>
>> Yes, this looks good to me. I can fold these changes into my patch and
>> send a v5 to the list.
> 
> The change to FIXADDR_TOP should be separated out into its own patch - it 
> probably needs to go up as a fix.

I don't think so... VMEMMAP_START doesn't exist until the sparsemem
patch so it can't be changed until after the sparsemem patch and no
sense adding a bug in the sparsemem patch...

Logan



Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-14 Thread Paul Walmsley
On Wed, 14 Aug 2019, Logan Gunthorpe wrote:

> On 2019-08-14 7:35 a.m., Greentime Hu wrote:
>
> > Maybe this commit explains why it used HAVE_ARCH_PFN_VALID instead of 
> > SPARSEMEM.
> > https://github.com/torvalds/linux/commit/7b7bf499f79de3f6c85a340c8453a78789523f85
> > 
> > BTW, I found another issue here.
> > #define FIXADDR_TOP(VMALLOC_START)
> > #define FIXADDR_START   (FIXADDR_TOP - FIXADDR_SIZE)
> > #define VMEMMAP_END(VMALLOC_START - 1)
> > #define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
> > These 2 regions are overlapped.
> > 
> > How about this fix? Not sure if it is good for everyone.
> 
> Yes, this looks good to me. I can fold these changes into my patch and
> send a v5 to the list.

The change to FIXADDR_TOP should be separated out into its own patch - it 
probably needs to go up as a fix.


- Paul


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-14 Thread Logan Gunthorpe



On 2019-08-14 7:35 a.m., Greentime Hu wrote:
> Logan Gunthorpe  於 2019年8月14日 週三 上午12:50寫道:
>>
>> On 2019-08-13 10:39 a.m., Paul Walmsley wrote:
>>> On Tue, 13 Aug 2019, Logan Gunthorpe wrote:
>>>
 On 2019-08-13 12:04 a.m., Greentime Hu wrote:

> Every architecture with mmu defines their own pfn_valid().

 Not true. Arm64, for example just uses the generic implementation in
 mmzone.h.
>>>
>>> arm64 seems to define their own:
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/Kconfig#n899
>>
>> Oh, yup. My mistake.
>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/mm/init.c#n235
>>>
>>> While there are many architectures which have their own pfn_valid();
>>> oddly, almost none of them set HAVE_ARCH_PFN_VALID ?
>>
>> Yes, much of this is super confusing. Seems HAVE_ARCH_PFN_VALID only
>> matters if SPARSEMEM is set. So risc-v probably doesn't need to set it
>> and we just need a #ifdef !CONFIG_FLATMEM around the pfn_valid
>> definition like other arches.
>>
> 
> Maybe this commit explains why it used HAVE_ARCH_PFN_VALID instead of 
> SPARSEMEM.
> https://github.com/torvalds/linux/commit/7b7bf499f79de3f6c85a340c8453a78789523f85
> 
> BTW, I found another issue here.
> #define FIXADDR_TOP(VMALLOC_START)
> #define FIXADDR_START   (FIXADDR_TOP - FIXADDR_SIZE)
> #define VMEMMAP_END(VMALLOC_START - 1)
> #define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
> These 2 regions are overlapped.
> 
> How about this fix? Not sure if it is good for everyone.

Yes, this looks good to me. I can fold these changes into my patch and
send a v5 to the list.

Thanks!

Logan


> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 3f12b069af1d..3c4d394679d0 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -115,9 +115,6 @@ config PGTABLE_LEVELS
> default 3 if 64BIT
> default 2
> 
> -config HAVE_ARCH_PFN_VALID
> -   def_bool y
> -
>  menu "Platform type"
> 
>  choice
> diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> index c207f6634b91..72e106b60bc5 100644
> --- a/arch/riscv/include/asm/fixmap.h
> +++ b/arch/riscv/include/asm/fixmap.h
> @@ -26,7 +26,7 @@ enum fixed_addresses {
>  };
> 
>  #define FIXADDR_SIZE   (__end_of_fixed_addresses * PAGE_SIZE)
> -#define FIXADDR_TOP(VMALLOC_START)
> +#define FIXADDR_TOP(VMEMMAP_START)
>  #define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
> 
>  #define FIXMAP_PAGE_IO PAGE_KERNEL
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 8ddb6c7fedac..83830997dce6 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -100,8 +100,10 @@ extern unsigned long min_low_pfn;
>  #define page_to_bus(page)  (page_to_phys(page))
>  #define phys_to_page(paddr)(pfn_to_page(phys_to_pfn(paddr)))
> 
> +#if defined(CONFIG_FLATMEM)
>  #define pfn_valid(pfn) \
> (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
> +#endif
> 
>  #define ARCH_PFN_OFFSET(pfn_base)



Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-14 Thread Greentime Hu
Logan Gunthorpe  於 2019年8月14日 週三 上午12:50寫道:
>
> On 2019-08-13 10:39 a.m., Paul Walmsley wrote:
> > On Tue, 13 Aug 2019, Logan Gunthorpe wrote:
> >
> >> On 2019-08-13 12:04 a.m., Greentime Hu wrote:
> >>
> >>> Every architecture with mmu defines their own pfn_valid().
> >>
> >> Not true. Arm64, for example just uses the generic implementation in
> >> mmzone.h.
> >
> > arm64 seems to define their own:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/Kconfig#n899
>
> Oh, yup. My mistake.
>
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/mm/init.c#n235
> >
> > While there are many architectures which have their own pfn_valid();
> > oddly, almost none of them set HAVE_ARCH_PFN_VALID ?
>
> Yes, much of this is super confusing. Seems HAVE_ARCH_PFN_VALID only
> matters if SPARSEMEM is set. So risc-v probably doesn't need to set it
> and we just need a #ifdef !CONFIG_FLATMEM around the pfn_valid
> definition like other arches.
>

Maybe this commit explains why it used HAVE_ARCH_PFN_VALID instead of SPARSEMEM.
https://github.com/torvalds/linux/commit/7b7bf499f79de3f6c85a340c8453a78789523f85

BTW, I found another issue here.
#define FIXADDR_TOP(VMALLOC_START)
#define FIXADDR_START   (FIXADDR_TOP - FIXADDR_SIZE)
#define VMEMMAP_END(VMALLOC_START - 1)
#define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
These 2 regions are overlapped.

How about this fix? Not sure if it is good for everyone.

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3f12b069af1d..3c4d394679d0 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -115,9 +115,6 @@ config PGTABLE_LEVELS
default 3 if 64BIT
default 2

-config HAVE_ARCH_PFN_VALID
-   def_bool y
-
 menu "Platform type"

 choice
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index c207f6634b91..72e106b60bc5 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -26,7 +26,7 @@ enum fixed_addresses {
 };

 #define FIXADDR_SIZE   (__end_of_fixed_addresses * PAGE_SIZE)
-#define FIXADDR_TOP(VMALLOC_START)
+#define FIXADDR_TOP(VMEMMAP_START)
 #define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)

 #define FIXMAP_PAGE_IO PAGE_KERNEL
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 8ddb6c7fedac..83830997dce6 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -100,8 +100,10 @@ extern unsigned long min_low_pfn;
 #define page_to_bus(page)  (page_to_phys(page))
 #define phys_to_page(paddr)(pfn_to_page(phys_to_pfn(paddr)))

+#if defined(CONFIG_FLATMEM)
 #define pfn_valid(pfn) \
(((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
+#endif

 #define ARCH_PFN_OFFSET(pfn_base)


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-13 Thread Logan Gunthorpe
On 2019-08-13 10:39 a.m., Paul Walmsley wrote:
> On Tue, 13 Aug 2019, Logan Gunthorpe wrote:
> 
>> On 2019-08-13 12:04 a.m., Greentime Hu wrote:
>>
>>> Every architecture with mmu defines their own pfn_valid().
>>
>> Not true. Arm64, for example just uses the generic implementation in
>> mmzone.h. 
> 
> arm64 seems to define their own:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/Kconfig#n899

Oh, yup. My mistake.

> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/mm/init.c#n235
> 
> While there are many architectures which have their own pfn_valid(); 
> oddly, almost none of them set HAVE_ARCH_PFN_VALID ?

Yes, much of this is super confusing. Seems HAVE_ARCH_PFN_VALID only
matters if SPARSEMEM is set. So risc-v probably doesn't need to set it
and we just need a #ifdef !CONFIG_FLATMEM around the pfn_valid
definition like other arches.

Logan



Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-13 Thread Paul Walmsley
On Tue, 13 Aug 2019, Paul Walmsley wrote:

> On Tue, 13 Aug 2019, Logan Gunthorpe wrote:
> 
> > On 2019-08-13 12:04 a.m., Greentime Hu wrote:
> > 
> > > Every architecture with mmu defines their own pfn_valid().
> > 
> > Not true. Arm64, for example just uses the generic implementation in
> > mmzone.h. 
> 
> arm64 seems to define their own:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/Kconfig#n899
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/mm/init.c#n235
> 
> While there are many architectures which have their own pfn_valid(); 
> oddly, almost none of them set HAVE_ARCH_PFN_VALID ?

(fixed the linux-mm@ address)


- Paul


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-13 Thread Paul Walmsley
On Tue, 13 Aug 2019, Logan Gunthorpe wrote:

> On 2019-08-13 12:04 a.m., Greentime Hu wrote:
> 
> > Every architecture with mmu defines their own pfn_valid().
> 
> Not true. Arm64, for example just uses the generic implementation in
> mmzone.h. 

arm64 seems to define their own:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/Kconfig#n899

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/mm/init.c#n235

While there are many architectures which have their own pfn_valid(); 
oddly, almost none of them set HAVE_ARCH_PFN_VALID ?


- Paul


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-13 Thread Logan Gunthorpe



On 2019-08-13 12:04 a.m., Greentime Hu wrote:
> I think  flat mem doesn't support memory-with-hole scenario.
> In mm/Kconfig, it says
> "
>   For systems that have holes in their physical address
>   spaces and for features like NUMA and memory hotplug,
>   choose "Sparse Memory"
> "
> IMHO, the memory-with-hole scenario should only be tested for sparse
> mem but flat mem.

Fair enough.

> The generic pfn_valid() is just for non-mmu arches. 

The generic pfn_valid() in asm-generic is only for non-mmu arches.

> Every architecture
> with mmu defines their own pfn_valid().

Not true. Arm64, for example just uses the generic implementation in
mmzone.h. My main question is whether we can just do that. If we can't
we should probably structure it like powerpc where they only use the
arch-specific helper for CONFIG_FLATMEM instead of when CONFIG_SPARSEMEM
isn't set.

Logan


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-13 Thread Greentime Hu
Hi Logan,

Logan Gunthorpe  於 2019年8月12日 週一 下午11:52寫道:
>
>
>
> On 2019-08-11 10:01 p.m., Greentime Hu wrote:
> > Hi Logan,
> >
> > Logan Gunthorpe  於 2019年8月10日 週六 上午3:03寫道:
> >>
> >>
> >>
> >> On 2019-08-09 11:01 a.m., Greentime Hu wrote:
> >>> Hi Logan,
> >>>
> >>> Logan Gunthorpe  於 2019年8月9日 週五 下午11:47寫道:
> 
> 
> 
>  On 2019-08-08 10:23 p.m., Greentime Hu wrote:
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 3f12b069af1d..208b3e14ccd8 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
> > default 2
> >
> >  config HAVE_ARCH_PFN_VALID
> > -   def_bool y
> > +   bool
> > +   default !SPARSEMEM_VMEMMAP
> >
> >  menu "Platform type"
> >
> > diff --git a/arch/riscv/include/asm/page.h 
> > b/arch/riscv/include/asm/page.h
> > index 8ddb6c7fedac..6991f7a5a4a7 100644
> > --- a/arch/riscv/include/asm/page.h
> > +++ b/arch/riscv/include/asm/page.h
> > @@ -93,16 +93,20 @@ extern unsigned long min_low_pfn;
> >  #define virt_to_pfn(vaddr) (phys_to_pfn(__pa(vaddr)))
> >  #define pfn_to_virt(pfn)   (__va(pfn_to_phys(pfn)))
> >
> > +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
> > +#define pfn_valid(pfn) \
> > +   (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
> >  #define virt_to_page(vaddr)(pfn_to_page(virt_to_pfn(vaddr)))
> >  #define page_to_virt(page) (pfn_to_virt(page_to_pfn(page)))
> > +#else
> > +#define virt_to_page(vaddr)((struct page *)u64)vaddr -
> > va_pa_offset) / PAGE_SIZE) * sizeof(struct page) + VMEMMAP_START))
> > +#define page_to_virt(pg)   ((void *)(u64)pg - VMEMMAP_START) /
> > sizeof(struct page)) * PAGE_SIZE) + va_pa_offset))
> > +#endif
> 
>  This doesn't make sense to me at all. It should always use pfn_to_page()
>  for virt_to_page() and the generic pfn_to_page()/page_to_pfn()
>  implementations essentially already do what you are doing in a cleaner
>  way. So I'd be really surprised if this does anything at all.
> 
> >>>
> >>> Thank you for point me out that. I just checked the generic
> >>> implementation and I should use that one.
> >>> Sorry I didn't check the generic one and just implement it again.
> >>> I think the only patch we need is the first part to use generic
> >>> pfn_valid(). I just tested it and yes it can boot successfully in dts
> >>> with hole.
> >>>
> >>> It will fail in this check ((pfn)-pfn_base) < max_mapnr.
> >>
> >> Sounds to me like max_mapnr is not set correctly. See the code in
> >> setup_bootmem(). Seems like 'mem_size' should be set to the largest
> >> memory block, not just the one that contains the kernel...
> >>
> >>
> >>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>> index 3f12b069af1d..208b3e14ccd8 100644
> >>> --- a/arch/riscv/Kconfig
> >>> +++ b/arch/riscv/Kconfig
> >>> @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
> >>> default 2
> >>>
> >>>  config HAVE_ARCH_PFN_VALID
> >>> -   def_bool y
> >>> +   bool
> >>> +   default !SPARSEMEM_VMEMMAP
> >>>
> >>>  menu "Platform type"
> >>>
> >>> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> >>> index 8ddb6c7fedac..80d28fa1e2eb 100644
> >>> --- a/arch/riscv/include/asm/page.h
> >>> +++ b/arch/riscv/include/asm/page.h
> >>> @@ -100,8 +100,10 @@ extern unsigned long min_low_pfn;
> >>>  #define page_to_bus(page)  (page_to_phys(page))
> >>>  #define phys_to_page(paddr)(pfn_to_page(phys_to_pfn(paddr)))
> >>>
> >>> +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
> >>>  #define pfn_valid(pfn) \
> >>> (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
> >>> +#endif
> >>>
> >>>  #define ARCH_PFN_OFFSET(pfn_base)
> >>
> >>
> >> This patch still makes no sense. I'm not sure why we have an arch
> >> specific pfn_valid() because it's very similar to the generic one. But
> >> my guess is there's a reason for it and it's not doing what it is
> >> supposed when you remove it for the sparsemem case.
> >
> > It will use another pfn_valid() implementation in
> > include/linux/mmzone.h if CONFIG_SPARSEMEM and
> > !CONFIG_HAVE_ARCH_PFN_VALID
> > It will be this one.
> >
> > static inline int pfn_valid(unsigned long pfn)
> > {
> > if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
> > return 0;
> > return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
> > }
>
> Ah, ok I see. "page.h" is only included in no-mmu arches. Which explains
> why riscv re-implements that macro. Couple follow up questions then:
>
> * Did you test the memory-with-hole scenario without the sparsemem
> patches? It seems pfn_valid() will be wrong regardless of sparse/flat mem.
>
> * Any chance we can just use the generic pfn_valid() function in all
> cases not just sparsemem? Can you test that?
>

I think  flat mem doesn't support 

Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-12 Thread Logan Gunthorpe



On 2019-08-11 10:01 p.m., Greentime Hu wrote:
> Hi Logan,
> 
> Logan Gunthorpe  於 2019年8月10日 週六 上午3:03寫道:
>>
>>
>>
>> On 2019-08-09 11:01 a.m., Greentime Hu wrote:
>>> Hi Logan,
>>>
>>> Logan Gunthorpe  於 2019年8月9日 週五 下午11:47寫道:



 On 2019-08-08 10:23 p.m., Greentime Hu wrote:
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 3f12b069af1d..208b3e14ccd8 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
> default 2
>
>  config HAVE_ARCH_PFN_VALID
> -   def_bool y
> +   bool
> +   default !SPARSEMEM_VMEMMAP
>
>  menu "Platform type"
>
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 8ddb6c7fedac..6991f7a5a4a7 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -93,16 +93,20 @@ extern unsigned long min_low_pfn;
>  #define virt_to_pfn(vaddr) (phys_to_pfn(__pa(vaddr)))
>  #define pfn_to_virt(pfn)   (__va(pfn_to_phys(pfn)))
>
> +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
> +#define pfn_valid(pfn) \
> +   (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
>  #define virt_to_page(vaddr)(pfn_to_page(virt_to_pfn(vaddr)))
>  #define page_to_virt(page) (pfn_to_virt(page_to_pfn(page)))
> +#else
> +#define virt_to_page(vaddr)((struct page *)u64)vaddr -
> va_pa_offset) / PAGE_SIZE) * sizeof(struct page) + VMEMMAP_START))
> +#define page_to_virt(pg)   ((void *)(u64)pg - VMEMMAP_START) /
> sizeof(struct page)) * PAGE_SIZE) + va_pa_offset))
> +#endif

 This doesn't make sense to me at all. It should always use pfn_to_page()
 for virt_to_page() and the generic pfn_to_page()/page_to_pfn()
 implementations essentially already do what you are doing in a cleaner
 way. So I'd be really surprised if this does anything at all.

>>>
>>> Thank you for point me out that. I just checked the generic
>>> implementation and I should use that one.
>>> Sorry I didn't check the generic one and just implement it again.
>>> I think the only patch we need is the first part to use generic
>>> pfn_valid(). I just tested it and yes it can boot successfully in dts
>>> with hole.
>>>
>>> It will fail in this check ((pfn)-pfn_base) < max_mapnr.
>>
>> Sounds to me like max_mapnr is not set correctly. See the code in
>> setup_bootmem(). Seems like 'mem_size' should be set to the largest
>> memory block, not just the one that contains the kernel...
>>
>>
>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>> index 3f12b069af1d..208b3e14ccd8 100644
>>> --- a/arch/riscv/Kconfig
>>> +++ b/arch/riscv/Kconfig
>>> @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
>>> default 2
>>>
>>>  config HAVE_ARCH_PFN_VALID
>>> -   def_bool y
>>> +   bool
>>> +   default !SPARSEMEM_VMEMMAP
>>>
>>>  menu "Platform type"
>>>
>>> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
>>> index 8ddb6c7fedac..80d28fa1e2eb 100644
>>> --- a/arch/riscv/include/asm/page.h
>>> +++ b/arch/riscv/include/asm/page.h
>>> @@ -100,8 +100,10 @@ extern unsigned long min_low_pfn;
>>>  #define page_to_bus(page)  (page_to_phys(page))
>>>  #define phys_to_page(paddr)(pfn_to_page(phys_to_pfn(paddr)))
>>>
>>> +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
>>>  #define pfn_valid(pfn) \
>>> (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
>>> +#endif
>>>
>>>  #define ARCH_PFN_OFFSET(pfn_base)
>>
>>
>> This patch still makes no sense. I'm not sure why we have an arch
>> specific pfn_valid() because it's very similar to the generic one. But
>> my guess is there's a reason for it and it's not doing what it is
>> supposed when you remove it for the sparsemem case.
> 
> It will use another pfn_valid() implementation in
> include/linux/mmzone.h if CONFIG_SPARSEMEM and
> !CONFIG_HAVE_ARCH_PFN_VALID
> It will be this one.
> 
> static inline int pfn_valid(unsigned long pfn)
> {
> if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
> return 0;
> return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
> }

Ah, ok I see. "page.h" is only included in no-mmu arches. Which explains
why riscv re-implements that macro. Couple follow up questions then:

* Did you test the memory-with-hole scenario without the sparsemem
patches? It seems pfn_valid() will be wrong regardless of sparse/flat mem.

* Any chance we can just use the generic pfn_valid() function in all
cases not just sparsemem? Can you test that?

Thanks,

Logan


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-11 Thread Greentime Hu
Hi Logan,

Logan Gunthorpe  於 2019年8月10日 週六 上午3:03寫道:
>
>
>
> On 2019-08-09 11:01 a.m., Greentime Hu wrote:
> > Hi Logan,
> >
> > Logan Gunthorpe  於 2019年8月9日 週五 下午11:47寫道:
> >>
> >>
> >>
> >> On 2019-08-08 10:23 p.m., Greentime Hu wrote:
> >>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>> index 3f12b069af1d..208b3e14ccd8 100644
> >>> --- a/arch/riscv/Kconfig
> >>> +++ b/arch/riscv/Kconfig
> >>> @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
> >>> default 2
> >>>
> >>>  config HAVE_ARCH_PFN_VALID
> >>> -   def_bool y
> >>> +   bool
> >>> +   default !SPARSEMEM_VMEMMAP
> >>>
> >>>  menu "Platform type"
> >>>
> >>> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> >>> index 8ddb6c7fedac..6991f7a5a4a7 100644
> >>> --- a/arch/riscv/include/asm/page.h
> >>> +++ b/arch/riscv/include/asm/page.h
> >>> @@ -93,16 +93,20 @@ extern unsigned long min_low_pfn;
> >>>  #define virt_to_pfn(vaddr) (phys_to_pfn(__pa(vaddr)))
> >>>  #define pfn_to_virt(pfn)   (__va(pfn_to_phys(pfn)))
> >>>
> >>> +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
> >>> +#define pfn_valid(pfn) \
> >>> +   (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
> >>>  #define virt_to_page(vaddr)(pfn_to_page(virt_to_pfn(vaddr)))
> >>>  #define page_to_virt(page) (pfn_to_virt(page_to_pfn(page)))
> >>> +#else
> >>> +#define virt_to_page(vaddr)((struct page *)u64)vaddr -
> >>> va_pa_offset) / PAGE_SIZE) * sizeof(struct page) + VMEMMAP_START))
> >>> +#define page_to_virt(pg)   ((void *)(u64)pg - VMEMMAP_START) /
> >>> sizeof(struct page)) * PAGE_SIZE) + va_pa_offset))
> >>> +#endif
> >>
> >> This doesn't make sense to me at all. It should always use pfn_to_page()
> >> for virt_to_page() and the generic pfn_to_page()/page_to_pfn()
> >> implementations essentially already do what you are doing in a cleaner
> >> way. So I'd be really surprised if this does anything at all.
> >>
> >
> > Thank you for point me out that. I just checked the generic
> > implementation and I should use that one.
> > Sorry I didn't check the generic one and just implement it again.
> > I think the only patch we need is the first part to use generic
> > pfn_valid(). I just tested it and yes it can boot successfully in dts
> > with hole.
> >
> > It will fail in this check ((pfn)-pfn_base) < max_mapnr.
>
> Sounds to me like max_mapnr is not set correctly. See the code in
> setup_bootmem(). Seems like 'mem_size' should be set to the largest
> memory block, not just the one that contains the kernel...
>
>
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 3f12b069af1d..208b3e14ccd8 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
> > default 2
> >
> >  config HAVE_ARCH_PFN_VALID
> > -   def_bool y
> > +   bool
> > +   default !SPARSEMEM_VMEMMAP
> >
> >  menu "Platform type"
> >
> > diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> > index 8ddb6c7fedac..80d28fa1e2eb 100644
> > --- a/arch/riscv/include/asm/page.h
> > +++ b/arch/riscv/include/asm/page.h
> > @@ -100,8 +100,10 @@ extern unsigned long min_low_pfn;
> >  #define page_to_bus(page)  (page_to_phys(page))
> >  #define phys_to_page(paddr)(pfn_to_page(phys_to_pfn(paddr)))
> >
> > +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
> >  #define pfn_valid(pfn) \
> > (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
> > +#endif
> >
> >  #define ARCH_PFN_OFFSET(pfn_base)
>
>
> This patch still makes no sense. I'm not sure why we have an arch
> specific pfn_valid() because it's very similar to the generic one. But
> my guess is there's a reason for it and it's not doing what it is
> supposed when you remove it for the sparsemem case.

It will use another pfn_valid() implementation in
include/linux/mmzone.h if CONFIG_SPARSEMEM and
!CONFIG_HAVE_ARCH_PFN_VALID
It will be this one.

static inline int pfn_valid(unsigned long pfn)
{
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
}

This generic pfn_valid() API can check the pfn is valid or not even if
there a hole in the memory.
For example:
A hole is between 0x1 to 0x18000 (4GB-6GB) in my dts test case.

[0.00] In setup_bootmem, pfn_valid(0x18)=1
[0.00] In setup_bootmem, pfn_valid(0x8)=1
[0.00] In setup_bootmem, pfn_valid(0x80200)=1
[0.00] In setup_bootmem, pfn_valid(0x80300)=1
[0.00] In setup_bootmem, pfn_valid(0x16)=0
[0.00] In setup_bootmem, pfn_valid(0x17)=0
[0.00] In setup_bootmem, pfn_valid(0x12)=0
[0.00] In setup_bootmem, pfn_valid(0x10)=0
[0.00] In setup_bootmem, pfn_valid(0xf)=1

This generic pfn_valid() could tell the pfn is valid or not.


I think this one is only available for flatmem.
#define pfn_valid(pfn)  (((pfn) >= pfn_base) && 

Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-09 Thread Logan Gunthorpe



On 2019-08-09 11:01 a.m., Greentime Hu wrote:
> Hi Logan,
> 
> Logan Gunthorpe  於 2019年8月9日 週五 下午11:47寫道:
>>
>>
>>
>> On 2019-08-08 10:23 p.m., Greentime Hu wrote:
>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>> index 3f12b069af1d..208b3e14ccd8 100644
>>> --- a/arch/riscv/Kconfig
>>> +++ b/arch/riscv/Kconfig
>>> @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
>>> default 2
>>>
>>>  config HAVE_ARCH_PFN_VALID
>>> -   def_bool y
>>> +   bool
>>> +   default !SPARSEMEM_VMEMMAP
>>>
>>>  menu "Platform type"
>>>
>>> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
>>> index 8ddb6c7fedac..6991f7a5a4a7 100644
>>> --- a/arch/riscv/include/asm/page.h
>>> +++ b/arch/riscv/include/asm/page.h
>>> @@ -93,16 +93,20 @@ extern unsigned long min_low_pfn;
>>>  #define virt_to_pfn(vaddr) (phys_to_pfn(__pa(vaddr)))
>>>  #define pfn_to_virt(pfn)   (__va(pfn_to_phys(pfn)))
>>>
>>> +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
>>> +#define pfn_valid(pfn) \
>>> +   (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
>>>  #define virt_to_page(vaddr)(pfn_to_page(virt_to_pfn(vaddr)))
>>>  #define page_to_virt(page) (pfn_to_virt(page_to_pfn(page)))
>>> +#else
>>> +#define virt_to_page(vaddr)((struct page *)u64)vaddr -
>>> va_pa_offset) / PAGE_SIZE) * sizeof(struct page) + VMEMMAP_START))
>>> +#define page_to_virt(pg)   ((void *)(u64)pg - VMEMMAP_START) /
>>> sizeof(struct page)) * PAGE_SIZE) + va_pa_offset))
>>> +#endif
>>
>> This doesn't make sense to me at all. It should always use pfn_to_page()
>> for virt_to_page() and the generic pfn_to_page()/page_to_pfn()
>> implementations essentially already do what you are doing in a cleaner
>> way. So I'd be really surprised if this does anything at all.
>>
> 
> Thank you for point me out that. I just checked the generic
> implementation and I should use that one.
> Sorry I didn't check the generic one and just implement it again.
> I think the only patch we need is the first part to use generic
> pfn_valid(). I just tested it and yes it can boot successfully in dts
> with hole.
> 
> It will fail in this check ((pfn)-pfn_base) < max_mapnr.

Sounds to me like max_mapnr is not set correctly. See the code in
setup_bootmem(). Seems like 'mem_size' should be set to the largest
memory block, not just the one that contains the kernel...


> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 3f12b069af1d..208b3e14ccd8 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
> default 2
> 
>  config HAVE_ARCH_PFN_VALID
> -   def_bool y
> +   bool
> +   default !SPARSEMEM_VMEMMAP
> 
>  menu "Platform type"
> 
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 8ddb6c7fedac..80d28fa1e2eb 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -100,8 +100,10 @@ extern unsigned long min_low_pfn;
>  #define page_to_bus(page)  (page_to_phys(page))
>  #define phys_to_page(paddr)(pfn_to_page(phys_to_pfn(paddr)))
> 
> +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
>  #define pfn_valid(pfn) \
> (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
> +#endif
> 
>  #define ARCH_PFN_OFFSET(pfn_base)


This patch still makes no sense. I'm not sure why we have an arch
specific pfn_valid() because it's very similar to the generic one. But
my guess is there's a reason for it and it's not doing what it is
supposed when you remove it for the sparsemem case.

Logan


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-09 Thread Greentime Hu
Hi Logan,

Logan Gunthorpe  於 2019年8月9日 週五 下午11:47寫道:
>
>
>
> On 2019-08-08 10:23 p.m., Greentime Hu wrote:
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 3f12b069af1d..208b3e14ccd8 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
> > default 2
> >
> >  config HAVE_ARCH_PFN_VALID
> > -   def_bool y
> > +   bool
> > +   default !SPARSEMEM_VMEMMAP
> >
> >  menu "Platform type"
> >
> > diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> > index 8ddb6c7fedac..6991f7a5a4a7 100644
> > --- a/arch/riscv/include/asm/page.h
> > +++ b/arch/riscv/include/asm/page.h
> > @@ -93,16 +93,20 @@ extern unsigned long min_low_pfn;
> >  #define virt_to_pfn(vaddr) (phys_to_pfn(__pa(vaddr)))
> >  #define pfn_to_virt(pfn)   (__va(pfn_to_phys(pfn)))
> >
> > +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
> > +#define pfn_valid(pfn) \
> > +   (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
> >  #define virt_to_page(vaddr)(pfn_to_page(virt_to_pfn(vaddr)))
> >  #define page_to_virt(page) (pfn_to_virt(page_to_pfn(page)))
> > +#else
> > +#define virt_to_page(vaddr)((struct page *)u64)vaddr -
> > va_pa_offset) / PAGE_SIZE) * sizeof(struct page) + VMEMMAP_START))
> > +#define page_to_virt(pg)   ((void *)(u64)pg - VMEMMAP_START) /
> > sizeof(struct page)) * PAGE_SIZE) + va_pa_offset))
> > +#endif
>
> This doesn't make sense to me at all. It should always use pfn_to_page()
> for virt_to_page() and the generic pfn_to_page()/page_to_pfn()
> implementations essentially already do what you are doing in a cleaner
> way. So I'd be really surprised if this does anything at all.
>

Thank you for point me out that. I just checked the generic
implementation and I should use that one.
Sorry I didn't check the generic one and just implement it again.
I think the only patch we need is the first part to use generic
pfn_valid(). I just tested it and yes it can boot successfully in dts
with hole.

It will fail in this check ((pfn)-pfn_base) < max_mapnr.
In my test case it will be
((6GB>>PAGE_SHIFT)-(2GB>>PAGE_SHIFT)=(4GB>>PAGE_SHIFT) <
(3GB>>PAGE_SHIFT) to return false.
 memory@8000 {
 device_type = "memory";
 reg = <0x0 0x8000 0x0 0x8000>;
 };
 memory@18000 {
 device_type = "memory";
 reg = <0x1 0x8000 0x0 0x4000>;
 };

To cause the check here failed to initialize page struct.

for (pfn = start_pfn; pfn < end_pfn; pfn++) {
/*
 * There can be holes in boot-time mem_map[]s handed to this
 * function.  They do not exist on hotplugged memory.
 */
if (context == MEMMAP_EARLY) {
if (!early_pfn_valid(pfn))
continue;
if (!early_pfn_in_nid(pfn, nid))
continue;
if (overlap_memmap_init(zone, ))
continue;
if (defer_init(nid, pfn, end_pfn))
break;
}

page = pfn_to_page(pfn);
__init_single_page(page, pfn, zone, nid);


--

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3f12b069af1d..208b3e14ccd8 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -116,7 +116,8 @@ config PGTABLE_LEVELS
default 2

 config HAVE_ARCH_PFN_VALID
-   def_bool y
+   bool
+   default !SPARSEMEM_VMEMMAP

 menu "Platform type"

diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 8ddb6c7fedac..80d28fa1e2eb 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -100,8 +100,10 @@ extern unsigned long min_low_pfn;
 #define page_to_bus(page)  (page_to_phys(page))
 #define phys_to_page(paddr)(pfn_to_page(phys_to_pfn(paddr)))

+#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define pfn_valid(pfn) \
(((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
+#endif

 #define ARCH_PFN_OFFSET(pfn_base)


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-08-09 Thread Logan Gunthorpe



On 2019-08-08 10:23 p.m., Greentime Hu wrote:
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 3f12b069af1d..208b3e14ccd8 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -116,7 +116,8 @@ config PGTABLE_LEVELS
>         default 2
> 
>  config HAVE_ARCH_PFN_VALID
> -       def_bool y
> +       bool
> +       default !SPARSEMEM_VMEMMAP
> 
>  menu "Platform type"
> 
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 8ddb6c7fedac..6991f7a5a4a7 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -93,16 +93,20 @@ extern unsigned long min_low_pfn;
>  #define virt_to_pfn(vaddr)     (phys_to_pfn(__pa(vaddr)))
>  #define pfn_to_virt(pfn)       (__va(pfn_to_phys(pfn)))
> 
> +#if !defined(CONFIG_SPARSEMEM_VMEMMAP)
> +#define pfn_valid(pfn) \
> +       (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
>  #define virt_to_page(vaddr)    (pfn_to_page(virt_to_pfn(vaddr)))
>  #define page_to_virt(page)     (pfn_to_virt(page_to_pfn(page)))
> +#else
> +#define virt_to_page(vaddr)    ((struct page *)u64)vaddr -
> va_pa_offset) / PAGE_SIZE) * sizeof(struct page) + VMEMMAP_START))
> +#define page_to_virt(pg)       ((void *)(u64)pg - VMEMMAP_START) /
> sizeof(struct page)) * PAGE_SIZE) + va_pa_offset))
> +#endif

This doesn't make sense to me at all. It should always use pfn_to_page()
for virt_to_page() and the generic pfn_to_page()/page_to_pfn()
implementations essentially already do what you are doing in a cleaner
way. So I'd be really surprised if this does anything at all.

Logan


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-07-31 Thread Greentime Hu
Hi Logan,

Logan Gunthorpe  於 2019年8月1日 週四 上午1:08寫道:
>
>
>
> On 2019-07-31 12:30 a.m., Greentime Hu wrote:
> > I look this issue more closely.
> > I found it always sets each memblock region to node 0. Does this make sense?
> > I am not sure if I understand this correctly. Do you have any idea for
> > this? Thank you. :)
>
> Yes, I think this is normal. When we talk about memory nodes we're
> talking about NUMA nodes which is unrelated to device tree nodes.

Ok, but it seems the second memblock_region may overwrite the first
memblock_region in for_each_memblock(memory, reg)  loop. It always
uses this API to set to node 0.
memblock_set_node(PFN_PHYS(start_pfn),PFN_PHYS(end_pfn - start_pfn),
,0)


> I'm not really sure what's causing the crash. Have you verified it's
> this patch that causes it? Is it related to there being a hole in your
> memory, does it work if you only have one memory node?
>

It works fine if there is only one memory node described in dts.

I think it is related to there being a hole in the device tree script.
I don't actually have a platform with a hole in the memory region, so
I use device tree script to describe it.

The physical address layout will be like this.
2GB-3GB-hole-6GB-7GB

memory@8000 {
device_type = "memory";
reg = <0x0 0x8000 0x0 0x4000>;
};
memory@18000 {
device_type = "memory";
reg = <0x1 0x8000 0x0 0x4000>;
};

Thank you for the quick reply. :)


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-07-31 Thread Logan Gunthorpe



On 2019-07-31 12:30 a.m., Greentime Hu wrote:
> I look this issue more closely.
> I found it always sets each memblock region to node 0. Does this make sense?
> I am not sure if I understand this correctly. Do you have any idea for
> this? Thank you. :)

Yes, I think this is normal. When we talk about memory nodes we're
talking about NUMA nodes which is unrelated to device tree nodes.

I'm not really sure what's causing the crash. Have you verified it's
this patch that causes it? Is it related to there being a hole in your
memory, does it work if you only have one memory node?

Thanks,

Logan


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-07-31 Thread Greentime Hu
Hi Logan,

Logan Gunthorpe  於 2019年1月10日 週四 上午5:07寫道:
>
> This patch implements sparsemem support for risc-v which helps pave the
> way for memory hotplug and eventually P2P support.
>
> We introduce Kconfig options for virtual and physical address bits which
> are used to calculate the size of the vmemmap and set the
> MAX_PHYSMEM_BITS.
>
> The vmemmap is located directly before the VMALLOC region and sized
> such that we can allocate enough pages to populate all the virtual
> address space in the system (similar to the way it's done in arm64).
>
> During initialization, call memblocks_present() and sparse_init(),
> and provide a stub for vmemmap_populate() (all of which is similar to
> arm64).
>
> Signed-off-by: Logan Gunthorpe 
> Reviewed-by: Palmer Dabbelt 
> Cc: Albert Ou 
> Cc: Andrew Waterman 
> Cc: Olof Johansson 
> Cc: Michael Clark 
> Cc: Rob Herring 
> Cc: Zong Li 
> ---
>  arch/riscv/Kconfig | 23 +++
>  arch/riscv/include/asm/pgtable.h   | 21 +
>  arch/riscv/include/asm/sparsemem.h | 11 +++
>  arch/riscv/kernel/setup.c  |  4 +++-
>  arch/riscv/mm/init.c   |  8 
>  5 files changed, 62 insertions(+), 5 deletions(-)
>  create mode 100644 arch/riscv/include/asm/sparsemem.h
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index e0d7d61779a6..bd659327bc6b 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -54,12 +54,32 @@ config ZONE_DMA32
> bool
> default y if 64BIT
>
> +config VA_BITS
> +   int
> +   default 32 if 32BIT
> +   default 39 if 64BIT
> +
> +config PA_BITS
> +   int
> +   default 34 if 32BIT
> +   default 56 if 64BIT
> +
>  config PAGE_OFFSET
> hex
> default 0xC000 if 32BIT && MAXPHYSMEM_2GB
> default 0x8000 if 64BIT && MAXPHYSMEM_2GB
> default 0xffe0 if 64BIT && MAXPHYSMEM_128GB
>
> +config ARCH_FLATMEM_ENABLE
> +   def_bool y
> +
> +config ARCH_SPARSEMEM_ENABLE
> +   def_bool y
> +   select SPARSEMEM_VMEMMAP_ENABLE
> +
> +config ARCH_SELECT_MEMORY_MODEL
> +   def_bool ARCH_SPARSEMEM_ENABLE
> +
>  config STACKTRACE_SUPPORT
> def_bool y
>
> @@ -94,6 +114,9 @@ config PGTABLE_LEVELS
>  config HAVE_KPROBES
> def_bool n
>
> +config HAVE_ARCH_PFN_VALID
> +   def_bool y
> +
>  menu "Platform type"
>
>  choice
> diff --git a/arch/riscv/include/asm/pgtable.h 
> b/arch/riscv/include/asm/pgtable.h
> index 16301966d65b..e1162336f5ea 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -89,6 +89,23 @@ extern pgd_t swapper_pg_dir[];
>  #define __S110 PAGE_SHARED_EXEC
>  #define __S111 PAGE_SHARED_EXEC
>
> +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
> +#define VMALLOC_END  (PAGE_OFFSET - 1)
> +#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
> +
> +/*
> + * Roughly size the vmemmap space to be large enough to fit enough
> + * struct pages to map half the virtual address space. Then
> + * position vmemmap directly below the VMALLOC region.
> + */
> +#define VMEMMAP_SHIFT \
> +   (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
> +#define VMEMMAP_SIZE   (1UL << VMEMMAP_SHIFT)
> +#define VMEMMAP_END(VMALLOC_START - 1)
> +#define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
> +
> +#define vmemmap((struct page *)VMEMMAP_START)
> +
>  /*
>   * ZERO_PAGE is a global shared page that is always zero,
>   * used for zero-mapped memory areas, etc.
> @@ -411,10 +428,6 @@ static inline void pgtable_cache_init(void)
> /* No page table caches to initialize */
>  }
>
> -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
> -#define VMALLOC_END  (PAGE_OFFSET - 1)
> -#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
> -
>  /*
>   * Task size is 0x400 for RV64 or 0xb80 for RV32.
>   * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
> diff --git a/arch/riscv/include/asm/sparsemem.h 
> b/arch/riscv/include/asm/sparsemem.h
> new file mode 100644
> index ..b58ba2d9ed6e
> --- /dev/null
> +++ b/arch/riscv/include/asm/sparsemem.h
> @@ -0,0 +1,11 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_SPARSEMEM_H
> +#define __ASM_SPARSEMEM_H
> +
> +#ifdef CONFIG_SPARSEMEM
> +#define MAX_PHYSMEM_BITS   CONFIG_PA_BITS
> +#define SECTION_SIZE_BITS  27
> +#endif /* CONFIG_SPARSEMEM */
> +
> +#endif /* __ASM_SPARSEMEM_H */
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index fc8006a042eb..98f39adefb1a 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -193,6 +193,9 @@ static void __init setup_bootmem(void)
>   PFN_PHYS(end_pfn - start_pfn),
>   , 0);
> }
> +
> +   memblocks_present();
> +   sparse_init();
>  }

I just applied this patch to Linux kernel 5.2.
I used a dts with 2 memory nodes with hole int it.


Re: [PATCH v4 2/2] RISC-V: Implement sparsemem

2019-01-15 Thread Christoph Hellwig
Looks fine,

Reviewed-by: Christoph Hellwig 


[PATCH v4 2/2] RISC-V: Implement sparsemem

2019-01-09 Thread Logan Gunthorpe
This patch implements sparsemem support for risc-v which helps pave the
way for memory hotplug and eventually P2P support.

We introduce Kconfig options for virtual and physical address bits which
are used to calculate the size of the vmemmap and set the
MAX_PHYSMEM_BITS.

The vmemmap is located directly before the VMALLOC region and sized
such that we can allocate enough pages to populate all the virtual
address space in the system (similar to the way it's done in arm64).

During initialization, call memblocks_present() and sparse_init(),
and provide a stub for vmemmap_populate() (all of which is similar to
arm64).

Signed-off-by: Logan Gunthorpe 
Reviewed-by: Palmer Dabbelt 
Cc: Albert Ou 
Cc: Andrew Waterman 
Cc: Olof Johansson 
Cc: Michael Clark 
Cc: Rob Herring 
Cc: Zong Li 
---
 arch/riscv/Kconfig | 23 +++
 arch/riscv/include/asm/pgtable.h   | 21 +
 arch/riscv/include/asm/sparsemem.h | 11 +++
 arch/riscv/kernel/setup.c  |  4 +++-
 arch/riscv/mm/init.c   |  8 
 5 files changed, 62 insertions(+), 5 deletions(-)
 create mode 100644 arch/riscv/include/asm/sparsemem.h

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e0d7d61779a6..bd659327bc6b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -54,12 +54,32 @@ config ZONE_DMA32
bool
default y if 64BIT
 
+config VA_BITS
+   int
+   default 32 if 32BIT
+   default 39 if 64BIT
+
+config PA_BITS
+   int
+   default 34 if 32BIT
+   default 56 if 64BIT
+
 config PAGE_OFFSET
hex
default 0xC000 if 32BIT && MAXPHYSMEM_2GB
default 0x8000 if 64BIT && MAXPHYSMEM_2GB
default 0xffe0 if 64BIT && MAXPHYSMEM_128GB
 
+config ARCH_FLATMEM_ENABLE
+   def_bool y
+
+config ARCH_SPARSEMEM_ENABLE
+   def_bool y
+   select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_SELECT_MEMORY_MODEL
+   def_bool ARCH_SPARSEMEM_ENABLE
+
 config STACKTRACE_SUPPORT
def_bool y
 
@@ -94,6 +114,9 @@ config PGTABLE_LEVELS
 config HAVE_KPROBES
def_bool n
 
+config HAVE_ARCH_PFN_VALID
+   def_bool y
+
 menu "Platform type"
 
 choice
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 16301966d65b..e1162336f5ea 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -89,6 +89,23 @@ extern pgd_t swapper_pg_dir[];
 #define __S110 PAGE_SHARED_EXEC
 #define __S111 PAGE_SHARED_EXEC
 
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END  (PAGE_OFFSET - 1)
+#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
+
+/*
+ * Roughly size the vmemmap space to be large enough to fit enough
+ * struct pages to map half the virtual address space. Then
+ * position vmemmap directly below the VMALLOC region.
+ */
+#define VMEMMAP_SHIFT \
+   (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE   (1UL << VMEMMAP_SHIFT)
+#define VMEMMAP_END(VMALLOC_START - 1)
+#define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
+
+#define vmemmap((struct page *)VMEMMAP_START)
+
 /*
  * ZERO_PAGE is a global shared page that is always zero,
  * used for zero-mapped memory areas, etc.
@@ -411,10 +428,6 @@ static inline void pgtable_cache_init(void)
/* No page table caches to initialize */
 }
 
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END  (PAGE_OFFSET - 1)
-#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
-
 /*
  * Task size is 0x400 for RV64 or 0xb80 for RV32.
  * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
diff --git a/arch/riscv/include/asm/sparsemem.h 
b/arch/riscv/include/asm/sparsemem.h
new file mode 100644
index ..b58ba2d9ed6e
--- /dev/null
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_SPARSEMEM_H
+#define __ASM_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+#define MAX_PHYSMEM_BITS   CONFIG_PA_BITS
+#define SECTION_SIZE_BITS  27
+#endif /* CONFIG_SPARSEMEM */
+
+#endif /* __ASM_SPARSEMEM_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index fc8006a042eb..98f39adefb1a 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -193,6 +193,9 @@ static void __init setup_bootmem(void)
  PFN_PHYS(end_pfn - start_pfn),
  , 0);
}
+
+   memblocks_present();
+   sparse_init();
 }
 
 void __init setup_arch(char **cmdline_p)
@@ -224,4 +227,3 @@ void __init setup_arch(char **cmdline_p)
 
riscv_fill_hwcap();
 }
-
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 1d9bfaff60bc..09568d5bc5b8 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -69,3 +69,11 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 {
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
+
+#ifdef CONFIG_SPARSEMEM