Re: [PATCH v9 01/12] mm/sparsemem: Introduce struct mem_section_usage

2019-06-18 Thread Wei Yang
On Tue, Jun 18, 2019 at 02:56:09PM -0700, Dan Williams wrote:
>On Sun, Jun 16, 2019 at 6:11 AM Wei Yang  wrote:
>>
>> On Wed, Jun 05, 2019 at 02:57:54PM -0700, Dan Williams wrote:
>> >Towards enabling memory hotplug to track partial population of a
>> >section, introduce 'struct mem_section_usage'.
>> >
>> >A pointer to a 'struct mem_section_usage' instance replaces the existing
>> >pointer to a 'pageblock_flags' bitmap. Effectively it adds one more
>> >'unsigned long' beyond the 'pageblock_flags' (usemap) allocation to
>> >house a new 'subsection_map' bitmap.  The new bitmap enables the memory
>> >hot{plug,remove} implementation to act on incremental sub-divisions of a
>> >section.
>> >
>> >The default SUBSECTION_SHIFT is chosen to keep the 'subsection_map' no
>> >larger than a single 'unsigned long' on the major architectures.
>> >Alternatively an architecture can define ARCH_SUBSECTION_SHIFT to
>> >override the default PMD_SHIFT. Note that PowerPC needs to use
>> >ARCH_SUBSECTION_SHIFT to workaround PMD_SHIFT being a non-constant
>> >expression on PowerPC.
>> >
>> >The primary motivation for this functionality is to support platforms
>> >that mix "System RAM" and "Persistent Memory" within a single section,
>> >or multiple PMEM ranges with different mapping lifetimes within a single
>> >section. The section restriction for hotplug has caused an ongoing saga
>> >of hacks and bugs for devm_memremap_pages() users.
>> >
>> >Beyond the fixups to teach existing paths how to retrieve the 'usemap'
>> >from a section, and updates to usemap allocation path, there are no
>> >expected behavior changes.
>> >
>> >Cc: Michal Hocko 
>> >Cc: Vlastimil Babka 
>> >Cc: Logan Gunthorpe 
>> >Cc: Oscar Salvador 
>> >Cc: Pavel Tatashin 
>> >Cc: Benjamin Herrenschmidt 
>> >Cc: Paul Mackerras 
>> >Cc: Michael Ellerman 
>> >Signed-off-by: Dan Williams 
>> >---
>> > arch/powerpc/include/asm/sparsemem.h |3 +
>> > include/linux/mmzone.h   |   48 +++-
>> > mm/memory_hotplug.c  |   18 
>> > mm/page_alloc.c  |2 -
>> > mm/sparse.c  |   81 +-
>> > 5 files changed, 99 insertions(+), 53 deletions(-)
>> >
>> >diff --git a/arch/powerpc/include/asm/sparsemem.h 
>> >b/arch/powerpc/include/asm/sparsemem.h
>> >index 3192d454a733..1aa3c9303bf8 100644
>> >--- a/arch/powerpc/include/asm/sparsemem.h
>> >+++ b/arch/powerpc/include/asm/sparsemem.h
>> >@@ -10,6 +10,9 @@
>> >  */
>> > #define SECTION_SIZE_BITS   24
>> >
>> >+/* Reflect the largest possible PMD-size as the subsection-size constant */
>> >+#define ARCH_SUBSECTION_SHIFT 24
>> >+
>> > #endif /* CONFIG_SPARSEMEM */
>> >
>> > #ifdef CONFIG_MEMORY_HOTPLUG
>> >diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>> >index 427b79c39b3c..ac163f2f274f 100644
>> >--- a/include/linux/mmzone.h
>> >+++ b/include/linux/mmzone.h
>> >@@ -1161,6 +1161,44 @@ static inline unsigned long 
>> >section_nr_to_pfn(unsigned long sec)
>> > #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & 
>> > PAGE_SECTION_MASK)
>> > #define SECTION_ALIGN_DOWN(pfn)   ((pfn) & PAGE_SECTION_MASK)
>> >
>> >+/*
>> >+ * SUBSECTION_SHIFT must be constant since it is used to declare
>> >+ * subsection_map and related bitmaps without triggering the generation
>> >+ * of variable-length arrays. The most natural size for a subsection is
>> >+ * a PMD-page. For architectures that do not have a constant PMD-size
>> >+ * ARCH_SUBSECTION_SHIFT can be set to a constant max size, or otherwise
>> >+ * fallback to 2MB.
>> >+ */
>> >+#if defined(ARCH_SUBSECTION_SHIFT)
>> >+#define SUBSECTION_SHIFT (ARCH_SUBSECTION_SHIFT)
>> >+#elif defined(PMD_SHIFT)
>> >+#define SUBSECTION_SHIFT (PMD_SHIFT)
>> >+#else
>> >+/*
>> >+ * Memory hotplug enabled platforms avoid this default because they
>> >+ * either define ARCH_SUBSECTION_SHIFT, or PMD_SHIFT is a constant, but
>> >+ * this is kept as a backstop to allow compilation on
>> >+ * !ARCH_ENABLE_MEMORY_HOTPLUG archs.
>> >+ */
>> >+#define SUBSECTION_SHIFT 21
>> >+#endif
>> >+
>> >+#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
>> >+#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
>> >+#define PAGE_SUBSECTION_MASK ((~(PAGES_PER_SUBSECTION-1)))
>>
>> One pair of brackets could be removed, IMHO.
>
>Sure.
>
>>
>> >+
>> >+#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
>> >+#error Subsection size exceeds section size
>> >+#else
>> >+#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - 
>> >SUBSECTION_SHIFT))
>> >+#endif
>> >+
>> >+struct mem_section_usage {
>> >+  DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
>> >+  /* See declaration of similar field in struct zone */
>> >+  unsigned long pageblock_flags[0];
>> >+};
>> >+
>> > struct page;
>> > struct page_ext;
>> > struct mem_section {
>> >@@ -1178,8 +1216,7 @@ struct mem_section {
>> >*/
>> >   unsigned long section_mem_map;
>> >

Re: [PATCH v9 01/12] mm/sparsemem: Introduce struct mem_section_usage

2019-06-18 Thread Dan Williams
On Sun, Jun 16, 2019 at 6:11 AM Wei Yang  wrote:
>
> On Wed, Jun 05, 2019 at 02:57:54PM -0700, Dan Williams wrote:
> >Towards enabling memory hotplug to track partial population of a
> >section, introduce 'struct mem_section_usage'.
> >
> >A pointer to a 'struct mem_section_usage' instance replaces the existing
> >pointer to a 'pageblock_flags' bitmap. Effectively it adds one more
> >'unsigned long' beyond the 'pageblock_flags' (usemap) allocation to
> >house a new 'subsection_map' bitmap.  The new bitmap enables the memory
> >hot{plug,remove} implementation to act on incremental sub-divisions of a
> >section.
> >
> >The default SUBSECTION_SHIFT is chosen to keep the 'subsection_map' no
> >larger than a single 'unsigned long' on the major architectures.
> >Alternatively an architecture can define ARCH_SUBSECTION_SHIFT to
> >override the default PMD_SHIFT. Note that PowerPC needs to use
> >ARCH_SUBSECTION_SHIFT to workaround PMD_SHIFT being a non-constant
> >expression on PowerPC.
> >
> >The primary motivation for this functionality is to support platforms
> >that mix "System RAM" and "Persistent Memory" within a single section,
> >or multiple PMEM ranges with different mapping lifetimes within a single
> >section. The section restriction for hotplug has caused an ongoing saga
> >of hacks and bugs for devm_memremap_pages() users.
> >
> >Beyond the fixups to teach existing paths how to retrieve the 'usemap'
> >from a section, and updates to usemap allocation path, there are no
> >expected behavior changes.
> >
> >Cc: Michal Hocko 
> >Cc: Vlastimil Babka 
> >Cc: Logan Gunthorpe 
> >Cc: Oscar Salvador 
> >Cc: Pavel Tatashin 
> >Cc: Benjamin Herrenschmidt 
> >Cc: Paul Mackerras 
> >Cc: Michael Ellerman 
> >Signed-off-by: Dan Williams 
> >---
> > arch/powerpc/include/asm/sparsemem.h |3 +
> > include/linux/mmzone.h   |   48 +++-
> > mm/memory_hotplug.c  |   18 
> > mm/page_alloc.c  |2 -
> > mm/sparse.c  |   81 +-
> > 5 files changed, 99 insertions(+), 53 deletions(-)
> >
> >diff --git a/arch/powerpc/include/asm/sparsemem.h 
> >b/arch/powerpc/include/asm/sparsemem.h
> >index 3192d454a733..1aa3c9303bf8 100644
> >--- a/arch/powerpc/include/asm/sparsemem.h
> >+++ b/arch/powerpc/include/asm/sparsemem.h
> >@@ -10,6 +10,9 @@
> >  */
> > #define SECTION_SIZE_BITS   24
> >
> >+/* Reflect the largest possible PMD-size as the subsection-size constant */
> >+#define ARCH_SUBSECTION_SHIFT 24
> >+
> > #endif /* CONFIG_SPARSEMEM */
> >
> > #ifdef CONFIG_MEMORY_HOTPLUG
> >diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> >index 427b79c39b3c..ac163f2f274f 100644
> >--- a/include/linux/mmzone.h
> >+++ b/include/linux/mmzone.h
> >@@ -1161,6 +1161,44 @@ static inline unsigned long 
> >section_nr_to_pfn(unsigned long sec)
> > #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & 
> > PAGE_SECTION_MASK)
> > #define SECTION_ALIGN_DOWN(pfn)   ((pfn) & PAGE_SECTION_MASK)
> >
> >+/*
> >+ * SUBSECTION_SHIFT must be constant since it is used to declare
> >+ * subsection_map and related bitmaps without triggering the generation
> >+ * of variable-length arrays. The most natural size for a subsection is
> >+ * a PMD-page. For architectures that do not have a constant PMD-size
> >+ * ARCH_SUBSECTION_SHIFT can be set to a constant max size, or otherwise
> >+ * fallback to 2MB.
> >+ */
> >+#if defined(ARCH_SUBSECTION_SHIFT)
> >+#define SUBSECTION_SHIFT (ARCH_SUBSECTION_SHIFT)
> >+#elif defined(PMD_SHIFT)
> >+#define SUBSECTION_SHIFT (PMD_SHIFT)
> >+#else
> >+/*
> >+ * Memory hotplug enabled platforms avoid this default because they
> >+ * either define ARCH_SUBSECTION_SHIFT, or PMD_SHIFT is a constant, but
> >+ * this is kept as a backstop to allow compilation on
> >+ * !ARCH_ENABLE_MEMORY_HOTPLUG archs.
> >+ */
> >+#define SUBSECTION_SHIFT 21
> >+#endif
> >+
> >+#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
> >+#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
> >+#define PAGE_SUBSECTION_MASK ((~(PAGES_PER_SUBSECTION-1)))
>
> One pair of brackets could be removed, IMHO.

Sure.

>
> >+
> >+#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
> >+#error Subsection size exceeds section size
> >+#else
> >+#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - 
> >SUBSECTION_SHIFT))
> >+#endif
> >+
> >+struct mem_section_usage {
> >+  DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
> >+  /* See declaration of similar field in struct zone */
> >+  unsigned long pageblock_flags[0];
> >+};
> >+
> > struct page;
> > struct page_ext;
> > struct mem_section {
> >@@ -1178,8 +1216,7 @@ struct mem_section {
> >*/
> >   unsigned long section_mem_map;
> >
> >-  /* See declaration of similar field in struct zone */
> >-  unsigned long *pageblock_flags;
> >+  struct mem_section_usage *usage;
> > #ifdef CONFIG_PAGE_EXTENSION
> >   

Re: [PATCH v9 01/12] mm/sparsemem: Introduce struct mem_section_usage

2019-06-16 Thread Wei Yang
On Wed, Jun 05, 2019 at 02:57:54PM -0700, Dan Williams wrote:
>Towards enabling memory hotplug to track partial population of a
>section, introduce 'struct mem_section_usage'.
>
>A pointer to a 'struct mem_section_usage' instance replaces the existing
>pointer to a 'pageblock_flags' bitmap. Effectively it adds one more
>'unsigned long' beyond the 'pageblock_flags' (usemap) allocation to
>house a new 'subsection_map' bitmap.  The new bitmap enables the memory
>hot{plug,remove} implementation to act on incremental sub-divisions of a
>section.
>
>The default SUBSECTION_SHIFT is chosen to keep the 'subsection_map' no
>larger than a single 'unsigned long' on the major architectures.
>Alternatively an architecture can define ARCH_SUBSECTION_SHIFT to
>override the default PMD_SHIFT. Note that PowerPC needs to use
>ARCH_SUBSECTION_SHIFT to workaround PMD_SHIFT being a non-constant
>expression on PowerPC.
>
>The primary motivation for this functionality is to support platforms
>that mix "System RAM" and "Persistent Memory" within a single section,
>or multiple PMEM ranges with different mapping lifetimes within a single
>section. The section restriction for hotplug has caused an ongoing saga
>of hacks and bugs for devm_memremap_pages() users.
>
>Beyond the fixups to teach existing paths how to retrieve the 'usemap'
>from a section, and updates to usemap allocation path, there are no
>expected behavior changes.
>
>Cc: Michal Hocko 
>Cc: Vlastimil Babka 
>Cc: Logan Gunthorpe 
>Cc: Oscar Salvador 
>Cc: Pavel Tatashin 
>Cc: Benjamin Herrenschmidt 
>Cc: Paul Mackerras 
>Cc: Michael Ellerman 
>Signed-off-by: Dan Williams 
>---
> arch/powerpc/include/asm/sparsemem.h |3 +
> include/linux/mmzone.h   |   48 +++-
> mm/memory_hotplug.c  |   18 
> mm/page_alloc.c  |2 -
> mm/sparse.c  |   81 +-
> 5 files changed, 99 insertions(+), 53 deletions(-)
>
>diff --git a/arch/powerpc/include/asm/sparsemem.h 
>b/arch/powerpc/include/asm/sparsemem.h
>index 3192d454a733..1aa3c9303bf8 100644
>--- a/arch/powerpc/include/asm/sparsemem.h
>+++ b/arch/powerpc/include/asm/sparsemem.h
>@@ -10,6 +10,9 @@
>  */
> #define SECTION_SIZE_BITS   24
> 
>+/* Reflect the largest possible PMD-size as the subsection-size constant */
>+#define ARCH_SUBSECTION_SHIFT 24
>+
> #endif /* CONFIG_SPARSEMEM */
> 
> #ifdef CONFIG_MEMORY_HOTPLUG
>diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>index 427b79c39b3c..ac163f2f274f 100644
>--- a/include/linux/mmzone.h
>+++ b/include/linux/mmzone.h
>@@ -1161,6 +1161,44 @@ static inline unsigned long section_nr_to_pfn(unsigned 
>long sec)
> #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & 
> PAGE_SECTION_MASK)
> #define SECTION_ALIGN_DOWN(pfn)   ((pfn) & PAGE_SECTION_MASK)
> 
>+/*
>+ * SUBSECTION_SHIFT must be constant since it is used to declare
>+ * subsection_map and related bitmaps without triggering the generation
>+ * of variable-length arrays. The most natural size for a subsection is
>+ * a PMD-page. For architectures that do not have a constant PMD-size
>+ * ARCH_SUBSECTION_SHIFT can be set to a constant max size, or otherwise
>+ * fallback to 2MB.
>+ */
>+#if defined(ARCH_SUBSECTION_SHIFT)
>+#define SUBSECTION_SHIFT (ARCH_SUBSECTION_SHIFT)
>+#elif defined(PMD_SHIFT)
>+#define SUBSECTION_SHIFT (PMD_SHIFT)
>+#else
>+/*
>+ * Memory hotplug enabled platforms avoid this default because they
>+ * either define ARCH_SUBSECTION_SHIFT, or PMD_SHIFT is a constant, but
>+ * this is kept as a backstop to allow compilation on
>+ * !ARCH_ENABLE_MEMORY_HOTPLUG archs.
>+ */
>+#define SUBSECTION_SHIFT 21
>+#endif
>+
>+#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
>+#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
>+#define PAGE_SUBSECTION_MASK ((~(PAGES_PER_SUBSECTION-1)))

One pair of brackets could be removed, IMHO.

>+
>+#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
>+#error Subsection size exceeds section size
>+#else
>+#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - 
>SUBSECTION_SHIFT))
>+#endif
>+
>+struct mem_section_usage {
>+  DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
>+  /* See declaration of similar field in struct zone */
>+  unsigned long pageblock_flags[0];
>+};
>+
> struct page;
> struct page_ext;
> struct mem_section {
>@@ -1178,8 +1216,7 @@ struct mem_section {
>*/
>   unsigned long section_mem_map;
> 
>-  /* See declaration of similar field in struct zone */
>-  unsigned long *pageblock_flags;
>+  struct mem_section_usage *usage;
> #ifdef CONFIG_PAGE_EXTENSION
>   /*
>* If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
>@@ -1210,6 +1247,11 @@ extern struct mem_section **mem_section;
> extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
> #endif
> 
>+static inline unsigned long *section_to_usemap(struct mem_section *ms)

Re: [PATCH v9 01/12] mm/sparsemem: Introduce struct mem_section_usage

2019-06-06 Thread Oscar Salvador
On Wed, Jun 05, 2019 at 02:57:54PM -0700, Dan Williams wrote:
> Towards enabling memory hotplug to track partial population of a
> section, introduce 'struct mem_section_usage'.
> 
> A pointer to a 'struct mem_section_usage' instance replaces the existing
> pointer to a 'pageblock_flags' bitmap. Effectively it adds one more
> 'unsigned long' beyond the 'pageblock_flags' (usemap) allocation to
> house a new 'subsection_map' bitmap.  The new bitmap enables the memory
> hot{plug,remove} implementation to act on incremental sub-divisions of a
> section.
> 
> The default SUBSECTION_SHIFT is chosen to keep the 'subsection_map' no
> larger than a single 'unsigned long' on the major architectures.
> Alternatively an architecture can define ARCH_SUBSECTION_SHIFT to
> override the default PMD_SHIFT. Note that PowerPC needs to use
> ARCH_SUBSECTION_SHIFT to workaround PMD_SHIFT being a non-constant
> expression on PowerPC.
> 
> The primary motivation for this functionality is to support platforms
> that mix "System RAM" and "Persistent Memory" within a single section,
> or multiple PMEM ranges with different mapping lifetimes within a single
> section. The section restriction for hotplug has caused an ongoing saga
> of hacks and bugs for devm_memremap_pages() users.
> 
> Beyond the fixups to teach existing paths how to retrieve the 'usemap'
> from a section, and updates to usemap allocation path, there are no
> expected behavior changes.
> 
> Cc: Michal Hocko 
> Cc: Vlastimil Babka 
> Cc: Logan Gunthorpe 
> Cc: Oscar Salvador 
> Cc: Pavel Tatashin 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Signed-off-by: Dan Williams 

Reviewed-by: Oscar Salvador 

-- 
Oscar Salvador
SUSE L3
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH v9 01/12] mm/sparsemem: Introduce struct mem_section_usage

2019-06-05 Thread Dan Williams
Towards enabling memory hotplug to track partial population of a
section, introduce 'struct mem_section_usage'.

A pointer to a 'struct mem_section_usage' instance replaces the existing
pointer to a 'pageblock_flags' bitmap. Effectively it adds one more
'unsigned long' beyond the 'pageblock_flags' (usemap) allocation to
house a new 'subsection_map' bitmap.  The new bitmap enables the memory
hot{plug,remove} implementation to act on incremental sub-divisions of a
section.

The default SUBSECTION_SHIFT is chosen to keep the 'subsection_map' no
larger than a single 'unsigned long' on the major architectures.
Alternatively an architecture can define ARCH_SUBSECTION_SHIFT to
override the default PMD_SHIFT. Note that PowerPC needs to use
ARCH_SUBSECTION_SHIFT to work around PMD_SHIFT being a non-constant
expression on PowerPC.

The primary motivation for this functionality is to support platforms
that mix "System RAM" and "Persistent Memory" within a single section,
or multiple PMEM ranges with different mapping lifetimes within a single
section. The section restriction for hotplug has caused an ongoing saga
of hacks and bugs for devm_memremap_pages() users.

Beyond the fixups to teach existing paths how to retrieve the 'usemap'
from a section, and updates to usemap allocation path, there are no
expected behavior changes.

Cc: Michal Hocko 
Cc: Vlastimil Babka 
Cc: Logan Gunthorpe 
Cc: Oscar Salvador 
Cc: Pavel Tatashin 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Signed-off-by: Dan Williams 
---
 arch/powerpc/include/asm/sparsemem.h |3 +
 include/linux/mmzone.h   |   48 +++-
 mm/memory_hotplug.c  |   18 
 mm/page_alloc.c  |2 -
 mm/sparse.c  |   81 +-
 5 files changed, 99 insertions(+), 53 deletions(-)

diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 3192d454a733..1aa3c9303bf8 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -10,6 +10,9 @@
  */
 #define SECTION_SIZE_BITS   24
 
+/* Reflect the largest possible PMD-size as the subsection-size constant */
+#define ARCH_SUBSECTION_SHIFT 24
+
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 427b79c39b3c..ac163f2f274f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1161,6 +1161,44 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
 #define SECTION_ALIGN_UP(pfn)  (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
 #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
 
+/*
+ * SUBSECTION_SHIFT must be constant since it is used to declare
+ * subsection_map and related bitmaps without triggering the generation
+ * of variable-length arrays. The most natural size for a subsection is
+ * a PMD-page. For architectures that do not have a constant PMD-size
+ * ARCH_SUBSECTION_SHIFT can be set to a constant max size, or otherwise
+ * fall back to 2MB.
+ */
+#if defined(ARCH_SUBSECTION_SHIFT)
+#define SUBSECTION_SHIFT (ARCH_SUBSECTION_SHIFT)
+#elif defined(PMD_SHIFT)
+#define SUBSECTION_SHIFT (PMD_SHIFT)
+#else
+/*
+ * Memory hotplug enabled platforms avoid this default because they
+ * either define ARCH_SUBSECTION_SHIFT, or PMD_SHIFT is a constant, but
+ * this is kept as a backstop to allow compilation on
+ * !ARCH_ENABLE_MEMORY_HOTPLUG archs.
+ */
+#define SUBSECTION_SHIFT 21
+#endif
+
+#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
+#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
+#define PAGE_SUBSECTION_MASK ((~(PAGES_PER_SUBSECTION-1)))
+
+#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
+#error Subsection size exceeds section size
+#else
+#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))
+#endif
+
+struct mem_section_usage {
+   DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
+   /* See declaration of similar field in struct zone */
+   unsigned long pageblock_flags[0];
+};
+
 struct page;
 struct page_ext;
 struct mem_section {
@@ -1178,8 +1216,7 @@ struct mem_section {
 */
unsigned long section_mem_map;
 
-   /* See declaration of similar field in struct zone */
-   unsigned long *pageblock_flags;
+   struct mem_section_usage *usage;
 #ifdef CONFIG_PAGE_EXTENSION
/*
 * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
@@ -1210,6 +1247,11 @@ extern struct mem_section **mem_section;
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
+static inline unsigned long *section_to_usemap(struct mem_section *ms)
+{
+   return ms->usage->pageblock_flags;
+}
+
 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
 #ifdef CONFIG_SPARSEMEM_EXTREME
@@ -1221,7 +1263,7 @@ static inline struct mem_section