Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=13808910713a98cc1159291e62cdfec92cc94d05
Commit:     13808910713a98cc1159291e62cdfec92cc94d05
Parent:     55144768e100b68447f44c5e5c9deb155ad661bd
Author:     Christoph Lameter <[EMAIL PROTECTED]>
AuthorDate: Tue Oct 16 01:25:27 2007 -0700
Committer:  Linus Torvalds <[EMAIL PROTECTED]>
CommitDate: Tue Oct 16 09:42:58 2007 -0700

    Memoryless nodes: Generic management of nodemasks for various purposes
    
    Why do we need to support memoryless nodes?
    
    KAMEZAWA Hiroyuki <[EMAIL PROTECTED]> wrote:
    
    > For fujitsu, problem is called "empty" node.
    >
    > When ACPI's SRAT table includes "possible nodes", ia64 bootstrap(acpi_numa_init)
    > creates nodes, which includes no memory, no cpu.
    >
    > I tried to remove empty-node in past, but that was denied.
    > It was because we can hot-add cpu to the empty node.
    > (node-hotplug triggered by cpu is not implemented now. and it will be ugly.)
    >
    >
    > For HP, (Lee can comment on this later), they have memory-less-node.
    > As far as I hear, HP's machine can have following configuration.
    >
    > (example)
    > Node0: CPU0   memory AAA MB
    > Node1: CPU1   memory AAA MB
    > Node2: CPU2   memory AAA MB
    > Node3: CPU3   memory AAA MB
    > Node4: Memory XXX GB
    >
    > AAA is very small value (below 16MB)  and will be omitted by ia64 bootstrap.
    > After boot, only Node 4 has valid memory (but have no cpu.)
    >
    > Maybe this is memory-interleave by firmware config.
    
    Christoph Lameter <[EMAIL PROTECTED]> wrote:
    
    > Future SGI platforms (actually also current one can have but nothing like
    > that is deployed to my knowledge) have nodes with only cpus. Current SGI
    > platforms have nodes with just I/O that we so far cannot manage in the
    > core. So the arch code maps them to the nearest memory node.
    
    Lee Schermerhorn <[EMAIL PROTECTED]> wrote:
    
    > For the HP platforms, we can configure each cell with from 0% to 100%
    > "cell local memory".  When we configure with <100% CLM, the "missing
    > percentages" are interleaved by hardware on a cache-line granularity to
    > improve bandwidth at the expense of latency for numa-challenged
    > applications [and OSes, but not our problem ;-)].  When we boot Linux on
    > such a config, all of the real nodes have no memory--it all resides in a
    > single interleaved pseudo-node.
    >
    > When we boot Linux on a 100% CLM configuration [== NUMA], we still have
    > the interleaved pseudo-node.  It contains a few hundred MB stolen from
    > the real nodes to contain the DMA zone.  [Interleaved memory resides at
    > phys addr 0].  The memoryless-nodes patches, along with the zoneorder
    > patches, support this config as well.
    >
    > Also, when we boot a NUMA config with the "mem=" command line,
    > specifying less memory than actually exists, Linux takes the excluded
    > memory "off the top" rather than distributing it across the nodes.  This
    > can result in memoryless nodes, as well.
    >
    
    This patch:
    
    Preparation for memoryless node patches.
    
    Provide a generic way to keep nodemasks describing various characteristics
    of NUMA nodes.
    
    Remove the node_online_map and the node_possible_map and realize the same
    functionality using two node states: N_POSSIBLE and N_ONLINE.
    
    [EMAIL PROTECTED]: Initialize N_*_MEMORY and N_CPU masks for non-NUMA config]
    Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>
    Tested-by: Lee Schermerhorn <[EMAIL PROTECTED]>
    Acked-by: Lee Schermerhorn <[EMAIL PROTECTED]>
    Acked-by: Bob Picco <[EMAIL PROTECTED]>
    Cc: Nishanth Aravamudan <[EMAIL PROTECTED]>
    Cc: KAMEZAWA Hiroyuki <[EMAIL PROTECTED]>
    Cc: Mel Gorman <[EMAIL PROTECTED]>
    Signed-off-by: Lee Schermerhorn <[EMAIL PROTECTED]>
    Cc: "Serge E. Hallyn" <[EMAIL PROTECTED]>
    Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
    Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>
---
 include/linux/nodemask.h |   87 +++++++++++++++++++++++++++++++++++++--------
 mm/page_alloc.c          |   20 +++++++---
 2 files changed, 85 insertions(+), 22 deletions(-)

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 52c54a5..583e6b8 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -338,31 +338,81 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
 #endif /* MAX_NUMNODES */
 
 /*
+ * Bitmasks that are kept for all the nodes.
+ */
+enum node_states {
+       N_POSSIBLE,     /* The node could become online at some point */
+       N_ONLINE,       /* The node is online */
+       NR_NODE_STATES
+};
+
+/*
  * The following particular system nodemasks and operations
  * on them manage all possible and online nodes.
  */
 
-extern nodemask_t node_online_map;
-extern nodemask_t node_possible_map;
+extern nodemask_t node_states[NR_NODE_STATES];
 
 #if MAX_NUMNODES > 1
-#define num_online_nodes()     nodes_weight(node_online_map)
-#define num_possible_nodes()   nodes_weight(node_possible_map)
-#define node_online(node)      node_isset((node), node_online_map)
-#define node_possible(node)    node_isset((node), node_possible_map)
-#define first_online_node      first_node(node_online_map)
-#define next_online_node(nid)  next_node((nid), node_online_map)
+static inline int node_state(int node, enum node_states state)
+{
+       return node_isset(node, node_states[state]);
+}
+
+static inline void node_set_state(int node, enum node_states state)
+{
+       __node_set(node, &node_states[state]);
+}
+
+static inline void node_clear_state(int node, enum node_states state)
+{
+       __node_clear(node, &node_states[state]);
+}
+
+static inline int num_node_state(enum node_states state)
+{
+       return nodes_weight(node_states[state]);
+}
+
+#define for_each_node_state(__node, __state) \
+       for_each_node_mask((__node), node_states[__state])
+
+#define first_online_node      first_node(node_states[N_ONLINE])
+#define next_online_node(nid)  next_node((nid), node_states[N_ONLINE])
+
 extern int nr_node_ids;
 #else
-#define num_online_nodes()     1
-#define num_possible_nodes()   1
-#define node_online(node)      ((node) == 0)
-#define node_possible(node)    ((node) == 0)
+
+static inline int node_state(int node, enum node_states state)
+{
+       return node == 0;
+}
+
+static inline void node_set_state(int node, enum node_states state)
+{
+}
+
+static inline void node_clear_state(int node, enum node_states state)
+{
+}
+
+static inline int num_node_state(enum node_states state)
+{
+       return 1;
+}
+
+#define for_each_node_state(node, __state) \
+       for ( (node) = 0; (node) == 0; (node) = 1)
+
 #define first_online_node      0
 #define next_online_node(nid)  (MAX_NUMNODES)
 #define nr_node_ids            1
+
 #endif
 
+#define node_online_map        node_states[N_ONLINE]
+#define node_possible_map      node_states[N_POSSIBLE]
+
 #define any_online_node(mask)                  \
 ({                                             \
        int node;                               \
@@ -372,10 +422,15 @@ extern int nr_node_ids;
        node;                                   \
 })
 
-#define node_set_online(node)     set_bit((node), node_online_map.bits)
-#define node_set_offline(node)    clear_bit((node), node_online_map.bits)
+#define num_online_nodes()     num_node_state(N_ONLINE)
+#define num_possible_nodes()   num_node_state(N_POSSIBLE)
+#define node_online(node)      node_state((node), N_ONLINE)
+#define node_possible(node)    node_state((node), N_POSSIBLE)
+
+#define node_set_online(node)     node_set_state((node), N_ONLINE)
+#define node_set_offline(node)    node_clear_state((node), N_ONLINE)
 
-#define for_each_node(node)       for_each_node_mask((node), node_possible_map)
-#define for_each_online_node(node) for_each_node_mask((node), node_online_map)
+#define for_each_node(node)       for_each_node_state(node, N_POSSIBLE)
+#define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
 
 #endif /* __LINUX_NODEMASK_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 71013e6..0cc5b3e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -47,13 +47,21 @@
 #include "internal.h"
 
 /*
- * MCD - HACK: Find somewhere to initialize this EARLY, or make this
- * initializer cleaner
+ * Array of node states.
  */
-nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
-EXPORT_SYMBOL(node_online_map);
-nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
-EXPORT_SYMBOL(node_possible_map);
+nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
+       [N_POSSIBLE] = NODE_MASK_ALL,
+       [N_ONLINE] = { { [0] = 1UL } },
+#ifndef CONFIG_NUMA
+       [N_NORMAL_MEMORY] = { { [0] = 1UL } },
+#ifdef CONFIG_HIGHMEM
+       [N_HIGH_MEMORY] = { { [0] = 1UL } },
+#endif
+       [N_CPU] = { { [0] = 1UL } },
+#endif /* NUMA */
+};
+EXPORT_SYMBOL(node_states);
+
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to