The patch titled
numa: mempolicy: Allow tunable policy for system init
has been added to the -mm tree. Its filename is
numa-mempolicy-allow-tunable-policy-for-system-init.patch
*** Remember to use Documentation/SubmitChecklist when testing your code ***
See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this
------------------------------------------------------
Subject: numa: mempolicy: Allow tunable policy for system init
From: Paul Mundt <[EMAIL PROTECTED]>
The current default behaviour for system init (via numa_policy_init()) is
to use MPOL_INTERLEAVE across the online nodes in order to avoid a
preference for node 0. This tends to be undesirable on systems with small
nodes, where one would rather keep as many allocations on node 0 as
possible.
As tmpfs already provides a parser for the policy and nodelist --
shmem_parse_mpol() -- we generalize it and hook into it via an mpolinit=
(for lack of a better name) setup param. Other code that wishes to do
mempolicy parsing for itself can use the new mpol_parse_options().
As an example, for small nodes, one might prefer to boot with
'mpolinit=prefer:0'. numa_default_policy() will still override this with
MPOL_DEFAULT later on anyway, so this is only useful for system init.
Signed-off-by: Paul Mundt <[EMAIL PROTECTED]>
Cc: Hugh Dickins <[EMAIL PROTECTED]>
Cc: Christoph Lameter <[EMAIL PROTECTED]>
Cc: Andi Kleen <[EMAIL PROTECTED]>
Cc: Lee Schermerhorn <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
---
Documentation/kernel-parameters.txt | 6 +
include/linux/mempolicy.h | 8 ++
mm/mempolicy.c | 81 ++++++++++++++++++++++++--
mm/shmem.c | 54 -----------------
4 files changed, 91 insertions(+), 58 deletions(-)
diff -puN
Documentation/kernel-parameters.txt~numa-mempolicy-allow-tunable-policy-for-system-init
Documentation/kernel-parameters.txt
---
a/Documentation/kernel-parameters.txt~numa-mempolicy-allow-tunable-policy-for-system-init
+++ a/Documentation/kernel-parameters.txt
@@ -1080,6 +1080,12 @@ and is between 256 and 4096 characters.
mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
reporting absolute coordinates, such as tablets
+ mpolinit= [KNL,NUMA]
+ Format: <policy>[:<nodelist>]
+ Sets the default memory policy to be used at system
+ init time. Defaults to MPOL_INTERLEAVE between online
+ nodes.
+
mpu401= [HW,OSS]
Format: <io>,<irq>
diff -puN
include/linux/mempolicy.h~numa-mempolicy-allow-tunable-policy-for-system-init
include/linux/mempolicy.h
---
a/include/linux/mempolicy.h~numa-mempolicy-allow-tunable-policy-for-system-init
+++ a/include/linux/mempolicy.h
@@ -148,6 +148,8 @@ extern void mpol_rebind_task(struct task
const nodemask_t *new);
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
extern void mpol_fix_fork_child_flag(struct task_struct *p);
+extern int mpol_parse_options(char *value, int *policy,
+ nodemask_t *policy_nodes);
#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
#ifdef CONFIG_CPUSETS
@@ -253,6 +255,12 @@ static inline void mpol_fix_fork_child_f
{
}
+static inline int mpol_parse_options(char *value, int *policy,
+ nodemask_t *policy_nodes)
+{
+ return 1; /* stub: always non-zero, which callers treat as a parse error */
+}
+
#define set_cpuset_being_rebound(x) do {} while (0)
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
diff -puN mm/mempolicy.c~numa-mempolicy-allow-tunable-policy-for-system-init
mm/mempolicy.c
--- a/mm/mempolicy.c~numa-mempolicy-allow-tunable-policy-for-system-init
+++ a/mm/mempolicy.c
@@ -89,7 +89,7 @@
#include <linux/migrate.h>
#include <linux/rmap.h>
#include <linux/security.h>
-
+#include <linux/ctype.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
@@ -1594,9 +1594,72 @@ void mpol_free_shared_policy(struct shar
spin_unlock(&p->lock);
}
+int mpol_parse_options(char *value, int *policy, nodemask_t *policy_nodes)
+{
+ char *nodelist = strchr(value, ':'); /* text after the first ':', if any */
+ int err = 1; /* return value: 1 = rejected (default), 0 = accepted */
+ /* Parse "<policy>[:<nodelist>]" from value into *policy / *policy_nodes. */
+ if (nodelist) {
+ /* NUL-terminate policy string so it can be strcmp()'d below */
+ *nodelist++ = '\0';
+ if (nodelist_parse(nodelist, *policy_nodes))
+ goto out;
+ } /* NOTE(review): no nodes_subset() check against node_online_map here */
+ if (!strcmp(value, "default")) {
+ *policy = MPOL_DEFAULT; /* stored even if err stays 1 below */
+ /* Don't allow a nodelist */
+ if (!nodelist)
+ err = 0;
+ } else if (!strcmp(value, "prefer")) {
+ *policy = MPOL_PREFERRED; /* stored even if err stays 1 below */
+ /* Insist on a nodelist of one node only: digits and nothing else */
+ if (nodelist) {
+ char *rest = nodelist;
+ while (isdigit(*rest))
+ rest++;
+ if (!*rest)
+ err = 0;
+ }
+ } else if (!strcmp(value, "bind")) {
+ *policy = MPOL_BIND; /* stored even if err stays 1 below */
+ /* Insist on a nodelist */
+ if (nodelist)
+ err = 0;
+ } else if (!strcmp(value, "interleave")) {
+ *policy = MPOL_INTERLEAVE;
+ /* Default to nodes online if no nodelist */
+ if (!nodelist)
+ *policy_nodes = node_online_map;
+ err = 0; /* unconditional: the nodelist is optional for interleave */
+ } /* any other keyword: *policy is left untouched and err stays 1 */
+out:
+ /* Restore string for error message: put back the ':' overwritten above */
+ if (nodelist)
+ *--nodelist = ':';
+ return err;
+}
+
+/* Policy and nodemask applied at system init; settable with mpolinit=.
+ Interleave by default so boot allocations don't all end up in node zero. */
+static nodemask_t nmask_sysinit __initdata;
+static int policy_sysinit __initdata = MPOL_INTERLEAVE;
+
+static int __init setup_mpol_sysinit(char *str) /* "mpolinit=" handler */
+{
+ if (!mpol_parse_options(str, &policy_sysinit, &nmask_sysinit))
+ return 1; /* parsed cleanly: keep the requested policy/nodes */
+ policy_sysinit = MPOL_INTERLEAVE; /* undo any partial parser update */
+ nodes_clear(nmask_sysinit); /* empty mask => node_online_map at init */
+ printk("mpolinit failed, falling back on interleave\n");
+ return 0;
+}
+__setup("mpolinit=", setup_mpol_sysinit);
+
/* assumes fs == KERNEL_DS */
void __init numa_policy_init(void)
{
+ nodemask_t *nmask;
+
policy_cache = kmem_cache_create("numa_policy",
sizeof(struct mempolicy),
0, SLAB_PANIC, NULL, NULL);
@@ -1605,11 +1668,19 @@ void __init numa_policy_init(void)
sizeof(struct sp_node),
0, SLAB_PANIC, NULL, NULL);
- /* Set interleaving policy for system init. This way not all
- the data structures allocated at system boot end up in node zero. */
+ /*
+ * Use the specified nodemask for init, or fall back to
+ * node_online_map.
+ */
+ if (policy_sysinit == MPOL_DEFAULT)
+ nmask = NULL;
+ else if (!nodes_empty(nmask_sysinit))
+ nmask = &nmask_sysinit;
+ else
+ nmask = &node_online_map;
- if (do_set_mempolicy(MPOL_INTERLEAVE, &node_online_map))
- printk("numa_policy_init: interleaving failed\n");
+ if (do_set_mempolicy(policy_sysinit, nmask))
+ printk("numa_policy_init: setting init policy failed\n");
}
/* Reset policy of current process to default */
diff -puN mm/shmem.c~numa-mempolicy-allow-tunable-policy-for-system-init
mm/shmem.c
--- a/mm/shmem.c~numa-mempolicy-allow-tunable-policy-for-system-init
+++ a/mm/shmem.c
@@ -958,53 +958,6 @@ redirty:
}
#ifdef CONFIG_NUMA
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t
*policy_nodes)
-{
- char *nodelist = strchr(value, ':');
- int err = 1;
-
- if (nodelist) {
- /* NUL-terminate policy string */
- *nodelist++ = '\0';
- if (nodelist_parse(nodelist, *policy_nodes))
- goto out;
- if (!nodes_subset(*policy_nodes, node_online_map))
- goto out;
- }
- if (!strcmp(value, "default")) {
- *policy = MPOL_DEFAULT;
- /* Don't allow a nodelist */
- if (!nodelist)
- err = 0;
- } else if (!strcmp(value, "prefer")) {
- *policy = MPOL_PREFERRED;
- /* Insist on a nodelist of one node only */
- if (nodelist) {
- char *rest = nodelist;
- while (isdigit(*rest))
- rest++;
- if (!*rest)
- err = 0;
- }
- } else if (!strcmp(value, "bind")) {
- *policy = MPOL_BIND;
- /* Insist on a nodelist */
- if (nodelist)
- err = 0;
- } else if (!strcmp(value, "interleave")) {
- *policy = MPOL_INTERLEAVE;
- /* Default to nodes online if no nodelist */
- if (!nodelist)
- *policy_nodes = node_online_map;
- err = 0;
- }
-out:
- /* Restore string for error message */
- if (nodelist)
- *--nodelist = ':';
- return err;
-}
-
static struct page *shmem_swapin_async(struct shared_policy *p,
swp_entry_t entry, unsigned long idx)
{
@@ -1057,11 +1010,6 @@ shmem_alloc_page(gfp_t gfp, struct shmem
return page;
}
#else
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t
*policy_nodes)
-{
- return 1;
-}
-
static inline struct page *
shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
{
@@ -2151,7 +2099,7 @@ static int shmem_parse_options(char *opt
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"mpol")) {
- if (shmem_parse_mpol(value,policy,policy_nodes))
+ if (mpol_parse_options(value,policy,policy_nodes))
goto bad_val;
} else {
printk(KERN_ERR "tmpfs: Bad mount option %s\n",
_
Patches currently in -mm which might be from [EMAIL PROTECTED] are
slab-fix-alien-cache-handling.patch
potential-parse-error-in-ifdef-part-3.patch
lots-of-architectures-enable-arbitary-speed-tty-support.patch
git-sh.patch
numa-mempolicy-allow-tunable-policy-for-system-init.patch
numa-mempolicy-allow-tunable-policy-for-system-init-fix.patch
pvr2fb-fix-pseudo_palette-array-overrun-and-typecast.patch
-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html