While debugging libnuma helpers, I needed a way to emulate remote
topologies in libnuma as hwloc does so that we can do
HWLOC_FSROOT=/foo/bar tests/linux-libnuma
on any of my existing remote topologies. I modified 2.0.8-rc3 with the
attached patch so that it honors HWLOC_FSROOT when reading /sys and
/proc files.
It's not perfect:
* We need to retrieve the distant /proc/self/status and manually add it
to $FSROOT/proc/self/ (libnuma needs it, hwloc doesn't gather it)
* The remote topology cannot use cpumaps that are larger than the local
kernel cpumap. Basically, you need to remove leading zeros in
$FSROOT/sys/devices/system/node/node*/cpumap until it is no longer than
your local /sys/devices/system/node/node0/cpumap
* Some corner cases, such as highly sparse node ids, don't work,
maybe because CONFIG_NODES_SHIFT=6 in my kernel
The patch also brings back the old "numa_all_nodes" behavior (before
libnuma 2.0.6) wrt nodes with no memory if HWLOC_OLD_LIBNUMA=1. But this
shouldn't matter anyway because I removed nodemask_t helpers from trunk.
Still no reply to my bug report on numa-devel about this
http://thread.gmane.org/gmane.linux.kernel.numa/716
I am sharing this so that it doesn't get lost, in case somebody ever has
to debug this again.
Brice
diff -ur numactl-2.0.8~rc3/affinity.c numactl-2.0.8~rc3-new/affinity.c
--- numactl-2.0.8~rc3/affinity.c 2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/affinity.c 2012-02-20 13:50:29.537251813 +0100
@@ -49,6 +49,8 @@
#include "affinity.h"
#include "rtnetlink.h"
+extern char *rootpath;
+
static int badchar(char *s)
{
if (strpbrk(s, "/."))
@@ -88,7 +90,7 @@
much about the actual sysfs layout. */
char path[1024];
char *fn = NULL;
- if (asprintf(&fn, "/sys/class/%s/%s", cls, dev) > 0 &&
+ if (asprintf(&fn, "%s/sys/class/%s/%s", rootpath, cls, dev) > 0 &&
readlink(fn, path, sizeof path) > 0) {
regex_t re;
regmatch_t match[2];
@@ -104,7 +106,7 @@
assert(match[0].rm_eo > 0);
path[match[1].rm_eo + 1] = 0;
p = path + match[0].rm_so;
- ret = sysfs_node_read(mask, "/sys/%s/numa_node", p);
+ ret = sysfs_node_read(mask, "%s/sys/%s/numa_node", rootpath, p);
if (ret < 0)
return node_parse_failure(ret, NULL, p);
return ret;
@@ -112,8 +114,8 @@
}
free(fn);
- ret = sysfs_node_read(mask, "/sys/class/%s/%s/device/numa_node",
- cls, dev);
+ ret = sysfs_node_read(mask, "%s/sys/class/%s/%s/device/numa_node",
+ rootpath, cls, dev);
if (ret < 0)
return node_parse_failure(ret, cls, dev);
return 0;
@@ -131,7 +133,7 @@
struct dirent de, *dep;
cls = "block";
- char fn[sizeof("/sys/class/") + strlen(cls)];
+ char fn[1024];
if (stat(file, &st) < 0) {
numa_warn(W_blockdev1, "Cannot stat file %s", file);
return -1;
@@ -145,7 +147,7 @@
} else if (S_ISBLK(st.st_mode))
d = st.st_rdev;
- sprintf(fn, "/sys/class/%s", cls);
+ sprintf(fn, "%s/sys/class/%s", rootpath, cls);
dir = opendir(fn);
if (!dir) {
numa_warn(W_blockdev2, "Cannot enumerate %s devices in sysfs",
@@ -157,10 +159,10 @@
if (*name == '.')
continue;
char *dev;
- char fn2[sizeof("/sys/class/block//dev") + strlen(name)];
+ char fn2[1024];
n = -1;
- if (sprintf(fn2, "/sys/class/block/%s/dev", name) < 0)
+ if (sprintf(fn2, "%s/sys/class/block/%s/dev", rootpath, name) < 0)
break;
dev = sysfs_read(fn2);
if (dev) {
@@ -299,8 +301,8 @@
return -1;
}
ret = sysfs_node_read(mask,
- "/sys/devices/pci%04x:%02x/%04x:%02x:%02x.%x/numa_node",
- seg, bus, seg, bus, dev, func);
+ "%s/sys/devices/pci%04x:%02x/%04x:%02x:%02x.%x/numa_node",
+ rootpath, seg, bus, seg, bus, dev, func);
if (ret < 0)
return node_parse_failure(ret, cls, id);
return 0;
diff -ur numactl-2.0.8~rc3/distance.c numactl-2.0.8~rc3-new/distance.c
--- numactl-2.0.8~rc3/distance.c 2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/distance.c 2012-02-20 13:46:15.713260676 +0100
@@ -61,7 +61,8 @@
for (nd = 0;; nd++) {
char fn[100];
FILE *dfh;
- sprintf(fn, "/sys/devices/system/node/node%d/distance", nd);
+ char *env = getenv("HWLOC_FSROOT");
+ sprintf(fn, "%s/sys/devices/system/node/node%d/distance", env ? : "", nd);
dfh = fopen(fn, "r");
if (!dfh) {
if (errno == ENOENT && nd > 0)
diff -ur numactl-2.0.8~rc3/libnuma.c numactl-2.0.8~rc3-new/libnuma.c
--- numactl-2.0.8~rc3/libnuma.c 2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/libnuma.c 2012-02-20 13:40:18.225273153 +0100
@@ -77,6 +77,9 @@
int numa_exit_on_warn = 0;
static void set_sizes(void);
+char *rootpath = "";
+static char *mask_size_file;
+
/*
* There are two special functions, _init(void) and _fini(void), which
* are called automatically by the dynamic loader whenever a library is loaded.
@@ -87,6 +90,12 @@
numa_init(void)
{
int max,i;
+ char *env;
+
+ env = getenv("HWLOC_FSROOT");
+ if (env)
+ rootpath = env;
+ asprintf(&mask_size_file, "%s/proc/self/status", rootpath);
if (sizes_set)
return;
@@ -320,11 +329,13 @@
DIR *d;
struct dirent *de;
long long freep;
+ char *path;
numa_memnode_ptr = numa_allocate_nodemask();
numa_nodes_ptr = numa_allocate_nodemask();
- d = opendir("/sys/devices/system/node");
+ asprintf(&path, "%s/sys/devices/system/node", rootpath);
+ d = opendir(path);
if (!d) {
maxconfigurednode = 0;
} else {
@@ -341,6 +352,7 @@
}
closedir(d);
}
+ free(path);
}
/*
@@ -358,7 +370,6 @@
return strncmp(s, pre, strlen(pre)) == 0;
}
-static const char *mask_size_file = "/proc/self/status";
static const char *nodemask_prefix = "Mems_allowed:\t";
/*
* (do this the way Paul Jackson's libcpuset does it)
@@ -557,10 +568,13 @@
static void
set_configured_cpus(void)
{
- char *dirnamep = "/sys/devices/system/cpu";
+ char *dirnamep;
struct dirent *dirent;
DIR *dir;
+
+ asprintf(&dirnamep, "%s/sys/devices/system/cpu", rootpath);
dir = opendir(dirnamep);
+ free(dirnamep);
if (dir == NULL) {
/* fall back to using the online cpu count */
@@ -600,6 +614,8 @@
int
numa_num_configured_nodes(void)
{
+ if (getenv("HWLOC_OLD_LIBNUMA"))
+ return maxconfigurednode+1;
/*
* NOTE: this function's behavior matches the documentation (ie: it
* returns a count of nodes with memory) despite the poor function
@@ -743,13 +759,13 @@
char *line = NULL;
long long size = -1;
FILE *f;
- char fn[64];
+ char fn[512];
int ok = 0;
int required = freep ? 2 : 1;
if (freep)
*freep = -1;
- sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node);
+ sprintf(fn, "%s/sys/devices/system/node/node%d/meminfo", rootpath, node);
f = fopen(fn, "r");
if (!f)
return -1;
@@ -1254,7 +1270,7 @@
numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
{
int err = 0;
- char fn[64];
+ char fn[512];
FILE *f;
char *line = NULL;
size_t len = 0;
@@ -1281,7 +1297,7 @@
mask = (unsigned long *)buffer;
memset(mask, 0, buflen_needed);
- sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
+ sprintf(fn, "%s/sys/devices/system/node/node%d/cpumap", rootpath, node);
f = fopen(fn, "r");
if (!f || getdelim(&line, &len, '\n', f) < 1) {
numa_warn(W_nosysfs2,
@@ -1330,7 +1346,7 @@
{
int err = 0, bufferlen;
int nnodes = numa_max_node();
- char fn[64], *line = NULL;
+ char fn[512], *line = NULL;
FILE *f;
size_t len = 0;
struct bitmask *mask;
@@ -1359,7 +1375,7 @@
mask = numa_allocate_cpumask();
/* this is a kernel cpumask_t (see node_read_cpumap()) */
- sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
+ sprintf(fn, "%s/sys/devices/system/node/node%d/cpumap", rootpath, node);
f = fopen(fn, "r");
if (!f || getdelim(&line, &len, '\n', f) < 1) {
numa_warn(W_nosysfs2,
diff -ur numactl-2.0.8~rc3/migspeed.c numactl-2.0.8~rc3-new/migspeed.c
--- numactl-2.0.8~rc3/migspeed.c 2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/migspeed.c 2012-02-20 13:51:17.457250140 +0100
@@ -4,6 +4,7 @@
* (C) 2007 Silicon Graphics, Inc. Christoph Lameter <[email protected]>
*
*/
+#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include "numa.h"
@@ -40,10 +41,16 @@
void displaymap(void)
{
- FILE *f = fopen("/proc/self/numa_maps","r");
+ char *env = getenv("HWLOC_FSROOT");
+ char *path;
+ FILE *f;
+
+ asprintf(&path, "%s/proc/self/numa_maps", env ? : "");
+ f = fopen(path, "r");
+ free(path);
if (!f) {
- printf("/proc/self/numa_maps not accessible.\n");
+ printf("%s not accessible.\n", path);
exit(1);
}
diff -ur numactl-2.0.8~rc3/numamon.c numactl-2.0.8~rc3-new/numamon.c
--- numactl-2.0.8~rc3/numamon.c 2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/numamon.c 2012-02-20 13:51:35.681249506 +0100
@@ -243,7 +243,12 @@
char *line = NULL;
size_t size = 0;
int bad = 0;
- FILE *f = fopen("/proc/cpuinfo", "r");
+ char *env = getenv("HWLOC_FSROOT");
+ char *path;
+ FILE *f;
+ asprintf(&path, "%s/proc/cpuinfo", env ? : "");
+ f = fopen(path, "r");
+ free(path);
if (!f)
return;
while (getline(&line, &size, f) > 0) {
diff -ur numactl-2.0.8~rc3/numastat numactl-2.0.8~rc3-new/numastat
--- numactl-2.0.8~rc3/numastat 2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/numastat 2012-02-19 12:48:29.520445939 +0100
@@ -39,7 +39,7 @@
}
$WIDTH = 32 if $WIDTH < 32;
-if (! -d "/sys/devices/system/node" ) {
+if (! -d "/home/bgoglin/SOFT/hwloc/topology-chroot/power7vnode/sys/devices/system/node" ) {
print STDERR "sysfs not mounted or system not NUMA aware\n";
exit 1;
}
@@ -47,7 +47,7 @@
%stat = ();
$title = "";
$mode = 0;
-opendir(NODES, "/sys/devices/system/node") || exit 1;
+opendir(NODES, "/home/bgoglin/SOFT/hwloc/topology-chroot/power7vnode/sys/devices/system/node") || exit 1;
foreach $nd (readdir(NODES)) {
next unless $nd =~ /node(\d+)/;
# On newer kernels, readdir may enumerate the 'node(\d+) subdirs
@@ -59,7 +59,7 @@
if (!$title && $nd =~ /node0/) {
$mode = 1;
}
- open(STAT, "/sys/devices/system/node/$nd/numastat") ||
+ open(STAT, "/home/bgoglin/SOFT/hwloc/topology-chroot/power7vnode/sys/devices/system/node/$nd/numastat") ||
die "cannot open $nd: $!\n";
if (! $mode) {
$title = sprintf("%16s",$nd) . $title;
diff -ur numactl-2.0.8~rc3/shm.c numactl-2.0.8~rc3-new/shm.c
--- numactl-2.0.8~rc3/shm.c 2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/shm.c 2012-02-20 13:43:10.161267153 +0100
@@ -44,11 +44,19 @@
int shmflags;
static int shm_pagesize;
+extern char *rootpath;
+
long huge_page_size(void)
{
size_t len = 0;
char *line = NULL;
- FILE *f = fopen("/proc/meminfo", "r");
+ char *env = getenv("HWLOC_FSROOT");
+ char *path;
+ FILE *f;
+
+ asprintf(&path, "%s/proc/meminfo", env ? : "");
+ f = fopen(path, "r");
+ free(path);
if (f != NULL) {
while (getdelim(&line, &len, '\n', f) > 0) {
int ps;