While debugging libnuma helpers, I needed a way to emulate remote
topologies in libnuma as hwloc does so that we can do
  HWLOC_FSROOT=/foo/bar tests/linux-libnuma
on any of my existing remote topologies. I modified 2.0.8-rc3 with the
attached patch so that it honors HWLOC_FSROOT when reading /sys and
/proc files.

It's not perfect:
* We need to retrieve the distant /proc/self/status and manually add it
to $FSROOT/proc/self/ (libnuma needs it, hwloc doesn't gather it)
* The remote topology cannot use cpumaps that are larger than the local
kernel cpumap. Basically, you need to remove leading zeros in
$FSROOT/sys/devices/system/node/node*/cpumap until it is no longer than
your local /sys/devices/system/node/node0/cpumap
* Some corner cases, such as highly sparse node ids, don't work,
maybe because CONFIG_NODES_SHIFT=6 in my kernel

The patch also brings back the old "numa_all_nodes" behavior (before
libnuma 2.0.6) wrt nodes with no memory if HWLOC_OLD_LIBNUMA=1. But this
shouldn't matter anyway because I removed nodemask_t helpers from trunk.
Still no reply to my bug report on numa-devel about this:
http://thread.gmane.org/gmane.linux.kernel.numa/716

I am sharing this so that it doesn't get lost, in case somebody ever has
to debug this again.

Brice


diff -ur numactl-2.0.8~rc3/affinity.c numactl-2.0.8~rc3-new/affinity.c
--- numactl-2.0.8~rc3/affinity.c	2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/affinity.c	2012-02-20 13:50:29.537251813 +0100
@@ -49,6 +49,8 @@
 #include "affinity.h"
 #include "rtnetlink.h"

+extern char *rootpath;
+
 static int badchar(char *s)
 {
 	if (strpbrk(s, "/."))
@@ -88,7 +90,7 @@
 	   much about the actual sysfs layout. */
 	char path[1024];
 	char *fn = NULL;
-	if (asprintf(&fn, "/sys/class/%s/%s", cls, dev) > 0 &&
+	if (asprintf(&fn, "%s/sys/class/%s/%s", rootpath, cls, dev) > 0 &&
 	    readlink(fn, path, sizeof path) > 0) {
 		regex_t re;
 		regmatch_t match[2];
@@ -104,7 +106,7 @@
 			assert(match[0].rm_eo > 0);
 			path[match[1].rm_eo + 1] = 0;
 			p = path + match[0].rm_so;
-			ret = sysfs_node_read(mask, "/sys/%s/numa_node", p);
+			ret = sysfs_node_read(mask, "%s/sys/%s/numa_node", rootpath, p);
 			if (ret < 0)
 				return node_parse_failure(ret, NULL, p);
 			return ret;
@@ -112,8 +114,8 @@
 	}
 	free(fn);

-	ret = sysfs_node_read(mask, "/sys/class/%s/%s/device/numa_node",
-			      cls, dev);
+	ret = sysfs_node_read(mask, "%s/sys/class/%s/%s/device/numa_node",
+			      rootpath, cls, dev);
 	if (ret < 0)
 		return node_parse_failure(ret, cls, dev);
 	return 0;
@@ -131,7 +133,7 @@
 	struct dirent de, *dep;

 	cls = "block";
-	char fn[sizeof("/sys/class/") + strlen(cls)];
+	char fn[1024];
 	if (stat(file, &st) < 0) {
 		numa_warn(W_blockdev1, "Cannot stat file %s", file);
 		return -1;
@@ -145,7 +147,7 @@
 	} else if (S_ISBLK(st.st_mode))
 		d = st.st_rdev;

-	sprintf(fn, "/sys/class/%s", cls);
+	sprintf(fn, "%s/sys/class/%s", rootpath, cls);
 	dir = opendir(fn);
 	if (!dir) {
 		numa_warn(W_blockdev2, "Cannot enumerate %s devices in sysfs",
@@ -157,10 +159,10 @@
 		if (*name == '.')
 			continue;
 		char *dev;
-		char fn2[sizeof("/sys/class/block//dev") + strlen(name)];
+		char fn2[1024];

 		n = -1;
-		if (sprintf(fn2, "/sys/class/block/%s/dev", name) < 0)
+		if (sprintf(fn2, "%s/sys/class/block/%s/dev", rootpath, name) < 0)
 			break;
 		dev = sysfs_read(fn2);
 		if (dev) {
@@ -299,8 +301,8 @@
 		return -1;
 	}
 	ret = sysfs_node_read(mask,
-			"/sys/devices/pci%04x:%02x/%04x:%02x:%02x.%x/numa_node",
-			      seg, bus, seg, bus, dev, func);
+			"%s/sys/devices/pci%04x:%02x/%04x:%02x:%02x.%x/numa_node",
+			      rootpath, seg, bus, seg, bus, dev, func);
 	if (ret < 0)
 		return node_parse_failure(ret, cls, id);
 	return 0;
diff -ur numactl-2.0.8~rc3/distance.c numactl-2.0.8~rc3-new/distance.c
--- numactl-2.0.8~rc3/distance.c	2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/distance.c	2012-02-20 13:46:15.713260676 +0100
@@ -61,7 +61,8 @@
 	for (nd = 0;; nd++) {
 		char fn[100];
 		FILE *dfh;
-		sprintf(fn, "/sys/devices/system/node/node%d/distance", nd);
+		char *env = getenv("HWLOC_FSROOT");
+		sprintf(fn, "%s/sys/devices/system/node/node%d/distance", env ? : "", nd);
 		dfh = fopen(fn, "r");
 		if (!dfh) {
 			if (errno == ENOENT && nd > 0)
diff -ur numactl-2.0.8~rc3/libnuma.c numactl-2.0.8~rc3-new/libnuma.c
--- numactl-2.0.8~rc3/libnuma.c	2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/libnuma.c	2012-02-20 13:40:18.225273153 +0100
@@ -77,6 +77,9 @@
 int numa_exit_on_warn = 0;
 static void set_sizes(void);

+char *rootpath = "";
+static char *mask_size_file;
+
 /*
  * There are two special functions, _init(void) and _fini(void), which
  * are called automatically by the dynamic loader whenever a library is loaded.
@@ -87,6 +90,12 @@
 numa_init(void)
 {
 	int max,i;
+	char *env;
+
+	env = getenv("HWLOC_FSROOT");
+	if (env)
+		rootpath = env;
+	asprintf(&mask_size_file, "%s/proc/self/status", rootpath);

 	if (sizes_set)
 		return;
@@ -320,11 +329,13 @@
 	DIR *d;
 	struct dirent *de;
 	long long freep;
+	char *path;

 	numa_memnode_ptr = numa_allocate_nodemask();
 	numa_nodes_ptr = numa_allocate_nodemask();

-	d = opendir("/sys/devices/system/node");
+	asprintf(&path, "%s/sys/devices/system/node", rootpath);
+	d = opendir(path);
 	if (!d) {
 		maxconfigurednode = 0;
 	} else {
@@ -341,6 +352,7 @@
 		}
 		closedir(d);
 	}
+	free(path);
 }

 /*
@@ -358,7 +370,6 @@
 	return strncmp(s, pre, strlen(pre)) == 0;
 }

-static const char *mask_size_file = "/proc/self/status";
 static const char *nodemask_prefix = "Mems_allowed:\t";
 /*
  * (do this the way Paul Jackson's libcpuset does it)
@@ -557,10 +568,13 @@
 static void
 set_configured_cpus(void)
 {
-	char		*dirnamep = "/sys/devices/system/cpu";
+	char *dirnamep;
 	struct dirent	*dirent;
 	DIR		*dir;
+
+	asprintf(&dirnamep, "%s/sys/devices/system/cpu", rootpath);
 	dir = opendir(dirnamep);
+	free(dirnamep);

 	if (dir == NULL) {
 		/* fall back to using the online cpu count */
@@ -600,6 +614,8 @@
 int
 numa_num_configured_nodes(void)
 {
+	if (getenv("HWLOC_OLD_LIBNUMA"))
+		return maxconfigurednode+1;
 	/*
 	* NOTE: this function's behavior matches the documentation (ie: it
 	* returns a count of nodes with memory) despite the poor function
@@ -743,13 +759,13 @@
 	char *line = NULL;
 	long long size = -1;
 	FILE *f; 
-	char fn[64];
+	char fn[512];
 	int ok = 0;
 	int required = freep ? 2 : 1; 

 	if (freep) 
 		*freep = -1; 
-	sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node); 
+	sprintf(fn, "%s/sys/devices/system/node/node%d/meminfo", rootpath, node); 
 	f = fopen(fn, "r");
 	if (!f)
 		return -1; 
@@ -1254,7 +1270,7 @@
 numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
 {
 	int err = 0;
-	char fn[64];
+	char fn[512];
 	FILE *f;
 	char *line = NULL;
 	size_t len = 0;
@@ -1281,7 +1297,7 @@
 		mask = (unsigned long *)buffer;
 	memset(mask, 0, buflen_needed);

-	sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
+	sprintf(fn, "%s/sys/devices/system/node/node%d/cpumap", rootpath, node);
 	f = fopen(fn, "r");
 	if (!f || getdelim(&line, &len, '\n', f) < 1) {
 		numa_warn(W_nosysfs2,
@@ -1330,7 +1346,7 @@
 {
 	int err = 0, bufferlen;
 	int nnodes = numa_max_node();
-	char fn[64], *line = NULL;
+	char fn[512], *line = NULL;
 	FILE *f; 
 	size_t len = 0; 
 	struct bitmask *mask;
@@ -1359,7 +1375,7 @@
 	mask = numa_allocate_cpumask();

 	/* this is a kernel cpumask_t (see node_read_cpumap()) */
-	sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); 
+	sprintf(fn, "%s/sys/devices/system/node/node%d/cpumap", rootpath, node); 
 	f = fopen(fn, "r"); 
 	if (!f || getdelim(&line, &len, '\n', f) < 1) { 
 		numa_warn(W_nosysfs2,
diff -ur numactl-2.0.8~rc3/migspeed.c numactl-2.0.8~rc3-new/migspeed.c
--- numactl-2.0.8~rc3/migspeed.c	2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/migspeed.c	2012-02-20 13:51:17.457250140 +0100
@@ -4,6 +4,7 @@
  * (C) 2007 Silicon Graphics, Inc. Christoph Lameter <clame...@sgi.com>
  *
  */
+#define _GNU_SOURCE 1
 #include <stdio.h>
 #include <stdlib.h>
 #include "numa.h"
@@ -40,10 +41,16 @@

 void displaymap(void)
 {
-	FILE *f = fopen("/proc/self/numa_maps","r");
+        char *env = getenv("HWLOC_FSROOT");
+        char *path;
+        FILE *f;
+
+        asprintf(&path, "%s/proc/self/numa_maps", env ? : "");
+        f = fopen(path, "r");
+	free(path);

 	if (!f) {
-		printf("/proc/self/numa_maps not accessible.\n");
+		printf("%s not accessible.\n", path);
 		exit(1);
 	}

diff -ur numactl-2.0.8~rc3/numamon.c numactl-2.0.8~rc3-new/numamon.c
--- numactl-2.0.8~rc3/numamon.c	2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/numamon.c	2012-02-20 13:51:35.681249506 +0100
@@ -243,7 +243,12 @@
 	char *line = NULL;
 	size_t size = 0;
 	int bad = 0;
-	FILE *f = fopen("/proc/cpuinfo", "r");
+        char *env = getenv("HWLOC_FSROOT");
+        char *path;
+	FILE *f;
+	asprintf(&path, "%s/proc/cpuinfo", env ? : "");
+        f = fopen(path, "r");
+	free(path);
 	if (!f)
 		return;	
 	while (getline(&line, &size, f) > 0) {
diff -ur numactl-2.0.8~rc3/numastat numactl-2.0.8~rc3-new/numastat
--- numactl-2.0.8~rc3/numastat	2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/numastat	2012-02-19 12:48:29.520445939 +0100
@@ -39,7 +39,7 @@
 } 
 $WIDTH = 32 if $WIDTH < 32; 

-if (! -d "/sys/devices/system/node" ) { 
+if (! -d "/home/bgoglin/SOFT/hwloc/topology-chroot/power7vnode/sys/devices/system/node" ) { 
 	print STDERR "sysfs not mounted or system not NUMA aware\n";
 	exit 1;
 } 
@@ -47,7 +47,7 @@
 %stat = (); 
 $title = ""; 
 $mode = 0;
-opendir(NODES, "/sys/devices/system/node") || exit 1;
+opendir(NODES, "/home/bgoglin/SOFT/hwloc/topology-chroot/power7vnode/sys/devices/system/node") || exit 1;
 foreach $nd (readdir(NODES)) { 
 	next unless $nd =~ /node(\d+)/; 
 	# On newer kernels, readdir may enumerate the 'node(\d+) subdirs
@@ -59,7 +59,7 @@
 	if (!$title && $nd =~ /node0/) {
 		$mode = 1;
 	}
-	open(STAT, "/sys/devices/system/node/$nd/numastat") || 
+	open(STAT, "/home/bgoglin/SOFT/hwloc/topology-chroot/power7vnode/sys/devices/system/node/$nd/numastat") || 
 			die "cannot open $nd: $!\n"; 
 	if (! $mode) {
 		$title = sprintf("%16s",$nd) . $title;
diff -ur numactl-2.0.8~rc3/shm.c numactl-2.0.8~rc3-new/shm.c
--- numactl-2.0.8~rc3/shm.c	2011-12-19 15:51:35.000000000 +0100
+++ numactl-2.0.8~rc3-new/shm.c	2012-02-20 13:43:10.161267153 +0100
@@ -44,11 +44,19 @@
 int shmflags;
 static int shm_pagesize;

+extern char *rootpath;
+
 long huge_page_size(void)
 {
 	size_t len = 0;
 	char *line = NULL;
-	FILE *f = fopen("/proc/meminfo", "r");
+        char *env = getenv("HWLOC_FSROOT");
+	char *path;
+	FILE *f;
+
+	asprintf(&path, "%s/proc/meminfo", env ? : "");
+	f = fopen(path, "r");
+	free(path);
 	if (f != NULL) {
 		while (getdelim(&line, &len, '\n', f) > 0) {
 			int ps;

Reply via email to