It turns out that static linking of libibverbs never really worked,
which makes me wonder whether the people who insisted on building mthca.a
ever actually tried it.  Anyway, here's a patch that tries to fix
things up, although only one device driver can be linked in at a time
because everyone exports the same driver entry point.  Comments / test
results appreciated.

I'll check this in now, and if I don't get any bug reports then I'll
put out the latest libibverbs-1.0 tree as libibverbs-1.0.4 around
Tuesday or Wednesday of next week.

(I have a more complicated plan for libibverbs 1.1 that I'm still
coding up)

Thanks,
  Roland

--- libibverbs-1.0/ChangeLog    (revision 9973)
+++ libibverbs-1.0/ChangeLog    (working copy)
@@ -1,3 +1,17 @@
+2006-10-27  Roland Dreier  <[EMAIL PROTECTED]>
+
+       * src/init.c: Revise initialization order to fix static linking.
+       Using dlopen() on a device-specific driver from a statically
+       linked copy of libibverbs will crash, because the driver will
+       bring in dynamic copies of libibverbs and libdl that clash with
+       the copies already linked statically.
+
+       To fix this, we change the way we search for drivers: first we
+       find all uverbs devices and try the driver (if any) that is
+       linked in directly.  If all devices are handled by that driver,
+       then we don't proceed any further.  If not, then we try dynamic
+       loading of drivers and match them against any remaining devices.
+
 2006-10-17  Roland Dreier  <[EMAIL PROTECTED]>
 
        * include/infiniband/arch.h: Update i386 and x86_64 memory barrier
--- libibverbs-1.0/src/ibverbs.h        (revision 9973)
+++ libibverbs-1.0/src/ibverbs.h        (working copy)
@@ -60,6 +60,12 @@
 
 #define PFX            "libibverbs: "
 
+struct ibv_sysfs_dev {         /* one sysfs class device, kept on the singly linked sysfs_dev_list */
+       struct sysfs_class_device      *verbs_dev;     /* the sysfs class device this entry describes */
+       struct ibv_sysfs_dev           *next;          /* next entry in the list, NULL at the tail */
+       int                             have_driver;   /* 0 when created; set to 1 once a driver init function returns a device for it */
+};
+
 struct ibv_driver {
        ibv_driver_init_func    init_func;
        struct ibv_driver      *next;
--- libibverbs-1.0/src/init.c   (revision 9973)
+++ libibverbs-1.0/src/init.c   (working copy)
@@ -52,11 +52,52 @@
 
 HIDDEN int abi_ver;
 
-static char default_path[] = DRIVER_PATH;
+static const char default_path[] = DRIVER_PATH;
 static const char *user_path;
 
+static struct ibv_sysfs_dev *sysfs_dev_list;
 static struct ibv_driver *driver_list;
 
+static void find_sysfs_devs(void)      /* push every "infiniband_verbs" sysfs class device onto sysfs_dev_list */
+{
+       struct sysfs_class *cls;
+       struct dlist *verbs_dev_list;
+       struct sysfs_class_device *verbs_dev;
+       struct ibv_sysfs_dev *dev;
+
+       cls = sysfs_open_class("infiniband_verbs");     /* NOTE(review): cls is never closed here; presumably the stored verbs_dev pointers refer into it - confirm */
+       if (!cls) {
+               fprintf(stderr, PFX "Fatal: couldn't open sysfs class 'infiniband_verbs'.\n");
+               return;         /* sysfs_dev_list is left unchanged */
+       }
+
+       verbs_dev_list = sysfs_get_class_devices(cls);
+       if (!verbs_dev_list) {
+               fprintf(stderr, PFX "Fatal: no infiniband class devices found.\n");
+               return;         /* sysfs_dev_list is left unchanged */
+       }
+
+       dlist_for_each_data(verbs_dev_list, verbs_dev, struct sysfs_class_device) {
+               dev = malloc(sizeof *dev);
+               if (!dev) {
+                       fprintf(stderr, PFX "Warning: couldn't allocate device for %s\n",
+                               verbs_dev->name);
+                       continue;       /* skip this device only; keep scanning the rest */
+               }
+
+               dev->verbs_dev   = verbs_dev;
+               dev->next        = sysfs_dev_list;      /* insert at the head of the list */
+               dev->have_driver = 0;                   /* no driver has claimed it yet */
+               sysfs_dev_list   = dev;
+       }
+}
+
+__attribute__((weak))          /* weak symbol: a non-weak openib_driver_init linked into the executable replaces this stub */
+struct ibv_device *openib_driver_init(struct sysfs_class_device *dev)
+{
+        return NULL;           /* default stub: claims no device */
+}
+
 static void load_driver(char *so_path)
 {
        void *dlhandle;
@@ -79,7 +120,7 @@ static void load_driver(char *so_path)
 
        driver = malloc(sizeof *driver);
        if (!driver) {
-               fprintf(stderr, PFX "Fatal: couldn't allocate driver for %s\n", 
so_path);
+               fprintf(stderr, PFX "Warning: couldn't allocate driver for 
%s\n", so_path);
                dlclose(dlhandle);
                return;
        }
@@ -89,7 +130,7 @@ static void load_driver(char *so_path)
        driver_list       = driver;
 }
 
-static void find_drivers(char *dir)
+static void find_drivers(const char *dir)
 {
        size_t len = strlen(dir);
        glob_t so_glob;
@@ -101,9 +142,9 @@ static void find_drivers(char *dir)
                return;
 
        while (len && dir[len - 1] == '/')
-               dir[--len] = '\0';
+               --len;
 
-       asprintf(&pat, "%s/*.so", dir);
+       asprintf(&pat, "%.*s/*.so", (int) len, dir);
 
        ret = glob(pat, 0, NULL, &so_glob);
        free(pat);
@@ -120,10 +161,10 @@ static void find_drivers(char *dir)
        globfree(&so_glob);
 }
 
-static struct ibv_device *init_drivers(struct sysfs_class_device *verbs_dev)
+static struct ibv_device *try_driver(ibv_driver_init_func init_func,
+                                    struct sysfs_class_device *verbs_dev)
 {
        struct sysfs_class_device *ib_dev;
-       struct ibv_driver *driver;
        struct ibv_device *dev;
        char ibdev_name[64];
 
@@ -141,24 +182,14 @@ static struct ibv_device *init_drivers(s
                return NULL;
        }
 
-       for (driver = driver_list; driver; driver = driver->next) {
-               dev = driver->init_func(verbs_dev);
-               if (dev) {
-                       dev->dev    = verbs_dev;
-                       dev->ibdev  = ib_dev;
-                       dev->driver = driver;
-
-                       return dev;
-               }
+       dev = init_func(verbs_dev);
+       if (dev) {
+               dev->dev    = verbs_dev;
+               dev->ibdev  = ib_dev;
+               dev->driver = NULL;
        }
 
-       fprintf(stderr, PFX "Warning: no userspace device-specific driver found 
for %s\n"
-               "       driver search path: ", verbs_dev->name);
-       if (user_path)
-               fprintf(stderr, "%s:", user_path);
-       fprintf(stderr, "%s\n", default_path);
-
-       return NULL;
+       return dev;
 }
 
 static int check_abi_version(void)
@@ -191,26 +222,87 @@ static int check_abi_version(void)
        return 0;
 }
 
+static void add_device(struct ibv_device *dev,         /* append dev to the growable array *dev_list */
+                      struct ibv_device ***dev_list,   /* in/out: array base; may change when the array grows */
+                      int *num_devices,                /* in/out: number of entries in use */
+                      int *list_size)                  /* in/out: allocated capacity, in entries */
+{
+       struct ibv_device **new_list;
+
+       if (*list_size <= *num_devices) {
+               int new_size = *list_size ? *list_size * 2 : 1; /* double the capacity, starting at 1 */
+               new_list = realloc(*dev_list, new_size * sizeof *new_list);
+               if (!new_list)
+                       return;         /* dev is dropped; array and capacity stay as they were */
+               *dev_list  = new_list;
+               *list_size = new_size;  /* commit the new capacity only after realloc succeeded */
+       }
+       (*dev_list)[(*num_devices)++] = dev;    /* parentheses needed: [] and postfix ++ bind tighter than unary * */
+}
+
 HIDDEN int ibverbs_init(struct ibv_device ***list)
 {
        char *wr_path, *dir;
-       struct sysfs_class *cls;
-       struct dlist *verbs_dev_list;
-       struct sysfs_class_device *verbs_dev;
+       struct ibv_sysfs_dev *sysfs_dev, *next_dev;
        struct ibv_device *device;
-       struct ibv_device **new_list;
+       struct ibv_driver *driver;
        int num_devices = 0;
        int list_size = 0;
+       int no_driver = 0;
+       int statically_linked = 0;
 
        *list = NULL;
 
+       if (check_abi_version())
+               return 0;
+
        if (ibv_init_mem_map())
                return 0;
 
+       find_sysfs_devs();
+
+       /*
+        * First check if a driver statically linked in can support
+        * all the devices.  This is needed to avoid dlopen() in the
+        * all-static case (which will break because we end up with
+        * both a static and dynamic copy of libdl).
+        */
+       for (sysfs_dev = sysfs_dev_list; sysfs_dev; sysfs_dev = 
sysfs_dev->next) {
+               device = try_driver(openib_driver_init, sysfs_dev->verbs_dev);
+               if (device) {
+                       add_device(device, list, &num_devices, &list_size);
+                       sysfs_dev->have_driver = 1;
+               } else
+                       ++no_driver;
+       }
+
+       if (!no_driver)
+               goto out;
+
+       /*
+        * Check if we can dlopen() ourselves.  If this fails,
+        * libibverbs is probably statically linked into the
+        * executable, and we should just give up, since trying to
+        * dlopen() a driver module will fail spectacularly (loading a
+        * driver .so will bring in dynamic copies of libibverbs and
+        * libdl to go along with the static copies the executable
+        * has, which quickly leads to a crash).
+        */
+       {
+               void *hand = dlopen(NULL, RTLD_NOW);
+               if (!hand) {
+                       fprintf(stderr, PFX "Warning: dlopen(NULL) failed, "
+                               "assuming static linking.\n");
+                       statically_linked = 1;
+                       goto out;
+               }
+               dlclose(hand);
+       }
+
        find_drivers(default_path);
 
        /*
-        * Only follow use path passed in through the calling user's
+        * Only use path passed in through the calling user's
         * environment if we're not running SUID.
         */
        if (getuid() == geteuid()) {
@@ -222,42 +314,37 @@ HIDDEN int ibverbs_init(struct ibv_devic
                }
        }
 
-       /*
-        * Now check if a driver is statically linked.  Since we push
-        * drivers onto our driver list, the last driver we find will
-        * be the first one we try.
-        */
-       load_driver(NULL);
-
-       cls = sysfs_open_class("infiniband_verbs");
-       if (!cls) {
-               fprintf(stderr, PFX "Fatal: couldn't open sysfs class 
'infiniband_verbs'.\n");
-               return 0;
-       }
-
-       if (check_abi_version())
-               return 0;
-
-       verbs_dev_list = sysfs_get_class_devices(cls);
-       if (!verbs_dev_list) {
-               fprintf(stderr, PFX "Fatal: no infiniband class devices 
found.\n");
-               return 0;
+       for (sysfs_dev = sysfs_dev_list; sysfs_dev; sysfs_dev = 
sysfs_dev->next) {
+               if (sysfs_dev->have_driver)
+                       continue;
+               for (driver = driver_list; driver; driver = driver->next) {
+                       device = try_driver(driver->init_func, 
sysfs_dev->verbs_dev);
+                       if (device) {
+                               add_device(device, list, &num_devices, 
&list_size);
+                               sysfs_dev->have_driver = 1;
+                       }
+               }
        }
 
-       dlist_for_each_data(verbs_dev_list, verbs_dev, struct 
sysfs_class_device) {
-               device = init_drivers(verbs_dev);
-               if (device) {
-                       if (list_size <= num_devices) {
-                               list_size = list_size ? list_size * 2 : 1;
-                               new_list = realloc(*list, list_size * sizeof 
(struct ibv_device *));
-                               if (!new_list)
-                                       goto out;
-                               *list = new_list;
+out:
+       for (sysfs_dev = sysfs_dev_list, next_dev = sysfs_dev->next;
+            sysfs_dev;
+            sysfs_dev = next_dev, next_dev = sysfs_dev ? sysfs_dev->next : 
NULL) {
+               if (!sysfs_dev->have_driver) {
+                       fprintf(stderr, PFX "Warning: no userspace 
device-specific "
+                               " driver found for %s\n", 
sysfs_dev->verbs_dev->name);
+                       if (statically_linked)
+                               fprintf(stderr, "       When linking libibverbs 
statically, "
+                                       "driver must be statically linked 
too.\n");
+                       else {
+                               fprintf(stderr, "       driver search path: ");
+                               if (user_path)
+                                       fprintf(stderr, "%s:", user_path);
+                               fprintf(stderr, "%s\n", default_path);
                        }
-                       (*list)[num_devices++] = device;
                }
+               free(sysfs_dev);
        }
 
-out:
        return num_devices;
 }
--- libibverbs-1.0/README       (revision 9973)
+++ libibverbs-1.0/README       (working copy)
@@ -60,6 +60,23 @@ via the file /etc/security/limits.conf. 
 necessary if you are logging in via OpenSSH and your sshd is
 configured to use privilege separation.
 
+Static linking
+--------------
+
+In almost all cases it is better to dynamically link libibverbs into
+an application.  However, if you are forced to use static linking for
+libibverbs, then you will also have to link a device-specific
+userspace driver (such as libmthca, libipathverbs, libehca, etc)
+statically into your application.  This is because of limitations on
+dynamically loading new modules into a static executable.
+
+In particular, a static application can only be linked against a
+single device-specific driver, which means that the application will
+only work with a single type of device.  This limitation will be
+removed in future libibverbs releases, but this will require a change
+to the libibverbs ABI, so it cannot be done as part of the libibverbs
+1.0 release series.
+
 Valgrind support
 ----------------
 

_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to