On 07/22/2016 10:46 AM, Philipp Thomas wrote:
> To help a customer that complained about the time it takes for df to check
> one of their machines with nearly 22000 lines in /proc/self/mountinfo, my
> collegue came up with this patch that uses a hash table for seaching in 
> filter_mount_list() and get_dev(). Data from said machine:
> 
> Without patch:
> real    0m2.273s
> user    0m0.768s
> sys     0m1.500s
> 
> real    0m1.731s
> user    0m0.532s
> sys     0m1.188s
> 
> With patch:
> real    0m1.286s
> user    0m0.052s
> sys     0m1.232s
> 
> real    0m1.066s
> user    0m0.028s
> sys     0m1.032s
> 
> Is this patch acceptable?

Hi Philipp,

the patch was missing, so I attached it from here:

https://build.opensuse.org/package/view_file/Base:System/coreutils/coreutils-df-hash-in-filter.patch?expand=1

Maybe adding an IF_LINT'ed hash_free call would be worth;
otherwise it looks good to me.

Have a nice day,
Berny
From: Philipp Thomas <[email protected]>
Date: 2016-07-22 10:34:59+02:00
Subject: speed up df
References: 
Upstream: 


Use hash table for seaching in filter_mount_list() and get_dev().
Patch was done by Josef Cejka <[email protected]>.

---
 src/df.c |   80 ++++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 18 deletions(-)

Index: coreutils-8.25/src/df.c
===================================================================
--- coreutils-8.25.orig/src/df.c	2016-07-22 10:31:00.838791851 +0200
+++ coreutils-8.25/src/df.c	2016-07-22 10:33:37.285989052 +0200
@@ -34,6 +34,7 @@
 #include "mountlist.h"
 #include "quote.h"
 #include "find-mount-point.h"
+#include "hash.h"
 
 /* The official name of this program (e.g., no 'g' prefix).  */
 #define PROGRAM_NAME "df"
@@ -43,14 +44,16 @@
   proper_name ("David MacKenzie"), \
   proper_name ("Paul Eggert")
 
-/* Filled with device numbers of examined file systems to avoid
-   duplicates in output.  */
-static struct devlist
+struct devlist
 {
   dev_t dev_num;
   struct mount_entry *me;
   struct devlist *next;
-} *device_list;
+};
+
+/* Filled with device numbers of examined file systems to avoid
+   duplicates in output.  */
+static Hash_table *devlist_table;
 
 /* If true, show even file systems with zero size or
    uninteresting types.  */
@@ -603,6 +606,37 @@ excluded_fstype (const char *fstype)
   return false;
 }
 
+static size_t
+devlist_hash (void const *x, size_t table_size)
+{
+  struct devlist const *p = x;
+  return (uintmax_t) p->dev_num % table_size;
+}
+
+static bool
+devlist_compare (void const *x, void const *y)
+{
+  struct devlist const *a = x;
+  struct devlist const *b = y;
+  return a->dev_num == b->dev_num;
+}
+
+static struct devlist *
+devlist_for_dev (dev_t dev)
+{
+  struct devlist dev_entry;
+  dev_entry.dev_num = dev;
+  if (devlist_table == NULL)
+    return NULL;
+  return hash_lookup(devlist_table, &dev_entry);
+}
+
+static void
+devlist_free(void *p)
+{
+  free(p);
+}
+
 /* Filter mount list by skipping duplicate entries.
    In the case of duplicates - based on the device number - the mount entry
    with a '/' in its me_devname (i.e., not pseudo name like tmpfs) wins.
@@ -615,6 +649,20 @@ filter_mount_list (bool devices_only)
 {
   struct mount_entry *me;
 
+  /* temp list to keep entries ordered */
+  struct devlist *device_list = NULL;
+  int mount_list_size = 0;
+
+  for (me = mount_list; me; me = me->me_next)
+    mount_list_size++;
+
+  devlist_table = hash_initialize (mount_list_size, NULL,
+                                 devlist_hash,
+                                 devlist_compare,
+                                 devlist_free);
+  if (devlist_table == NULL)
+    xalloc_die ();
+
   /* Sort all 'wanted' entries into the list device_list.  */
   for (me = mount_list; me;)
     {
@@ -635,9 +683,7 @@ filter_mount_list (bool devices_only)
       else
         {
           /* If we've already seen this device...  */
-          for (devlist = device_list; devlist; devlist = devlist->next)
-            if (devlist->dev_num == buf.st_dev)
-              break;
+          devlist = devlist_for_dev(buf.st_dev);
 
           if (devlist)
             {
@@ -697,6 +743,8 @@ filter_mount_list (bool devices_only)
           devlist->dev_num = buf.st_dev;
           devlist->next = device_list;
           device_list = devlist;
+          if (hash_insert (devlist_table, devlist) == NULL)
+            xalloc_die ();
 
           me = me->me_next;
         }
@@ -711,28 +759,24 @@ filter_mount_list (bool devices_only)
         me = device_list->me;
         me->me_next = mount_list;
         mount_list = me;
-        /* Free devlist entry and advance.  */
-        struct devlist *devlist = device_list->next;
-        free (device_list);
-        device_list = devlist;
+        device_list = device_list->next;
       }
+
+      hash_free(devlist_table);
+      devlist_table = NULL;
   }
 }
 
+
 /* Search a mount entry list for device id DEV.
    Return the corresponding mount entry if found or NULL if not.  */
 
 static struct mount_entry const * _GL_ATTRIBUTE_PURE
 me_for_dev (dev_t dev)
 {
-  struct devlist *dl = device_list;
-
-  while (dl)
-    {
-      if (dl->dev_num == dev)
+  struct devlist *dl = devlist_for_dev(dev);
+  if (dl)
         return dl->me;
-      dl = dl->next;
-    }
 
   return NULL;
 }

Reply via email to