Hi!

Busybox uses bsearch() to locate the applet's main function in
find_applet_by_name(). Running 'make defconfig' results in 355 applets
being built and this in turn results in 8-9 calls to strcmp() on average
per busybox execution.

Maybe we should switch to using a simple static hashtable? The following
patch is a simple & dirty proof of concept to show what I mean. It modifies
applet_tables.c to generate a static hashtable containing indicies of fields
in applet_nameofs. I used a simple and fast hash function taken from
Robert Jenkins.

With this patch, on each execution and after the hash computation, the
number of calls to strcmp() has been limited to four at most, and mostly
it's just one or two. There are no calls to applet_name_compare() too.

The patch results in bigger code, but there's room for improvement as
we could probably get rid of some of the arrays generated by applet_tables
and unify the hashtables used in busybox applets.

If there's any interest, I can prepare a better, more memory-wise optimized
version.

Best regards,
Bartosz Golaszewski

---
 applets/applet_tables.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-
 libbb/appletlib.c       | 45 ++++++++++++++++++++++++++++++++------
 2 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/applets/applet_tables.c b/applets/applet_tables.c
index 94b974e..9656b5c 100644
--- a/applets/applet_tables.c
+++ b/applets/applet_tables.c
@@ -14,6 +14,7 @@
 #include <string.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <stdint.h>
 
 #undef ARRAY_SIZE
 #define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0])))
@@ -42,6 +43,26 @@ enum { NUM_APPLETS = ARRAY_SIZE(applets) };
 
 static int offset[NUM_APPLETS];
 
+#define MAX_BUCKET_SIZE 16
+static int applet_hashtable[NUM_APPLETS][16];
+
+static unsigned jenkins_hash(const char *key, size_t len)
+{
+       unsigned hash, i;
+
+       for(hash = i = 0; i < len; ++i) {
+               hash += key[i];
+               hash += (hash << 10);
+               hash ^= (hash >> 6);
+       }
+
+       hash += (hash << 3);
+       hash ^= (hash >> 11);
+       hash += (hash << 15);
+
+       return hash;
+}
+
 static int cmp_name(const void *a, const void *b)
 {
        const struct bb_applet *aa = a;
@@ -51,7 +72,7 @@ static int cmp_name(const void *a, const void *b)
 
 int main(int argc, char **argv)
 {
-       int i;
+       int i, j;
        int ofs;
 //     unsigned MAX_APPLET_NAME_LEN = 1;
 
@@ -129,6 +150,41 @@ int main(int argc, char **argv)
        }
        printf("};\n");
 #endif
+
+       // Initialize local hashtable
+       for (i = 0; i < NUM_APPLETS; i++) {
+               for (j = 0; j < MAX_BUCKET_SIZE; j++)
+                       applet_hashtable[i][j] = -1;
+       }
+
+       // For each applet - place it in appropriate bucket
+       for (i = 0; i < NUM_APPLETS; i++) {
+               unsigned ind = jenkins_hash(applets[i].name,
+                                       strlen(applets[i].name)) % NUM_APPLETS;
+
+               for (j = 0; j < MAX_BUCKET_SIZE; j++) {
+                       if (applet_hashtable[ind][j] < 0) {
+                               applet_hashtable[ind][j] = i;
+                               break;
+                       }
+               }
+       }
+
+       // Create a static array for each bucket
+       for (i = 0; i < NUM_APPLETS; i++) {
+               printf("const int16_t bucket%d[] = { ", i);
+               for (j = 0; applet_hashtable[i][j] >= 0; j++) {
+                       printf("%d, ", applet_hashtable[i][j]);
+               }
+               printf(" -1 };\n");
+       }
+
+       // Create a static array of pointers to the buckets
+       printf("\nconst int16_t *applet_hashtab[] = {\n");
+       for (i = 0; i < NUM_APPLETS; i++)
+               printf("\tbucket%d,\n", i);
+       printf("};\n");
+
        //printf("#endif /* SKIP_definitions */\n");
 //     printf("\n");
 //     printf("#define MAX_APPLET_NAME_LEN %u\n", MAX_APPLET_NAME_LEN);
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index f7c416e..6be536d 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -52,7 +52,6 @@
 
 #include "usage_compressed.h"
 
-
 #if ENABLE_SHOW_USAGE && !ENABLE_FEATURE_COMPRESS_USAGE
 static const char usage_messages[] ALIGN1 = UNPACKED_USAGE;
 #else
@@ -140,23 +139,55 @@ void FAST_FUNC bb_show_usage(void)
 }
 
 #if NUM_APPLETS > 8
-static int applet_name_compare(const void *name, const void *idx)
+static unsigned jenkins_hash(const char *key, size_t len)
 {
-       int i = (int)(ptrdiff_t)idx - 1;
-       return strcmp(name, APPLET_NAME(i));
+       unsigned hash, i;
+
+       for(hash = i = 0; i < len; ++i) {
+               hash += key[i];
+               hash += (hash << 10);
+               hash ^= (hash >> 6);
+       }
+
+       hash += (hash << 3);
+       hash ^= (hash >> 11);
+       hash += (hash << 15);
+
+       return hash;
 }
+
+//static int applet_name_compare(const void *name, const void *idx)
+//{
+//     int i = (int)(ptrdiff_t)idx - 1;
+//     return strcmp(name, APPLET_NAME(i));
+//}
 #endif
 int FAST_FUNC find_applet_by_name(const char *name)
 {
 #if NUM_APPLETS > 8
        /* Do a binary search to find the applet entry given the name. */
-       const char *p;
-       p = bsearch(name, (void*)(ptrdiff_t)1, ARRAY_SIZE(applet_main), 1, 
applet_name_compare);
+       //const char *p;
+       //p = bsearch(name, (void*)(ptrdiff_t)1, ARRAY_SIZE(applet_main), 1, 
applet_name_compare);
+
+       unsigned ind;
+       int ret = -1, i = 0;
+
+       ind = jenkins_hash(name, strlen(name)) % NUM_APPLETS;
+       while (applet_hashtab[ind][i] != -1) {
+               if (strcmp(name, APPLET_NAME(applet_hashtab[ind][i])) == 0) {
+                       ret = applet_hashtab[ind][i];
+                       break;
+               }
+               ++i;
+       }
+
+       return ret;
+
        /*
         * if (!p) return -1;
         * ^^^^^^^^^^^^^^^^^^ the code below will do this if p == NULL :)
         */
-       return (int)(ptrdiff_t)p - 1;
+       //return (int)(ptrdiff_t)p - 1;
 #else
        /* A version which does not pull in bsearch */
        int i = 0;
-- 
1.9.1

_______________________________________________
busybox mailing list
[email protected]
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to