The string of applet names seems ripe for compression:  it has a limited
range of characters and many common substrings.  If the compression is
too aggressive, though, the code required to handle it causes bloat.

Recursively replace common pairs of characters with a character having
its top bit set; the lower 7 bits being an index into an array of
character pairs.  This is only done if it results in a net saving,
otherwise the current code is used.

Provide an implementation of find_applet_by_name() which uses binary
search:  this reduces the number of string comparisons from an average
of 27.9 per lookup to 7.7.  This is important because of the added cost
of expanding strings before comparison.  (Compressing the input string
is uncompetitive.)

The new version of find_applet_by_name() is about 8% faster than the
old one when strings aren't abbreviated.  It isn't used in that case,
though, as it's larger than the current implementation.

When strings are abbreviated the new find_applet_by_name() is about
34% slower than currently.  (All comparisons are for the default
configuration.)

function                                             old     new   delta
applet_common                                          -     256    +256
find_applet_by_name                                  112     175     +63
expand_char                                            -      40     +40
expand_name                                            -      31     +31
run_applet_and_exit                                  675     697     +22
applet_nameofs                                        14      30     +16
applet_names                                        2536    1630    -906
------------------------------------------------------------------------------
(add/remove: 3/0 grow/shrink: 3/1 up/down: 428/-906)         Total: -478 bytes
   text    data     bss     dec     hex filename
 764477    2059    8808  775344   bd4b0 busybox_old
 763999    2059    8832  774890   bd2ea busybox_unstripped

Signed-off-by: Ron Yorston <[email protected]>
---
 applets/applet_tables.c | 260 +++++++++++++++++++++++++++++++++++++++++-------
 include/libbb.h         |   1 +
 libbb/appletlib.c       | 113 +++++++++++++++++++--
 libbb/lineedit.c        |   8 +-
 shell/ash.c             |   2 +-
 5 files changed, 340 insertions(+), 44 deletions(-)

diff --git a/applets/applet_tables.c b/applets/applet_tables.c
index 843f2ec..b8dd991 100644
--- a/applets/applet_tables.c
+++ b/applets/applet_tables.c
@@ -34,6 +34,7 @@ struct bb_applet {
        /* true if instead of fork(); exec("applet"); waitpid();
         * one can simply call applet_main(argc,argv); */
        unsigned char nofork;
+       char *abbrev;
 };
 
 /* Define struct bb_applet applets[] */
@@ -48,6 +49,21 @@ static int cmp_name(const void *a, const void *b)
        return strcmp(aa->name, bb->name);
 }
 
+#define MAXSUB 1000
+#define SUBLEN 2
+
+struct bb_substr {
+       char str[SUBLEN+1];
+       int save;
+};
+
+static int cmp_substr(const void *a, const void *b)
+{
+       const struct bb_substr *aa = a;
+       const struct bb_substr *bb = b;
+       return bb->save - aa->save;
+}
+
 static int str_isalnum_(const char *s)
 {
        while (*s) {
@@ -58,25 +74,31 @@ static int str_isalnum_(const char *s)
        return 1;
 }
 
+void print_char(char c)
+{
+       if (c & 0x80) {
+               printf("\\%o", (unsigned int)(c) & 0xff);
+       }
+       else {
+               putchar(c);
+       }
+}
+
 int main(int argc, char **argv)
 {
-       int i, j;
-
-       // In find_applet_by_name(), before linear search, narrow it down
-       // by looking at N "equidistant" names. With ~350 applets:
-       // KNOWN_APPNAME_OFFSETS  cycles
-       //                     0    9057
-       //                     2    4604 + ~100 bytes of code
-       //                     4    2407 + 4 bytes
-       //                     8    1342 + 8 bytes
-       //                    16     908 + 16 bytes
-       //                    32     884 + 32 bytes
-       // With 8, int16_t applet_nameofs[] table has 7 elements.
+       int i, j, k, n;
+       struct bb_substr substr[MAXSUB];
+       int minsub, numsub;
+       int minidx, maxidx, index;
+       char *s;
+       int MAX_APPLET_NAME_LEN = 0;
        int KNOWN_APPNAME_OFFSETS = 8;
-       // With 128 applets we do two linear searches, with 1..7 strcmp's in 
the first one
-       // and 1..16 strcmp's in the second. With 256 apps, second search does 
1..32 strcmp's.
-       if (NUM_APPLETS < 128)
+       int ABBREV_APPLET_NAMES = 1;
+
+       if (NUM_APPLETS < 128) {
                KNOWN_APPNAME_OFFSETS = 4;
+               ABBREV_APPLET_NAMES = 0;
+       }
        if (NUM_APPLETS < 32)
                KNOWN_APPNAME_OFFSETS = 0;
 
@@ -99,6 +121,174 @@ int main(int argc, char **argv)
                printf("#define SINGLE_APPLET_MAIN %s_main\n", applets[0].main);
        }
 
+       for (i = 0; i < NUM_APPLETS; i++) {
+               const char *t;
+
+               applets[i].abbrev = strdup(applets[i].name);
+               for (t = applets[i].name; ABBREV_APPLET_NAMES && *t; t++) {
+                       /* if any char has its top bit set we can't use 
substrings */
+                       if (*t & 0x80)
+                               ABBREV_APPLET_NAMES = 0;
+               }
+               if (MAX_APPLET_NAME_LEN < strlen(applets[i].name))
+                       MAX_APPLET_NAME_LEN = strlen(applets[i].name);
+       }
+
+       /* determine if applet names can be compressed */
+       minsub = numsub = 0;
+       minidx = index = 0;
+       maxidx = 96;    /* leave a few slots free initially */
+       if (ABBREV_APPLET_NAMES) {
+               int count, oldlen, newlen, newtot;
+
+ again:
+               /* make a first pass looking for common substrings */
+               for (i = 0; i < NUM_APPLETS; i++) {
+                       int len;
+                       char sub[SUBLEN+1];
+
+                       if ((len=strlen(applets[i].abbrev)) < SUBLEN)
+                               continue;
+
+                       for (j = 0; j < len-SUBLEN+1; j++) {
+                               strncpy(sub, applets[i].abbrev+j, SUBLEN);
+                               sub[SUBLEN] = '\0';
+
+                               /* skip substrings we've seen before */
+                               for (n = minsub; n < numsub; n++) {
+                                       if (strcmp(sub, substr[n].str) == 0) {
+                                               break;
+                                       }
+                               }
+                               if (n != numsub) {
+                                       continue;
+                               }
+
+                               count = 1;
+                               for (k = i + 1; k < NUM_APPLETS; k++) {
+                                       if (strstr(applets[k].abbrev, sub)) {
+                                               ++count;
+                                       }
+                               }
+
+                               /* only record substrings that result in a 
saving */
+                               if (count > 1) {
+                                       int save = count * SUBLEN;
+                                       int cost = count + SUBLEN;
+                                       if ( save > cost && numsub < MAXSUB ) {
+                                               strcpy(substr[numsub].str, sub);
+                                               substr[numsub++].save = save - 
cost;
+                                       }
+                               }
+                       }
+               }
+
+               /* sort by number of bytes saved */
+               qsort(substr+minsub, numsub-minsub, sizeof(substr[0]), 
cmp_substr);
+
+               for (j = minsub, index = minidx; j < numsub; j++) {
+                       /* check that each substring still gives a saving */
+                       count = 0;
+                       for (i = 0; i < NUM_APPLETS && index < maxidx; i++) {
+                               if (strstr(applets[i].abbrev, substr[j].str) != 
NULL) {
+                                       ++count;
+                               }
+                       }
+
+                       if ( count*SUBLEN > count+SUBLEN ) {
+                               /* replace each instance of substring with a 
character with */
+                               /* its top bit set, the remaining 7 bits are 
its index */
+                               for (i = 0; i < NUM_APPLETS; i++) {
+                                       while ((s=strstr(applets[i].abbrev, 
substr[j].str))) {
+                                               *s++ = 0x80 + index;
+                                               strcpy(s, s+SUBLEN-1);
+                                       }
+                               }
+                               ++index;
+                       }
+                       else {
+                               /* this substring no longer gives a saving */
+                               substr[j].save = 0;
+                       }
+               }
+
+               /* recurse a couple of times */
+               if (maxidx == 96) {
+                       minsub = numsub;
+                       minidx = index;
+                       maxidx = 120;
+                       goto again;
+               }
+               else if (maxidx == 120) {
+                       minsub = numsub;
+                       minidx = index;
+                       maxidx = 128;
+                       goto again;
+               }
+
+               /* calculate lengths of new and old strings */
+               oldlen = newlen = 1;
+               for (i = 0; i < NUM_APPLETS; i++) {
+                       oldlen += strlen(applets[i].name) + 1;
+                       newlen += strlen(applets[i].abbrev) + 1;
+               }
+
+               /* only use substrings if the saving outweighs the larger code 
*/
+               newtot = newlen + (index*SUBLEN) + 160 + 
2*KNOWN_APPNAME_OFFSETS;
+               ABBREV_APPLET_NAMES = (newtot < oldlen);
+
+               if (ABBREV_APPLET_NAMES) {
+                       /* use some of our savings to speed up the linear 
search */
+                       KNOWN_APPNAME_OFFSETS *= 2;
+                       printf("/* strings reduced from %d to %d = (%d + %d) 
bytes */\n",
+                               oldlen, newlen+(index*SUBLEN), newlen, 
index*SUBLEN);
+               }
+       }
+
+       printf("#define ABBREV_APPLET_NAMES %d\n\n", ABBREV_APPLET_NAMES);
+
+       //printf("#ifndef SKIP_definitions\n");
+       if (ABBREV_APPLET_NAMES) {
+               char c = 0x80;
+
+               printf("static const char applet_common[][2] ALIGN1 = {\n");
+               for (n = 0; n < numsub; n++) {
+                       if (substr[n].save != 0) {
+                               printf("/* ");
+                               print_char(c++);
+                               printf(" */ ");
+                               printf("{'");
+                               print_char(substr[n].str[0]);
+                               printf("', '");
+                               print_char(substr[n].str[1]);
+                               printf("'},\n");
+                       }
+               }
+               printf("};\n\n");
+
+               printf("const char applet_names[] ALIGN1 = \"\"\n");
+               for (i = 0; i < NUM_APPLETS; i++) {
+                       putchar('"');
+                       for (s = applets[i].abbrev; *s; s++) {
+                               if (*s & 0x80) {
+                                       printf("\\%o", (unsigned int)(*s) & 
0xff);
+                               }
+                               else {
+                                       putchar(*s);
+                               }
+                       }
+                       printf("\" \"\\0\"\n");
+               }
+               printf(";\n\n");
+       }
+       else {
+               printf("const char applet_names[] ALIGN1 = \"\"\n");
+               for (i = 0; i < NUM_APPLETS; i++) {
+                       printf("\"%s\" \"\\0\"\n", applets[i].name);
+               }
+               printf(";\n\n");
+       }
+
        printf("#define KNOWN_APPNAME_OFFSETS %u\n\n", KNOWN_APPNAME_OFFSETS);
        if (KNOWN_APPNAME_OFFSETS > 0) {
                int ofs, offset[KNOWN_APPNAME_OFFSETS], 
index[KNOWN_APPNAME_OFFSETS];
@@ -109,26 +299,20 @@ int main(int argc, char **argv)
                        for (j = 0; j < KNOWN_APPNAME_OFFSETS; j++)
                                if (i == index[j])
                                        offset[j] = ofs;
-                       ofs += strlen(applets[i].name) + 1;
+                       if (i == NUM_APPLETS-1)
+                               printf("#define LAST_APPLET_NAME_OFFSET 
%d\n\n", ofs);
+                       ofs += strlen(ABBREV_APPLET_NAMES ?
+                                       applets[i].abbrev : applets[i].name) + 
1;
                }
                /* If the list of names is too long refuse to proceed */
                if (ofs > 0xffff)
                        return 1;
-               printf("const uint16_t applet_nameofs[] ALIGN2 = {\n");
+               printf("static const uint16_t applet_nameofs[] ALIGN2 = {\n");
                for (i = 1; i < KNOWN_APPNAME_OFFSETS; i++)
                        printf("%d,\n", offset[i]);
                printf("};\n\n");
        }
 
-       //printf("#ifndef SKIP_definitions\n");
-       printf("const char applet_names[] ALIGN1 = \"\"\n");
-       for (i = 0; i < NUM_APPLETS; i++) {
-               printf("\"%s\" \"\\0\"\n", applets[i].name);
-//             if (MAX_APPLET_NAME_LEN < strlen(applets[i].name))
-//                     MAX_APPLET_NAME_LEN = strlen(applets[i].name);
-       }
-       printf(";\n\n");
-
        for (i = 0; i < NUM_APPLETS; i++) {
                if (str_isalnum_(applets[i].name))
                        printf("#define APPLET_NO_%s %d\n", applets[i].name, i);
@@ -190,26 +374,32 @@ int main(int argc, char **argv)
        printf("};\n");
 #endif
        //printf("#endif /* SKIP_definitions */\n");
-//     printf("\n");
-//     printf("#define MAX_APPLET_NAME_LEN %u\n", MAX_APPLET_NAME_LEN);
+       printf("\n");
+       printf("#define MAX_APPLET_NAME_LEN %u\n", MAX_APPLET_NAME_LEN);
 
        if (argv[2]) {
-               char line_old[80];
-               char line_new[80];
+               char line_old[2][80];
+               char line_new[2][80];
                FILE *fp;
 
-               line_old[0] = 0;
+               line_old[0][0] = 0;
+               line_old[1][0] = 0;
                fp = fopen(argv[2], "r");
                if (fp) {
-                       fgets(line_old, sizeof(line_old), fp);
+                       fgets(line_old[0], sizeof(line_old[0]), fp);
+                       fgets(line_old[1], sizeof(line_old[1]), fp);
                        fclose(fp);
                }
-               sprintf(line_new, "#define NUM_APPLETS %u\n", NUM_APPLETS);
-               if (strcmp(line_old, line_new) != 0) {
+               sprintf(line_new[0], "#define NUM_APPLETS %u\n", NUM_APPLETS);
+               sprintf(line_new[1], ABBREV_APPLET_NAMES ?
+                               "\n" : "#define expand_name(n) (n)\n");
+               if (strcmp(line_old[0], line_new[0]) != 0 ||
+                               strcmp(line_old[1], line_new[1]) != 0) {
                        fp = fopen(argv[2], "w");
                        if (!fp)
                                return 1;
-                       fputs(line_new, fp);
+                       fputs(line_new[0], fp);
+                       fputs(line_new[1], fp);
                }
        }
 
diff --git a/include/libbb.h b/include/libbb.h
index 111dd66..714283b 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1229,6 +1229,7 @@ const struct hwtype *get_hwntype(int type) FAST_FUNC;
 
 #ifndef BUILD_INDIVIDUAL
 extern int find_applet_by_name(const char *name) FAST_FUNC;
+extern const char *expand_name(const char *name) FAST_FUNC;
 /* Returns only if applet is not found. */
 extern void run_applet_and_exit(const char *name, char **argv) FAST_FUNC;
 extern void run_applet_no_and_exit(int a, char **argv) NORETURN FAST_FUNC;
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index b682e6b..aef98cc 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -139,6 +139,7 @@ void FAST_FUNC bb_show_usage(void)
        xfunc_die();
 }
 
+#if !ABBREV_APPLET_NAMES
 int FAST_FUNC find_applet_by_name(const char *name)
 {
        unsigned i, max;
@@ -265,6 +266,104 @@ int FAST_FUNC find_applet_by_name(const char *name)
        return -1;
 #endif
 }
+#define expand_name(n) (n)
+#else /* ABBREV_APPLET_NAMES */
+static char *expand_char(char *t, int c)
+{
+       if (c & 0x80) {
+               /* substitute common substring for char with top bit set */
+               const char *u = applet_common[c & 0x7f];
+               t = expand_char(t, u[0]);
+               t = expand_char(t, u[1]);
+       }
+       else {
+               *t++ = c;
+       }
+
+       return t;
+}
+
+const char * FAST_FUNC expand_name(const char *name)
+{
+       static char expand[MAX_APPLET_NAME_LEN+1];
+       char *t = expand;
+
+       while (*name)
+               t = expand_char(t, *name++);
+       *t = '\0';
+
+       return expand;
+}
+
+int FAST_FUNC find_applet_by_name(const char *name)
+{
+       int i, ret, lower, upper, middle;
+
+       /* Do a binary search to find the applet entry given the name. */
+
+       lower = 0;
+       upper = LAST_APPLET_NAME_OFFSET;
+       do {
+               middle = (upper + lower) >> 1;
+               /* the new index is probably not at the start of a name */
+               /* first move forward to the start of the next name... */
+               while (applet_names[middle++])
+                       continue;
+               if (middle == upper) {
+                       /* ... if that didn't work go back to start of previous 
name, */
+                       /* taking care not to fall off the start of the string 
*/
+                       do {
+                               middle--;
+                       } while (middle && applet_names[middle-1]);
+                       if (middle == lower) {
+                               /*
+                                * End of binary search.  The bounds are either 
values
+                                * that were previously 'middle' and have been 
checked
+                                * already or are 0 or LAST_APPLET_NAME_OFFSET, 
which
+                                * still need to be checked.
+                                */
+                               if (lower == 0)
+                                       /* middle = lower */;
+                               else if (upper == LAST_APPLET_NAME_OFFSET)
+                                       middle = upper;
+                               else
+                                       break;
+                       }
+               }
+               ret = strcmp(name, expand_name(applet_names+middle));
+               if (ret < 0)
+                       upper = middle;
+               else if (ret > 0)
+                       lower = middle;
+               else
+                       goto found;
+       } while (lower != upper);
+
+       return -1;
+
+ found:
+       /*
+        * Find index of name by linear search, using the known offsets to
+        * limit the range we need to search.
+        */
+       lower = 0;
+       ret = NUM_APPLETS * (KNOWN_APPNAME_OFFSETS-1);
+       for (i = KNOWN_APPNAME_OFFSETS-2; i >= 0; i--) {
+               if (middle >= applet_nameofs[i]) {
+                       lower = applet_nameofs[i];
+                       ret /= KNOWN_APPNAME_OFFSETS;
+                       break;
+               }
+               ret -= NUM_APPLETS;
+       }
+       while (lower < middle) {
+               lower += strlen(applet_names+lower) + 1;
+               ++ret;
+       }
+
+       return ret;
+}
+#endif
 
 
 void lbb_prepare(const char *applet
@@ -678,7 +777,7 @@ static void install_links(const char *busybox, int 
use_symbolic_links,
         * busybox.h::bb_install_loc_t, or else... */
        int (*lf)(const char *, const char *);
        char *fpc;
-        const char *appname = applet_names;
+       const char *appname = applet_names;
        unsigned i;
        int rc;
 
@@ -689,7 +788,7 @@ static void install_links(const char *busybox, int 
use_symbolic_links,
        for (i = 0; i < ARRAY_SIZE(applet_main); i++) {
                fpc = concat_path_file(
                                custom_install_dir ? custom_install_dir : 
install_dir[APPLET_INSTALL_LOC(i)],
-                               appname);
+                               expand_name(appname));
                // debug: bb_error_msg("%slinking %s to busybox",
                //              use_symbolic_links ? "sym" : "", fpc);
                rc = lf(busybox, fpc);
@@ -760,7 +859,8 @@ static int busybox_main(char **argv)
                /* prevent last comma to be in the very last pos */
                output_width--;
                while (*a) {
-                       int len2 = strlen(a) + 2;
+                       const char *name = expand_name(a);
+                       int len2 = strlen(name) + 2;
                        if (col >= (int)output_width - len2) {
                                full_write2_str(",\n");
                                col = 0;
@@ -771,9 +871,10 @@ static int busybox_main(char **argv)
                        } else {
                                full_write2_str(", ");
                        }
-                       full_write2_str(a);
+                       full_write2_str(name);
                        col += len2;
-                       a += len2 - 1;
+                       while (*a++ != '\0')
+                               continue;
                }
                full_write2_str("\n\n");
                return 0;
@@ -788,7 +889,7 @@ static int busybox_main(char **argv)
                        if (argv[1][6]) /* --list-full? */
                                
full_write2_str(install_dir[APPLET_INSTALL_LOC(i)] + 1);
 # endif
-                       full_write2_str(a);
+                       full_write2_str(expand_name(a));
                        full_write2_str("\n");
                        i++;
                        while (*a++ != '\0')
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 3e62f46..83c13ae 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -780,8 +780,12 @@ static NOINLINE unsigned complete_cmd_dir_file(const char 
*command, int type)
                const char *p = applet_names;
 
                while (*p) {
-                       if (strncmp(pfind, p, pf_len) == 0)
-                               add_match(xstrdup(p));
+                       const char *name = expand_name(p);
+                       int ret = strncmp(pfind, name, pf_len);
+                       if (ret < 0)
+                               break;
+                       else if (ret == 0)
+                               add_match(xstrdup(name));
                        while (*p++ != '\0')
                                continue;
                }
diff --git a/shell/ash.c b/shell/ash.c
index da9c950..7800f32 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -12592,7 +12592,7 @@ helpcmd(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
        {
                const char *a = applet_names;
                while (*a) {
-                       col += out1fmt("%c%s", ((col == 0) ? '\t' : ' '), a);
+                       col += out1fmt("%c%s", ((col == 0) ? '\t' : ' '), 
expand_name(a));
                        if (col > 60) {
                                out1fmt("\n");
                                col = 0;
-- 
2.5.5

_______________________________________________
busybox mailing list
[email protected]
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to