On Tue, Mar 29, 2016 at 5:31 PM, Ron Yorston <[email protected]> wrote:
> The array applet_nameofs consumes two bytes per applet.  It encodes
>
>    nofork/noexec flags
>    suid flags
>    the offset of the applet name in the applet_name string
>
> Change the applet_table build tool to store the flags in two separate
> arrays (applet_flags and applet_suid).  Replace applet_nameofs with a
> smaller version that only stores a limited number of offsets.
>
> This requires changes to the macros APPLET_IS_NOFORK, APPLET_IS_NOEXEC
> and APPLET_SUID.  The APPLET_NAME macro is replaced by a function,
> get_applet_name, which uses a linear search though the applet_name
> string to find the applet name given its number.  As an optimisation
> get_applet_name stores the last applet number/name looked up.
>
> There are three cases where get_applet_name is used:
>
> 1) In install_links applets are processed in numerical order.  Every
>    time get_applet_name is called it is to search for the applet
>    number following the last stored value.
>
> 2) In find_applet_by_name get_applet_name is called in the comparison
>    function of a binary search.  Replace this with a linear search which
>    uses the known offsets to provide an improved starting value.
>
>    According to Valgrind the original find_applet_by_name required
>    353 cycles per call, averaged over all names.  Adjusting the number
>    of known offsets allows space to be traded off against execution time:
>
>       KNOWN_OFFSETS     cycles     bytes (wrt KNOWN_OFFSETS = 0)
>
>            0             9057        -
>            2             4604       32
>            4             2407       75
>            8             1342       98
>           16              908      130
>           32              884      194
>
>    This patch uses KNOWN_OFFSETS = 8.
>
> 3) In run_applet_no_and_exit and run_nofork_applet the applet number
>    passed to get_applet_name was obtained by an earlier call to
>    find_applet_by_name.  The stored value is often exactly the one
>    required so no linear search is needed.
>
> v2:
>    Remove some dead code from the applet_table tool;
>    Treat the applet in the middle of the table as a special case.
>
> v3:
>    Use the middle applet to adjust the start of the linear search as
>    well as the last applet found.
>
> v4:
>    Use an augmented linear search in find_applet_by_name.
>    Drop the special treatment of the middle name from get_applet_name:
>    most of the advantage now derives from the last stored value.
>
> v5:
>    Don't store index in applet_nameofs, it can be calculated.
>
> function                                             old     new   delta
> find_applet_by_name                                   31     132    +101
> applet_suid                                            -      91     +91
> get_applet_name                                        -      60     +60
> static.last_applet_name                                -       8      +8
> static.last_applet_no                                  -       4      +4
> run_applet_no_and_exit                               480     481      +1
> run_applet_and_exit                                  755     742     -13
> applet_name_compare                                   31       -     -31
> .rodata                                           155866  155243    -623
> applet_nameofs                                       728      14    -714
> ------------------------------------------------------------------------------
> (add/remove: 4/1 grow/shrink: 2/3 up/down: 265/-1381)       Total: -1116 bytes
>    text    data     bss     dec     hex filename
>  823821    4078    9088  836987   cc57b busybox_old
>  823316    4086    9096  836498   cc392 busybox_unstripped
>
> Signed-off-by: Ron Yorston <[email protected]>

Good.

Please try and if possible, benchmark the attached patch.


function                                             old     new   delta
find_applet_by_name                                   25     125    +100
applet_suid                                            -      92     +92
run_applet_no_and_exit                               452     460      +8
run_applet_and_exit                                  695     697      +2
applet_name_compare                                   31       -     -31
applet_nameofs                                       734      14    -720
------------------------------------------------------------------------------
(add/remove: 1/1 grow/shrink: 3/1 up/down: 202/-751)         Total: -549 bytes
   text       data        bss        dec        hex    filename
 925464        906      17160     943530      e65aa    busybox_old
 924915        906      17160     942981      e6385    busybox_unstripped
diff --git a/applets/applet_tables.c b/applets/applet_tables.c
index 92bf1e4..2ad9c0d 100644
--- a/applets/applet_tables.c
+++ b/applets/applet_tables.c
@@ -41,8 +41,6 @@ struct bb_applet {
 
 enum { NUM_APPLETS = ARRAY_SIZE(applets) };
 
-static int offset[NUM_APPLETS];
-
 static int cmp_name(const void *a, const void *b)
 {
        const struct bb_applet *aa = a;
@@ -60,22 +58,37 @@ static int str_isalnum_(const char *s)
        return 1;
 }
 
+// Before linear search, try to narrow it down by looking at N "equidistant" 
names:
+// KNOWN_APPNAME_OFFSETS  cycles  code_size
+//                     0    9057
+//                     2    4604        +32
+//                     4    2407        +75
+//                     8    1342        +98
+//                    16     908       +130
+//                    32     884       +194
+// With 8, applet_nameofs[] table has 7 elements.
+#define KNOWN_APPNAME_OFFSETS 8
+
 int main(int argc, char **argv)
 {
-       int i;
-       int ofs;
+       int i, j;
+       int ofs, offset[KNOWN_APPNAME_OFFSETS], index[KNOWN_APPNAME_OFFSETS];
 //     unsigned MAX_APPLET_NAME_LEN = 1;
 
        qsort(applets, NUM_APPLETS, sizeof(applets[0]), cmp_name);
 
+       for (i = 0; i < KNOWN_APPNAME_OFFSETS; i++)
+               index[i] = i * NUM_APPLETS / KNOWN_APPNAME_OFFSETS;
+
        ofs = 0;
        for (i = 0; i < NUM_APPLETS; i++) {
-               offset[i] = ofs;
+               for (j = 0; j < KNOWN_APPNAME_OFFSETS; j++)
+                       if (i == index[j])
+                               offset[j] = ofs;
                ofs += strlen(applets[i].name) + 1;
        }
-       /* We reuse 4 high-order bits of offset array for other purposes,
-        * so if they are indeed needed, refuse to proceed */
-       if (ofs > 0xfff)
+       /* If the list of names is too long refuse to proceed */
+       if (ofs > 0xffff)
                return 1;
        if (!argv[1])
                return 1;
@@ -94,7 +107,17 @@ int main(int argc, char **argv)
                printf("#define SINGLE_APPLET_STR \"%s\"\n", applets[0].name);
                printf("#define SINGLE_APPLET_MAIN %s_main\n", applets[0].main);
        }
-       printf("\n");
+
+       if (KNOWN_APPNAME_OFFSETS > 0 && NUM_APPLETS > 2*KNOWN_APPNAME_OFFSETS) 
{
+               printf("#define KNOWN_APPNAME_OFFSETS %u\n\n", 
KNOWN_APPNAME_OFFSETS);
+               printf("const uint16_t applet_nameofs[] ALIGN2 = {\n");
+               for (i = 1; i < KNOWN_APPNAME_OFFSETS; i++)
+                       printf("%d,\n", offset[i]);
+               printf("};\n\n");
+       }
+       else {
+               printf("#define KNOWN_APPNAME_OFFSETS 0\n\n");
+       }
 
        //printf("#ifndef SKIP_definitions\n");
        printf("const char applet_names[] ALIGN1 = \"\"\n");
@@ -119,20 +142,39 @@ int main(int argc, char **argv)
        printf("};\n");
        printf("#endif\n\n");
 
-       printf("const uint16_t applet_nameofs[] ALIGN2 = {\n");
-       for (i = 0; i < NUM_APPLETS; i++) {
-               printf("0x%04x,\n",
-                       offset[i]
 #if ENABLE_FEATURE_PREFER_APPLETS
-                       + (applets[i].nofork << 12)
-                       + (applets[i].noexec << 13)
+       printf("const uint8_t applet_flags[] ALIGN1 = {\n");
+       i = 0;
+       while (i < NUM_APPLETS) {
+               int v = applets[i].nofork + (applets[i].noexec << 1);
+               if (++i < NUM_APPLETS)
+                       v |= (applets[i].nofork + (applets[i].noexec << 1)) << 
2;
+               if (++i < NUM_APPLETS)
+                       v |= (applets[i].nofork + (applets[i].noexec << 1)) << 
4;
+               if (++i < NUM_APPLETS)
+                       v |= (applets[i].nofork + (applets[i].noexec << 1)) << 
6;
+               printf("0x%02x,\n", v);
+               i++;
+       }
+       printf("};\n\n");
 #endif
+
 #if ENABLE_FEATURE_SUID
-                       + (applets[i].need_suid << 14) /* 2 bits */
-#endif
-               );
+       printf("const uint8_t applet_suid[] ALIGN1 = {\n");
+       i = 0;
+       while (i < NUM_APPLETS) {
+               int v = applets[i].need_suid; /* 2 bits */
+               if (++i < NUM_APPLETS)
+                       v |= applets[i].need_suid << 2;
+               if (++i < NUM_APPLETS)
+                       v |= applets[i].need_suid << 4;
+               if (++i < NUM_APPLETS)
+                       v |= applets[i].need_suid << 6;
+               printf("0x%02x,\n", v);
+               i++;
        }
        printf("};\n\n");
+#endif
 
 #if ENABLE_FEATURE_INSTALLER
        printf("const uint8_t applet_install_loc[] ALIGN1 = {\n");
diff --git a/include/busybox.h b/include/busybox.h
index b1e31e5..737627b 100644
--- a/include/busybox.h
+++ b/include/busybox.h
@@ -15,25 +15,20 @@ PUSH_AND_SET_FUNCTION_VISIBILITY_TO_HIDDEN
 /* Keep in sync with applets/applet_tables.c! */
 extern const char applet_names[] ALIGN1;
 extern int (*const applet_main[])(int argc, char **argv);
-extern const uint16_t applet_nameofs[];
+extern const uint8_t applet_flags[] ALIGN1;
+extern const uint8_t applet_suid[] ALIGN1;
 extern const uint8_t applet_install_loc[] ALIGN1;
 
-#if ENABLE_FEATURE_SUID || ENABLE_FEATURE_PREFER_APPLETS
-# define APPLET_NAME(i) (applet_names + (applet_nameofs[i] & 0x0fff))
-#else
-# define APPLET_NAME(i) (applet_names + applet_nameofs[i])
-#endif
-
 #if ENABLE_FEATURE_PREFER_APPLETS
-# define APPLET_IS_NOFORK(i) (applet_nameofs[i] & (1 << 12))
-# define APPLET_IS_NOEXEC(i) (applet_nameofs[i] & (1 << 13))
+# define APPLET_IS_NOFORK(i) (applet_flags[(i)/4] & (1 << (2 * ((i)%4))))
+# define APPLET_IS_NOEXEC(i) (applet_flags[(i)/4] & (1 << ((2 * ((i)%4))+1)))
 #else
 # define APPLET_IS_NOFORK(i) 0
 # define APPLET_IS_NOEXEC(i) 0
 #endif
 
 #if ENABLE_FEATURE_SUID
-# define APPLET_SUID(i) ((applet_nameofs[i] >> 14) & 0x3)
+# define APPLET_SUID(i) ((applet_suid[(i)/4] >> (2 * ((i)%4)) & 3))
 #endif
 
 #if ENABLE_FEATURE_INSTALLER
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index 95e589e..c8637d3 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -139,36 +139,56 @@ void FAST_FUNC bb_show_usage(void)
        xfunc_die();
 }
 
-#if NUM_APPLETS > 8
-static int applet_name_compare(const void *name, const void *idx)
-{
-       int i = (int)(ptrdiff_t)idx - 1;
-       return strcmp(name, APPLET_NAME(i));
-}
-#endif
 int FAST_FUNC find_applet_by_name(const char *name)
 {
-#if NUM_APPLETS > 8
-       /* Do a binary search to find the applet entry given the name. */
+       unsigned i, max;
+       int j;
        const char *p;
-       p = bsearch(name, (void*)(ptrdiff_t)1, ARRAY_SIZE(applet_main), 1, 
applet_name_compare);
-       /*
-        * if (!p) return -1;
-        * ^^^^^^^^^^^^^^^^^^ the code below will do this if p == NULL :)
-        */
-       return (int)(ptrdiff_t)p - 1;
+
+       p = applet_names;
+       i = 0;
+#if KNOWN_APPNAME_OFFSETS <= 0
+       max = NUM_APPLETS;
 #else
-       /* A version which does not pull in bsearch */
-       int i = 0;
-       const char *p = applet_names;
-       while (i < NUM_APPLETS) {
-               if (strcmp(name, p) == 0)
-                       return i;
-               p += strlen(p) + 1;
+       max = NUM_APPLETS * KNOWN_APPNAME_OFFSETS;
+       for (j = ARRAY_SIZE(applet_nameofs)-1; j >= 0; j--) {
+               const char *pp = applet_names + applet_nameofs[j];
+               if (strcmp(name, pp) >= 0) {
+                       //bb_error_msg("name:'%s' >= pp:'%s'", name, pp);
+                       p = pp;
+                       i = max - NUM_APPLETS;
+                       break;
+               }
+               max -= NUM_APPLETS;
+       }
+       max /= (unsigned)KNOWN_APPNAME_OFFSETS;
+       i /= (unsigned)KNOWN_APPNAME_OFFSETS;
+       //bb_error_msg("name:'%s' starting from:'%s' i:%u max:%u", name, p, i, 
max);
+#endif
+
+       /* Open-coding without strcmp/strlen calls for speed */
+       while (i < max) {
+               char ch;
+               j = 0;
+               /* Do we see "name\0" in applet_names[p] position? */
+               while ((ch = *p) == name[j]) {
+                       if (ch == '\0') {
+                               //bb_error_msg("found:'%s' i:%u", name, i);
+                               return i; /* yes */
+                       }
+                       p++;
+                       j++;
+               }
+               /* No.
+                * p => 1st non-matching char in applet_names[],
+                * skip to and including NUL.
+                */
+               while (ch != '\0')
+                       ch = *++p;
+               p++;
                i++;
        }
        return -1;
-#endif
 }
 
 
@@ -583,6 +603,7 @@ static void install_links(const char *busybox, int 
use_symbolic_links,
         * busybox.h::bb_install_loc_t, or else... */
        int (*lf)(const char *, const char *);
        char *fpc;
+        const char *appname = applet_names;
        unsigned i;
        int rc;
 
@@ -593,7 +614,7 @@ static void install_links(const char *busybox, int 
use_symbolic_links,
        for (i = 0; i < ARRAY_SIZE(applet_main); i++) {
                fpc = concat_path_file(
                                custom_install_dir ? custom_install_dir : 
install_dir[APPLET_INSTALL_LOC(i)],
-                               APPLET_NAME(i));
+                               appname);
                // debug: bb_error_msg("%slinking %s to busybox",
                //              use_symbolic_links ? "sym" : "", fpc);
                rc = lf(busybox, fpc);
@@ -601,6 +622,8 @@ static void install_links(const char *busybox, int 
use_symbolic_links,
                        bb_simple_perror_msg(fpc);
                }
                free(fpc);
+                while (*appname++ != '\0')
+                        continue;
        }
 }
 # else
@@ -754,7 +777,7 @@ void FAST_FUNC run_applet_no_and_exit(int applet_no, char 
**argv)
 
        /* Reinit some shared global data */
        xfunc_error_retval = EXIT_FAILURE;
-       applet_name = APPLET_NAME(applet_no);
+       applet_name = bb_get_last_path_component_nostrip(argv[0]);
 
        /* Special case. POSIX says "test --help"
         * should be no different from e.g. "test --foo".
@@ -785,11 +808,14 @@ void FAST_FUNC run_applet_no_and_exit(int applet_no, char 
**argv)
 
 void FAST_FUNC run_applet_and_exit(const char *name, char **argv)
 {
-       int applet = find_applet_by_name(name);
-       if (applet >= 0)
-               run_applet_no_and_exit(applet, argv);
+       int applet;
+
        if (is_prefixed_with(name, "busybox"))
                exit(busybox_main(argv));
+       /* find_applet_by_name() search is more expensive, so goes second */
+       applet = find_applet_by_name(name);
+       if (applet >= 0)
+               run_applet_no_and_exit(applet, argv);
 }
 
 #endif /* !defined(SINGLE_APPLET_MAIN) */
diff --git a/libbb/vfork_daemon_rexec.c b/libbb/vfork_daemon_rexec.c
index d6ca7b2..1adb5b3 100644
--- a/libbb/vfork_daemon_rexec.c
+++ b/libbb/vfork_daemon_rexec.c
@@ -116,8 +116,6 @@ int FAST_FUNC run_nofork_applet(int applet_no, char **argv)
 
        save_nofork_data(&old);
 
-       applet_name = APPLET_NAME(applet_no);
-
        xfunc_error_retval = EXIT_FAILURE;
 
        /* In case getopt() or getopt32() was already called:
@@ -157,6 +155,7 @@ int FAST_FUNC run_nofork_applet(int applet_no, char **argv)
                 * need argv untouched because they free argv[i]! */
                char *tmp_argv[argc+1];
                memcpy(tmp_argv, argv, (argc+1) * sizeof(tmp_argv[0]));
+               applet_name = tmp_argv[0];
                /* Finally we can call NOFORK applet's main() */
                rc = applet_main[applet_no](argc, tmp_argv);
        } else {
_______________________________________________
busybox mailing list
[email protected]
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to