And this smaller diff excludes the node-based table.

Since revision 1.8 (extra sanity checking for afree()) is the
youngest commit of alloc.c i guess that checking should not be
removed.  But that unidirectional list walk is a real killer:
with the test from one of my former mails and 10000 variables this
results in 282.724.854 list-node walks, with 21000 variables it
results in 1.228.967.558 list-walks.  (And not joking!)

So use a hashtable to manage allocations of and in struct Area, and
only use singly-linked nodes therein, since the most time-critical
part can is performed with unidirectional walks anyway.  This
results in a faster execution with a smaller memory footprint.

(It must be said that the following benchmarks use the long
variable prefix.  But struct link is now only one pointer, so the
memory overhead of the node-based table is eliminated.)

Benchmarks, original, 10000 variables:

    ?0%1[steffen@obsd src]$ /home/src/bin/ksh/obj/ksh tests/table-test.sh
    0m1.97s real     0m1.86s user     0m0.06s system
    0m1.47s real     0m1.40s user     0m0.03s system
    0m1.47s real     0m1.44s user     0m0.01s system
    init:2010144, freeall:2100161, alloc:9206483, resize:2760301,
    free:4905640 (walks:282724854)

    29603 R+    55.0  2628  2368 ttyp1

Benchmarks, patched (also using hashmap in hashtable), 10000 variables:

    ?0%0[steffen@obsd ksh]$ ./ksh tests/table-test.sh
    0m1.37s real     0m1.32s user     0m0.04s system
    0m1.28s real     0m1.25s user     0m0.01s system
    0m1.27s real     0m1.20s user     0m0.05s system
    init:2010144, freeall:2100161, alloc:9206483, resize:2760301,
    free:4905636 (walks:39262535)
    ...
    free:4905636 (walks:39260263)
    ...
    free:4905636 (walks:39260811)

    16713 R+    84.7  2392  2152 ttyp0

Benchmarks, original, 21000 variables:

    ?0%1[steffen@obsd src]$ /home/src/bin/ksh/obj/ksh tests/table-test.sh
    0m14.30s real     0m13.61s user     0m0.19s system
    0m4.30s real     0m4.19s user     0m0.04s system
    0m4.35s real     0m4.17s user     0m0.07s system
    init:4221144, freeall:4410161, alloc:19331984, resize:5796301,
    free:10301141 (walks:1228967558)

    13799 R+    97.4  4568  4196 ttyp1

Benchmarks, patched (also using hashmap in hashtable), 21000 variables:

    ?0%0[sdaoden@obsd ksh]$ ./ksh tests/table-test.sh
    0m4.32s real     0m3.87s user     0m0.15s system
    0m3.09s real     0m2.93s user     0m0.01s system
    0m3.10s real     0m3.04s user     0m0.02s system
    init:4221144, freeall:4410161, alloc:19331984, resize:5796301,
    free:10301137 (walks:242558821)  <- HASHMAP: 5 slots
    ...
    free:10301137 (walks:242561158)  <- HASHMAP: 5 slots
    ...
    (free:10301137 (walks:173195475)  <- HASHMAP: 7 slots)

    11285 R+    91.1  3980  3696 ttyp0

With a hashmap of seven slots one page more is shown by ps(1),
but for normal use cases five slots seem sufficient.
Also struct Area ends up with 40-bytes on 64-bit, not 56 as it
would otherwise.

--steffen

diff --git a/alloc.c b/alloc.c
index 7e41c2c..7e986a0 100644
--- a/alloc.c
+++ b/alloc.c
@@ -30,28 +30,35 @@
 
 #include "sh.h"
 
+#define ALLOCIDX(AP,P) ((size_t)(P) % NELEM((AP)->allocs))
+
 struct link {
-       struct link *prev;
        struct link *next;
 };
 
 Area *
 ainit(Area *ap)
 {
-       ap->freelist = NULL;
-       return ap;
+       bzero(ap->allocs, sizeof(ap->allocs));
+       return (ap);
 }
 
 void
 afreeall(Area *ap)
 {
-       struct link *l, *l2;
+       size_t i;
+
+       for (i = NELEM(ap->allocs); i-- != 0;) {
+               struct link *l = ap->allocs[i];
+               ap->allocs[i] = NULL;
 
-       for (l = ap->freelist; l != NULL; l = l2) {
-               l2 = l->next;
-               free(l);
+               while (l != NULL) {
+                       struct link *l2 = l;
+                       l = l->next;
+                       free(l2);
+               }
        }
-       ap->freelist = NULL;
+       return;
 }
 
 #define L2P(l) ( (void *)(((char *)(l)) + sizeof(struct link)) )
@@ -60,68 +67,72 @@ afreeall(Area *ap)
 void *
 alloc(size_t size, Area *ap)
 {
-       struct link *l;
+       struct link *l, **arr;
 
        l = malloc(sizeof(struct link) + size);
        if (l == NULL)
                internal_errorf(1, "unable to allocate memory");
-       l->next = ap->freelist;
-       l->prev = NULL;
-       if (ap->freelist)
-               ap->freelist->prev = l;
-       ap->freelist = l;
 
-       return L2P(l);
+       arr = ap->allocs + ALLOCIDX(ap, l);
+       l->next = *arr;
+       *arr = l;
+
+       return (L2P(l));
 }
 
 void *
 aresize(void *ptr, size_t size, Area *ap)
 {
-       struct link *l, *l2, *lprev, *lnext;
+       struct link *l, **arr;
 
        if (ptr == NULL)
                return alloc(size, ap);
 
        l = P2L(ptr);
-       lprev = l->prev;
-       lnext = l->next;
 
-       l2 = realloc(l, sizeof(struct link) + size);
-       if (l2 == NULL)
+       arr = ap->allocs + ALLOCIDX(ap, l);
+       if (*arr == l)
+               *arr = l->next;
+       else {
+               struct link *p = *arr;
+               while (p->next != l)
+                       p = p->next;
+               p->next = l->next;
+       }
+
+       l = realloc(l, sizeof(struct link) + size);
+       if (l == NULL)
                internal_errorf(1, "unable to allocate memory");
-       if (lprev)
-               lprev->next = l2;
-       else
-               ap->freelist = l2;
-       if (lnext)
-               lnext->prev = l2;
-
-       return L2P(l2);
+
+       arr = ap->allocs + ALLOCIDX(ap, l);
+       l->next = *arr;
+       *arr = l;
+
+       return (L2P(l));
 }
 
 void
 afree(void *ptr, Area *ap)
 {
-       struct link *l, *l2;
+       struct link *l, **arr;
 
        if (!ptr)
                return;
-
        l = P2L(ptr);
 
-       for (l2 = ap->freelist; l2 != NULL; l2 = l2->next) {
-               if (l == l2)
-                       break;
+       arr = ap->allocs + ALLOCIDX(ap, l);
+       if (*arr == l)
+               *arr = l->next;
+       else {
+               struct link *p = *arr;
+               while (p != NULL && p->next != l)
+                       p = p->next;
+               if (p == NULL)
+                       internal_errorf(1, "afree: %p not present in area %p",
+                           ptr, (void*)ap);
+               p->next = l->next;
        }
-       if (l2 == NULL)
-               internal_errorf(1, "afree: %p not present in area %p", ptr, ap);
-
-       if (l->prev)
-               l->prev->next = l->next;
-       else
-               ap->freelist = l->next;
-       if (l->next)
-               l->next->prev = l->prev;
 
        free(l);
+       return;
 }
diff --git a/exec.c b/exec.c
index 1c144fa..fb1e5bb 100644
--- a/exec.c
+++ b/exec.c
@@ -920,11 +920,14 @@ flushcom(int all) /* just relative or all */
        struct tbl *tp;
        struct tstate ts;
 
-       for (ktwalk(&ts, &taliases); (tp = ktnext(&ts)) != NULL; )
-               if ((tp->flag&ISSET) && (all || tp->val.s[0] != '/')) {
+       ktwalk(&ts, &taliases);
+       ts.flag |= ISSET;
+       while ((tp = ktnext(&ts)) != NULL)
+               if (all || tp->val.s[0] != '/') {
                        if (tp->flag&ALLOC) {
                                tp->flag &= ~(ALLOC|ISSET);
                                afree(tp->val.s, APERM);
+                               continue;
                        }
                        tp->flag &= ~ISSET;
                }
diff --git a/main.c b/main.c
index 0718a63..6a44cbd 100644
--- a/main.c
+++ b/main.c
@@ -119,7 +119,7 @@ main(int argc, char *argv[])
        initkeywords();
 
        /* define built-in commands */
-       ktinit(&builtins, APERM, 64); /* must be 2^n (currently 40 builtins) */
+       ktinit(&builtins, APERM, 40); /* Currently 40 builtins */
        for (i = 0; shbuiltins[i].name != NULL; i++)
                builtin(shbuiltins[i].name, shbuiltins[i].func);
        for (i = 0; kshbuiltins[i].name != NULL; i++)
diff --git a/sh.h b/sh.h
index e4e8807..af4bac3 100644
--- a/sh.h
+++ b/sh.h
@@ -83,7 +83,7 @@ EXTERN        char    username[];     /* username for \u 
prompt expansion */
  * Area-based allocation built on malloc/free
  */
 typedef struct Area {
-       struct link *freelist;  /* free list */
+       struct link     *allocs[5];     /* Hashmap of alloc nodes */
 } Area;
 
 EXTERN Area    aperm;          /* permanent object space */
diff --git a/syn.c b/syn.c
index 9b7451a..6b73aea 100644
--- a/syn.c
+++ b/syn.c
@@ -666,7 +666,7 @@ initkeywords(void)
        struct tokeninfo const *tt;
        struct tbl *p;
 
-       ktinit(&keywords, APERM, 32); /* must be 2^n (currently 20 keywords) */
+       ktinit(&keywords, APERM, 20); /* Currently 20 keywords */
        for (tt = tokentab; tt->name; tt++) {
                if (tt->reserved) {
                        p = ktenter(&keywords, tt->name, hash(tt->name));
diff --git a/table.c b/table.c
index 74d1684..3565e62 100644
--- a/table.c
+++ b/table.c
@@ -28,8 +28,13 @@ ktinit(struct table *tp, Area *ap, int tsize)
        tp->areap = ap;
        tp->tbls = NULL;
        tp->size = tp->nfree = 0;
-       if (tsize)
+       if (tsize) {
+               int i;
+               for (i = 3; (1 << i) < tsize; ++i)
+                       ;
+               tsize = 1 << i;
                texpand(tp, tsize);
+       }
 }
 
 static void
@@ -143,14 +148,16 @@ ktwalk(struct tstate *ts, struct table *tp)
 {
        ts->left = tp->size;
        ts->next = tp->tbls;
+       ts->flag = DEFINED;
 }
 
 struct tbl *
 ktnext(struct tstate *ts)
 {
+       Tflag flag = ts->flag;
        while (--ts->left >= 0) {
                struct tbl *p = *ts->next++;
-               if (p != NULL && (p->flag&DEFINED))
+               if (p != NULL && (p->flag & flag) == flag)
                        return p;
        }
        return NULL;
diff --git a/table.h b/table.h
index 3fe35eb..e113d33 100644
--- a/table.h
+++ b/table.h
@@ -132,11 +132,12 @@ struct block {
  * Used by ktwalk() and ktnext() routines.
  */
 struct tstate {
-       int left;
+       int     left;
        struct tbl **next;
+       Tflag   flag;           /* ktnext(): flags to compare against,
+                                * default: DEFINED */
 };
 
-
 EXTERN struct table taliases;  /* tracked aliases */
 EXTERN struct table builtins;  /* built-in commands */
 EXTERN struct table aliases;   /* aliases */
diff --git a/var.c b/var.c
index 77d3969..bfdb149 100644
--- a/var.c
+++ b/var.c
@@ -60,17 +60,18 @@ void
 popblock(void)
 {
        struct block *l = e->loc;
-       struct tbl *vp, **vpp = l->vars.tbls, *vq;
-       int i;
+       struct tbl *vp;
+       struct tstate ts;
 
        e->loc = l->next;       /* pop block */
-       for (i = l->vars.size; --i >= 0; )
-               if ((vp = *vpp++) != NULL && (vp->flag&SPECIAL)) {
-                       if ((vq = global(vp->name))->flag & ISSET)
-                               setspec(vq);
-                       else
-                               unsetspec(vq);
-               }
+       ktwalk(&ts, &l->vars);
+       ts.flag = SPECIAL;
+       while ((vp = ktnext(&ts)) != NULL) {
+               if ((vp = global(vp->name))->flag & ISSET)
+                       setspec(vp);
+               else
+                       unsetspec(vp);
+       }
        if (l->flags & BF_DOGETOPTS)
                user_opt = l->getopts_state;
        afreeall(&l->area);
@@ -111,7 +112,7 @@ initvar(void)
        int i;
        struct tbl *tp;
 
-       ktinit(&specials, APERM, 32); /* must be 2^n (currently 17 specials) */
+       ktinit(&specials, APERM, 17); /* Currently 17 specials */
        for (i = 0; names[i].name; i++) {
                tp = ktenter(&specials, names[i].name, hash(names[i].name));
                tp->flag = DEFINED|ISSET;
@@ -832,34 +833,35 @@ makenv(void)
 {
        struct block *l = e->loc;
        XPtrV env;
-       struct tbl *vp, **vpp;
-       int i;
+       struct tbl *vp;
+       struct tstate ts;
 
        XPinit(env, 64);
-       for (l = e->loc; l != NULL; l = l->next)
-               for (vpp = l->vars.tbls, i = l->vars.size; --i >= 0; )
-                       if ((vp = *vpp++) != NULL &&
-                           (vp->flag&(ISSET|EXPORT)) == (ISSET|EXPORT)) {
-                               struct block *l2;
-                               struct tbl *vp2;
-                               unsigned int h = hash(vp->name);
-
-                               /* unexport any redefined instances */
-                               for (l2 = l->next; l2 != NULL; l2 = l2->next) {
-                                       vp2 = ktsearch(&l2->vars, vp->name, h);
-                                       if (vp2 != NULL)
-                                               vp2->flag &= ~EXPORT;
-                               }
-                               if ((vp->flag&INTEGER)) {
-                                       /* integer to string */
-                                       char *val;
-                                       val = str_val(vp);
-                                       vp->flag &= ~(INTEGER|RDONLY);
-                                       /* setstr can't fail here */
-                                       setstr(vp, val, KSH_RETURN_ERROR);
-                               }
-                               XPput(env, vp->val.s);
+       for (l = e->loc; l != NULL; l = l->next) {
+               ktwalk(&ts, &l->vars);
+               ts.flag = ISSET | EXPORT;
+               while ((vp = ktnext(&ts)) != NULL) {
+                       struct block *l2;
+                       struct tbl *vp2;
+                       unsigned int h = hash(vp->name);
+
+                       /* unexport any redefined instances */
+                       for (l2 = l->next; l2 != NULL; l2 = l2->next) {
+                               vp2 = ktsearch(&l2->vars, vp->name, h);
+                               if (vp2 != NULL)
+                                       vp2->flag &= ~EXPORT;
                        }
+                       if ((vp->flag&INTEGER)) {
+                               /* integer to string */
+                               char *val;
+                               val = str_val(vp);
+                               vp->flag &= ~(INTEGER|RDONLY);
+                               /* setstr can't fail here */
+                               setstr(vp, val, KSH_RETURN_ERROR);
+                       }
+                       XPput(env, vp->val.s);
+               }
+       }
        XPput(env, NULL);
        return (char **) XPclose(env);
 }

Reply via email to