commit fbace74784b115d24441d2a67b932a96011d7012
Author:     Mattias Andrée <[email protected]>
AuthorDate: Wed May 4 21:04:52 2016 +0200
Commit:     Mattias Andrée <[email protected]>
CommitDate: Wed May 4 21:04:52 2016 +0200

    Optimise zlsh
    
    Signed-off-by: Mattias Andrée <[email protected]>

diff --git a/src/internals.h b/src/internals.h
index a475ada..0690ad3 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -4,6 +4,7 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
 /* clang pretends to be GCC... */
 #if defined(__GNUC__) && defined(__clang__)
@@ -103,7 +104,6 @@ extern void *libzahl_temp_allocation;
 #define zpositive1(a, b)             (zpositive(a) + zpositive(b) > 0)
 #define zpositive2(a, b)             (zsignum(a) + zsignum(b) == 2)
 #define zzero2(a, b)                 (!(zsignum(a) | zsignum(b)))
-#define zmemmove(d, s, n)            memmove((d), (s), (n) * sizeof(zahl_char_t))
 #define zmemcpy(d, s, n)             libzahl_memcpy(d, s, n)
 #define zmemset(a, v, n)             libzahl_memset(a, v, n)
 
@@ -245,12 +245,8 @@ static inline void
 zswap_tainted_unsigned(z_t a, z_t b)
 {
        z_t t;
-       t->used = b->used;
-       b->used = a->used;
-       a->used = t->used;
-       t->chars = b->chars;
-       b->chars = a->chars;
-       a->chars = t->chars;
+       SWAP(a, b, t, used);
+       SWAP(b, a, t, chars);
 }
 
 static inline void
@@ -398,3 +394,54 @@ zfree_temp(z_t a)
                                        a__[i__] = OP(b__[i__]);  \
                }                                                 \
        } while (0)
+
+/*
+ * Copy `n_` chars from `s` to `d`, starting at the highest index and
+ * working down to index 0. Copying in this direction lets the ranges
+ * overlap as long as `d` is not below `s`, which is how zmemmove() and
+ * zlsh() (when a == b) call it; the pointers are therefore deliberately
+ * not restrict-qualified.
+ */
+static inline void
+zmemcpyb(register zahl_char_t *d, register const zahl_char_t *s, size_t n_)
+{
+       ssize_t i, n = (ssize_t)n_;
+       /* Copy the n % 4 highest chars first. */
+       switch (n & 3) {
+       case 3:
+               d[n - 1] = s[n - 1];
+               d[n - 2] = s[n - 2];
+               d[n - 3] = s[n - 3];
+               break;
+       case 2:
+               d[n - 1] = s[n - 1];
+               d[n - 2] = s[n - 2];
+               break;
+       case 1:
+               d[n - 1] = s[n - 1];
+               break;
+       default:
+               break;
+       }
+       /* Copy the remaining chars in groups of four, highest group first. */
+       for (i = n & ~3; (i -= 4) >= 0;) {
+               d[i + 3] = s[i + 3];
+               d[i + 2] = s[i + 2];
+               d[i + 1] = s[i + 1];
+               d[i + 0] = s[i + 0];
+       }
+}
+
+/*
+ * Copy `n` chars from `s` to `d`, picking the copy routine from the
+ * relative order of the two pointers: zmemcpy() (expected to copy from
+ * low to high indices) when `d` is below `s`, zmemcpyb() otherwise.
+ */
+static inline void
+zmemmove(register zahl_char_t *d, register const zahl_char_t *s, size_t n)
+{
+       if (d < s)
+               zmemcpy(d, s, n);
+       else
+               zmemcpyb(d, s, n);
+}
diff --git a/src/zlsh.c b/src/zlsh.c
index 1c9fd8f..59a05bf 100644
--- a/src/zlsh.c
+++ b/src/zlsh.c
@@ -18,10 +18,11 @@ zlsh(z_t a, z_t b, size_t bits)
        cbits = BITS_PER_CHAR - bits;
 
        ENSURE_SIZE(a, b->used + chars + 1);
-       if (likely(a == b))
-               zmemmove(a->chars + chars, b->chars, b->used);
-       else
+       if (likely(a == b)) {
+               zmemcpyb(a->chars + chars, b->chars, b->used);
+       } else {
                zmemcpy(a->chars + chars, b->chars, b->used);
+       }
        zmemset(a->chars, 0, chars);
        a->used = b->used + chars;
 
diff --git a/zahl-internals.h b/zahl-internals.h
index fc6768a..5c9cc5e 100644
--- a/zahl-internals.h
+++ b/zahl-internals.h
@@ -110,7 +110,7 @@ struct zahl {
 void libzahl_realloc(struct zahl *, size_t);
 
 ZAHL_INLINE void
-libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n)
+libzahl_memcpy(register zahl_char_t *d, register const zahl_char_t *s, size_t n)
 {
        size_t i;
        if (n <= 4) {
@@ -124,10 +124,10 @@ libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *res
                        d[3] = s[3];
        } else {
                for (i = 0; (i += 4) <= n;) {
-                       d[i - 1] = s[i - 1];
-                       d[i - 2] = s[i - 2];
-                       d[i - 3] = s[i - 3];
                        d[i - 4] = s[i - 4];
+                       d[i - 3] = s[i - 3];
+                       d[i - 2] = s[i - 2];
+                       d[i - 1] = s[i - 1];
                }
                if (i > n) {
                        i -= 4;
@@ -138,7 +138,7 @@ libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *res
                        if (i < n)
                                d[i] = s[i], i++;
                        if (i < n)
-                               d[i] = s[i], i++;
+                               d[i] = s[i];
                }
        }
 }

Reply via email to