commit 1f8023e18a6dc7b950826810b392fdd46bcb0d45
Author:     Mattias Andrée <[email protected]>
AuthorDate: Wed May 4 14:33:27 2016 +0200
Commit:     Mattias Andrée <[email protected]>
CommitDate: Wed May 4 14:33:27 2016 +0200

    Optimise zand, zor, and zxor
    
    Signed-off-by: Mattias Andrée <[email protected]>

diff --git a/src/internals.h b/src/internals.h
index 841346f..f1a2f78 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -343,3 +343,38 @@ zfree_temp(z_t a)
 }
 
 /* } */
+
+/*
+ * Store b[i] OP c[i] in a[i] for every i in [0, n), where OP is a binary
+ * operator token such as &, | or ^. Each argument is evaluated once.
+ * a may be the very same array as b or c, since each element is read
+ * before the element with the same index is written.
+ */
+#define ZMEM_OP(a, b, c, n, OP)                                               \
+       do {                                                                  \
+               zahl_char_t *a__ = (a);                                       \
+               const zahl_char_t *b__ = (b);                                 \
+               const zahl_char_t *c__ = (c);                                 \
+               size_t i__, n__ = (n);                                        \
+               if (n__ <= 4) {                                               \
+                       if (n__ >= 1)                                         \
+                               a__[0] = b__[0] OP c__[0];                    \
+                       if (n__ >= 2)                                         \
+                               a__[1] = b__[1] OP c__[1];                    \
+                       if (n__ >= 3)                                         \
+                               a__[2] = b__[2] OP c__[2];                    \
+                       if (n__ >= 4)                                         \
+                               a__[3] = b__[3] OP c__[3];                    \
+               } else {                                                      \
+                       /* <= (not <): also handle a final full group */      \
+                       for (i__ = 0; (i__ += 4) <= n__;) {                   \
+                               a__[i__ - 1] = b__[i__ - 1] OP c__[i__ - 1];  \
+                               a__[i__ - 2] = b__[i__ - 2] OP c__[i__ - 2];  \
+                               a__[i__ - 3] = b__[i__ - 3] OP c__[i__ - 3];  \
+                               a__[i__ - 4] = b__[i__ - 4] OP c__[i__ - 4];  \
+                       }                                                     \
+                       /* i__ > n__ now; finish the 0-3 leftovers */         \
+                       for (i__ -= 4; i__ < n__; i__++)                      \
+                               a__[i__] = b__[i__] OP c__[i__];              \
+               }                                                             \
+       } while (0)
diff --git a/src/zand.c b/src/zand.c
index 6d3a184..45d1ed5 100644
--- a/src/zand.c
+++ b/src/zand.c
@@ -2,25 +2,6 @@
 #include "internals.h"
 
 
-O2 static inline void
-zand_impl_3(register zahl_char_t *restrict a,
-           register const zahl_char_t *restrict b, size_t n)
-{
-       size_t i;
-       for (i = 0; i < n; i++)
-               a[i] &= b[i];
-}
-
-static inline void
-zand_impl_4(register zahl_char_t *restrict a,
-           register const zahl_char_t *restrict b,
-           register const zahl_char_t *restrict c, size_t n)
-{
-       size_t i;
-       for (i = 0; i < n; i++)
-               a[i] = b[i] & c[i];
-}
-
 void
 zand(z_t a, z_t b, z_t c)
 {
@@ -36,12 +17,12 @@ zand(z_t a, z_t b, z_t c)
        a->used = MIN(b->used, c->used);
 
        if (a == b) {
-               zand_impl_3(a->chars, c->chars, a->used);
+               ZMEM_OP(a->chars, a->chars, c->chars, a->used, &);
        } else if (unlikely(a == c)) {
-               zand_impl_3(a->chars, b->chars, a->used);
+               ZMEM_OP(a->chars, a->chars, b->chars, a->used, &);
        } else {
                ENSURE_SIZE(a, a->used);
-               zand_impl_4(a->chars, b->chars, c->chars, a->used);
+               ZMEM_OP(a->chars, b->chars, c->chars, a->used, &);
        }
 
        TRIM_AND_SIGN(a, zpositive1(b, c) * 2 - 1);
diff --git a/src/zor.c b/src/zor.c
index 2152c7f..ed952ca 100644
--- a/src/zor.c
+++ b/src/zor.c
@@ -2,27 +2,6 @@
 #include "internals.h"
 
 
-O2 static inline void
-zor_impl_3(register zahl_char_t *restrict a,
-           register const zahl_char_t *restrict b, size_t n)
-{
-       size_t i;
-       for (i = 0; i < n; i++)
-               a[i] |= b[i];
-}
-
-static inline void
-zor_impl_5(register zahl_char_t *restrict a,
-           register const zahl_char_t *restrict b, size_t n,
-           register const zahl_char_t *restrict c, size_t m)
-{
-       size_t i;
-       for (i = 0; i < n; i++)
-               a[i] = b[i] | c[i];
-       for (; i < m; i++)
-               a[i] = c[i];
-}
-
 void
 zor(z_t a, z_t b, z_t c)
 {
@@ -40,17 +19,19 @@ zor(z_t a, z_t b, z_t c)
        ENSURE_SIZE(a, m);
 
        if (a == b) {
-               zor_impl_3(a->chars, c->chars, n);
+               ZMEM_OP(a->chars, a->chars, c->chars, n, |);
                if (a->used < c->used)
                        zmemcpy_range(a->chars, c->chars, n, m);
        } else if (unlikely(a == c)) {
-               zor_impl_3(a->chars, b->chars, n);
+               ZMEM_OP(a->chars, a->chars, b->chars, n, |);
                if (a->used < b->used)
                        zmemcpy_range(a->chars, b->chars, n, m);
        } else  if (m == b->used) {
-               zor_impl_5(a->chars, c->chars, n, b->chars, m);
+               ZMEM_OP(a->chars, c->chars, b->chars, n, |);
+               zmemcpy_range(a->chars, b->chars, n, m);
        } else {
-               zor_impl_5(a->chars, b->chars, n, c->chars, m);
+               ZMEM_OP(a->chars, b->chars, c->chars, n, |);
+               zmemcpy_range(a->chars, c->chars, n, m);
        }
 
        a->used = m;
diff --git a/src/zxor.c b/src/zxor.c
index 601a947..3f2ca5c 100644
--- a/src/zxor.c
+++ b/src/zxor.c
@@ -2,27 +2,6 @@
 #include "internals.h"
 
 
-O2 static inline void
-zxor_impl_3(register zahl_char_t *restrict a,
-           register const zahl_char_t *restrict b, size_t n)
-{
-       size_t i;
-       for (i = 0; i < n; i++)
-               a[i] ^= b[i];
-}
-
-static inline void
-zxor_impl_5(register zahl_char_t *restrict a,
-           register const zahl_char_t *restrict b, size_t n,
-           register const zahl_char_t *restrict c, size_t m)
-{
-       size_t i;
-       for (i = 0; i < n; i++)
-               a[i] = b[i] ^ c[i];
-       for (; i < m; i++)
-               a[i] = c[i];
-}
-
 void
 zxor(z_t a, z_t b, z_t c)
 {
@@ -47,17 +26,21 @@ zxor(z_t a, z_t b, z_t c)
        ENSURE_SIZE(a, m);
 
        if (a == b) {
-               zxor_impl_3(a->chars, cc, n);
+               ZMEM_OP(a->chars, a->chars, cc, n, ^);
                if (a->used < cn)
                        zmemcpy_range(a->chars, cc, n, m);
        } else if (unlikely(a == c)) {
-               zxor_impl_3(a->chars, bc, n);
+               /* a is c here: read it through a->chars, as the a == b branch
+                * does; cc was fetched before ENSURE_SIZE(a, m) (may be stale?). */
+               ZMEM_OP(a->chars, a->chars, bc, n, ^);
                if (a->used < bn)
                        zmemcpy_range(a->chars, bc, n, m);
        } else if (m == bn) {
-               zxor_impl_5(a->chars, cc, n, bc, m);
+               ZMEM_OP(a->chars, c->chars, b->chars, n, ^);
+               zmemcpy_range(a->chars, b->chars, n, m);
        } else {
-               zxor_impl_5(a->chars, bc, n, cc, m);
+               ZMEM_OP(a->chars, b->chars, c->chars, n, ^);
+               zmemcpy_range(a->chars, c->chars, n, m);
        }
 
        a->used = m;

Reply via email to