commit f6cb7f3e7382a19a6d6d9990c243ffb8a666182d
Author:     Mattias Andrée <[email protected]>
AuthorDate: Sun Mar 13 05:30:01 2016 +0100
Commit:     Mattias Andrée <[email protected]>
CommitDate: Sun Mar 13 05:30:01 2016 +0100

    Optimisations
    
    Signed-off-by: Mattias Andrée <[email protected]>

diff --git a/Makefile b/Makefile
index c7c5822..60dda0a 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,6 @@ HDR =\
 FUN =\
        zadd\
        zand\
-       zbits\
        zbset\
        zbtest\
        zcmp\
@@ -20,7 +19,6 @@ FUN =\
        zfree\
        zgcd\
        zload\
-       zlsb\
        zlsh\
        zmod\
        zmodmul\
@@ -61,7 +59,9 @@ INLINE_FUN =\
        zzero\
        zsignum\
        zabs\
-       zneg
+       zneg\
+       zlsb\
+       zbits
 
 OBJ = $(FUN:=.o) allocator.o
 MAN3 = $(FUN:=.3) $(INLINE_FUN:=.3)
diff --git a/bench/libgmp.h b/bench/libgmp.h
index 832bfe9..7dc74e2 100644
--- a/bench/libgmp.h
+++ b/bench/libgmp.h
@@ -40,7 +40,7 @@ zunsetup(void)
 #define QUASIUNIFORM            0
 #define UNIFORM                 1
 
-#define zperror(x)              0
+#define zperror(x)              ((void)0)
 #define zinit                   mpz_init
 #define zfree                   mpz_clear
 
@@ -54,7 +54,6 @@ zunsetup(void)
 #define zand                    mpz_and
 #define zor                     mpz_ior
 #define zxor                    mpz_xor
-#define znot                    mpz_com
 #define zbtest                  mpz_tstbit
 #define zeven                   mpz_even_p /* Note, must not have side 
effects. */
 #define zodd                    mpz_odd_p /* Note, must not have side effects. 
*/
@@ -118,6 +117,17 @@ zseti(z_t r, long long int val)
 }
 
 static void
+znot(z_t r, z_t a)
+{
+       size_t bits = zbits(a);
+       mpz_set_ui(_b, 0);
+       mpz_setbit(_b, bits);
+       zsub(_b, _b, _1);
+       zxor(r, a, _b);
+       zneg(r, r);
+}
+
+static void
 zsplit(z_t high, z_t low, z_t a, size_t brk)
 {
        if (low == a) {
diff --git a/bench/libtommath.h b/bench/libtommath.h
index c321c78..4a81373 100644
--- a/bench/libtommath.h
+++ b/bench/libtommath.h
@@ -33,7 +33,7 @@ zunsetup(void)
 #define QUASIUNIFORM            0
 #define UNIFORM                 1
 
-#define zperror(x)              0
+#define zperror(x)              ((void)0)
 #define zinit(a)                mp_init(a)
 #define zfree(a)                mp_clear(a)
 
diff --git a/src/internals.h b/src/internals.h
index 25a640a..2a947dd 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -59,17 +59,34 @@ extern zahl_char_t **libzahl_pool[sizeof(size_t) * 8];
 extern size_t libzahl_pool_n[sizeof(size_t) * 8];
 extern size_t libzahl_pool_alloc[sizeof(size_t) * 8];
 
-#define FAILURE(error)               (libzahl_error = (error), 
longjmp(libzahl_jmp_buf, 1))
-#define zmemcpy(d, s, n)             memcpy(d, s, (n) * sizeof(zahl_char_t))
-#define zmemmove(d, s, n)            memmove(d, s, (n) * sizeof(zahl_char_t))
-#define zmemset(a, v, n)             memset(a, v, (n) * sizeof(zahl_char_t))
-#define zmemcmp(a, b, n)             memcmp(a, b, (n) * sizeof(zahl_char_t))
+#if defined(__GNUC__) || defined(__clang__)
+# define EXPECT(value, expected)     __builtin_expect(value, expected)
+#else
+# define EXPECT(value, expected)     (value)
+#endif
 
+#define FAILURE(error)               (libzahl_error = (error), 
longjmp(libzahl_jmp_buf, 1))
+#define zmemmove(d, s, n)            memmove((d), (s), (n) * 
sizeof(zahl_char_t))
 #define SET_SIGNUM(a, signum)        ((a)->sign = (signum))
 #define SET(a, b)                    do { if ((a) != (b)) zset(a, b); } while 
(0)
 #define ENSURE_SIZE(a, n)            do { if ((a)->alloced < (n)) 
libzahl_realloc(a, (n)); } while (0)
-
 #define MIN(a, b)                    ((a) < (b) ? (a) : (b))
 #define MAX(a, b)                    ((a) > (b) ? (a) : (b))
+#define TRIM(a)                      for (; (a)->used && !(a)->chars[(a)->used 
- 1]; (a)->used--)
+#define TRIM_NONZERO(a)              for (; !(a)->chars[(a)->used - 1]; 
(a)->used--)
 
 void libzahl_realloc(z_t a, size_t need);
+
+static inline void
+zmemcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register 
size_t n)
+{
+       while (n--)
+               d[n] = s[n];
+}
+
+static inline void
+zmemset(zahl_char_t *a, register zahl_char_t v, register size_t n)
+{
+       while (n--)
+               a[n] = v;
+}
diff --git a/src/zadd.c b/src/zadd.c
index e7d5265..ec17ca4 100644
--- a/src/zadd.c
+++ b/src/zadd.c
@@ -9,10 +9,10 @@ zadd_unsigned(z_t a, z_t b, z_t c)
        uint32_t carry[] = {0, 0};
        zahl_char_t *addend;
 
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                zabs(a, c);
                return;
-       } else if (zzero(c)) {
+       } else if (EXPECT(zzero(c), 0)) {
                zabs(a, b);
                return;
        }
@@ -29,7 +29,7 @@ zadd_unsigned(z_t a, z_t b, z_t c)
                        zmemset(a->chars + a->used, 0, n - a->used);
                }
                addend = c->chars;
-       } else if (a == c) {
+       } else if (EXPECT(a == c, 0)) {
                if (a->used < b->used) {
                        n = b->used;
                        zmemset(a->chars + a->used, 0, n - a->used);
@@ -67,13 +67,13 @@ zadd_unsigned(z_t a, z_t b, z_t c)
 void
 zadd(z_t a, z_t b, z_t c)
 {
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                SET(a, c);
-       } else if (zzero(c)) {
+       } else if (EXPECT(zzero(c), 0)) {
                SET(a, b);
-       } else if (b == c) {
+       } else if (EXPECT(b == c, 0)) {
                zlsh(a, b, 1);
-       } else if ((zsignum(b) | zsignum(c)) < 0) {
+       } else if (EXPECT((zsignum(b) | zsignum(c)) < 0, 0)) {
                if (zsignum(b) < 0) {
                        if (zsignum(c) < 0) {
                                zadd_unsigned(a, b, c);
diff --git a/src/zand.c b/src/zand.c
index e07705a..65a8962 100644
--- a/src/zand.c
+++ b/src/zand.c
@@ -7,7 +7,7 @@ zand(z_t a, z_t b, z_t c)
 {
        size_t n;
 
-       if (zzero(b) || zzero(c)) {
+       if (EXPECT(zzero(b) || zzero(c), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
@@ -24,7 +24,7 @@ found_highest:
        if (a == b) {
                while (n--)
                        a->chars[n] &= c->chars[n];
-       } else if (a == c) {
+       } else if (EXPECT(a == c, 0)) {
                while (n--)
                        a->chars[n] &= b->chars[n];
        } else {
diff --git a/src/zbits.c b/src/zbits.c
deleted file mode 100644
index 0bee579..0000000
--- a/src/zbits.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-size_t
-zbits(z_t a)
-{
-       size_t i;
-       zahl_char_t x;
-       if (zzero(a))
-               return 1; /* Deliver us from evil! */
-       for (i = a->used - 1;; i--) {
-               x = a->chars[i];
-               if (x) {
-                       a->used = i + 1;
-                       for (i *= BITS_PER_CHAR; x; x >>= 1, i++);
-                       return i;
-               }
-       }
-}
diff --git a/src/zbset.c b/src/zbset.c
index c379e2a..33252c1 100644
--- a/src/zbset.c
+++ b/src/zbset.c
@@ -35,8 +35,7 @@ zbset(z_t a, z_t b, size_t bit, int action)
                a->chars[chars] &= ~mask;
        }
 
-       while (a->used && !a->chars[a->used - 1])
-               a->used--;
+       TRIM(a);
        if (!a->used)
                SET_SIGNUM(a, 0);
 }
diff --git a/src/zcmpi.c b/src/zcmpi.c
index e7a6158..52fbaf9 100644
--- a/src/zcmpi.c
+++ b/src/zcmpi.c
@@ -5,9 +5,9 @@
 int
 zcmpi(z_t a, long long int b)
 {
-       if (!b)
+       if (EXPECT(!b, 0))
                return zsignum(a);
-       if (zzero(a))
+       if (EXPECT(zzero(a), 0))
                return b > 0 ? -1 : b < 0;
        zseti(libzahl_tmp_cmp, b);
        return zcmp(a, libzahl_tmp_cmp);
diff --git a/src/zcmpmag.c b/src/zcmpmag.c
index 95668d3..59cc415 100644
--- a/src/zcmpmag.c
+++ b/src/zcmpmag.c
@@ -6,9 +6,9 @@ int
 zcmpmag(z_t a, z_t b)
 {
        size_t i, j;
-       if (zzero(a))
+       if (EXPECT(zzero(a), 0))
                return -!zzero(b);
-       if (zzero(b))
+       if (EXPECT(zzero(b), 0))
                return 1;
        i = a->used - 1;
        j = b->used - 1;
diff --git a/src/zcmpu.c b/src/zcmpu.c
index 0c7d68e..7da7f9a 100644
--- a/src/zcmpu.c
+++ b/src/zcmpu.c
@@ -5,9 +5,9 @@
 int
 zcmpu(z_t a, unsigned long long int b)
 {
-       if (!b)
+       if (EXPECT(!b, 0))
                return zsignum(a);
-       if (zsignum(a) <= 0)
+       if (EXPECT(zsignum(a) <= 0, 0))
                return -1;
        zsetu(libzahl_tmp_cmp, b);
        return zcmp(a, libzahl_tmp_cmp);
diff --git a/src/zdivmod.c b/src/zdivmod.c
index def98b5..c77e34a 100644
--- a/src/zdivmod.c
+++ b/src/zdivmod.c
@@ -15,7 +15,7 @@ zdivmod(z_t a, z_t b, z_t c, z_t d)
 
        sign = zsignum(c) * zsignum(d);
 
-       if (!sign) {
+       if (EXPECT(!sign, 0)) {
                if (zzero(c)) {
                        if (zzero(d)) {
                                FAILURE(EDOM); /* Indeterminate form: 0 divided 
by 0 */
@@ -27,7 +27,7 @@ zdivmod(z_t a, z_t b, z_t c, z_t d)
                        FAILURE(EDOM); /* Undefined form: Division by 0 */
                }
                return;
-       } else if ((cmpmag = zcmpmag(c, d)) <= 0) {
+       } else if (EXPECT((cmpmag = zcmpmag(c, d)) <= 0, 0)) {
                if (cmpmag == 0) {
                        zseti(a, sign);
                        SET_SIGNUM(b, 0);
diff --git a/src/zgcd.c b/src/zgcd.c
index 70bce64..9b1d451 100644
--- a/src/zgcd.c
+++ b/src/zgcd.c
@@ -16,15 +16,15 @@ zgcd(z_t a, z_t b, z_t c)
        zahl_char_t uv, bit;
        int neg;
 
-       if (!zcmp(b, c)) {
+       if (EXPECT(!zcmp(b, c), 0)) {
                SET(a, b);
                return;
        }
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                SET(a, c);
                return;
        }
-       if (zzero(c)) {
+       if (EXPECT(zzero(c), 0)) {
                SET(a, b);
                return;
        }
diff --git a/src/zload.c b/src/zload.c
index 3ab939d..03bcbb9 100644
--- a/src/zload.c
+++ b/src/zload.c
@@ -8,9 +8,9 @@ zload(z_t a, const void *buffer)
        const char *buf = buffer;
        a->sign = *((const int *)buf),    buf += sizeof(int);
        a->used = *((const size_t *)buf), buf += sizeof(size_t);
-       if (a->sign) {
+       if (EXPECT(!!a->sign, 1)) {
                ENSURE_SIZE(a, a->used);
-               zmemcpy(a->chars, buf, a->used);
+               zmemcpy(a->chars, (const zahl_char_t *)buf, a->used);
        }
        return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * 
sizeof(zahl_char_t));
 }
diff --git a/src/zlsb.c b/src/zlsb.c
deleted file mode 100644
index 17012f8..0000000
--- a/src/zlsb.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-size_t
-zlsb(z_t a)
-{
-       size_t i = 0;
-       zahl_char_t x;
-       if (zzero(a))
-               return SIZE_MAX;
-       for (;; i++) {
-               x = a->chars[i];
-               if (x) {
-                       x = ~x;
-                       for (i *= BITS_PER_CHAR; x & 1; x >>= 1, i++);
-                       return i;
-               }
-       }
-}
diff --git a/src/zlsh.c b/src/zlsh.c
index 2a76fbc..3daa01d 100644
--- a/src/zlsh.c
+++ b/src/zlsh.c
@@ -8,11 +8,11 @@ zlsh(z_t a, z_t b, size_t bits)
        size_t i, chars, cbits;
        zahl_char_t carry[] = {0, 0};
 
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
-       if (!bits) {
+       if (EXPECT(!bits, 0)) {
                SET(a, b);
                return;
        }
@@ -22,14 +22,14 @@ zlsh(z_t a, z_t b, size_t bits)
        cbits = BITS_PER_CHAR - bits;
 
        ENSURE_SIZE(a, b->used + chars);
-       if (a == b)
+       if (EXPECT(a == b, 1))
                zmemmove(a->chars + chars, b->chars, b->used);
        else
                zmemcpy(a->chars + chars, b->chars, b->used);
        zmemset(a->chars, 0, chars);
        a->used = b->used + chars;
 
-       if (bits) { /* This if statement is very important in C. */
+       if (EXPECT(bits, 1)) { /* This if statement is very important in C. */
                for (i = chars; i < a->used; i++) {
                        carry[~i & 1] = a->chars[i] >> cbits;
                        a->chars[i] <<= bits;
diff --git a/src/zmodmul.c b/src/zmodmul.c
index 6b04909..bb0aa03 100644
--- a/src/zmodmul.c
+++ b/src/zmodmul.c
@@ -6,7 +6,7 @@ void
 zmodmul(z_t a, z_t b, z_t c, z_t d)
 {
        /* TODO Montgomery modular multiplication */
-       if (a == d) {
+       if (EXPECT(a == d, 0)) {
                zset(libzahl_tmp_modmul, d);
                zmul(a, b, c);
                zmod(a, a, libzahl_tmp_modmul);
diff --git a/src/zmodpow.c b/src/zmodpow.c
index 6cb50d2..ed87ac5 100644
--- a/src/zmodpow.c
+++ b/src/zmodpow.c
@@ -12,7 +12,7 @@ zmodpow(z_t a, z_t b, z_t c, z_t d)
        size_t i, j, n, bits;
        zahl_char_t x;
 
-       if (zsignum(c) <= 0) {
+       if (EXPECT(zsignum(c) <= 0, 0)) {
                if (zzero(c)) {
                        if (zzero(b))
                                FAILURE(EDOM); /* Indeterminate form: 0:th 
power of 0 */
@@ -25,9 +25,9 @@ zmodpow(z_t a, z_t b, z_t c, z_t d)
                        SET_SIGNUM(a, 0);
                }
                return;
-       } else if (zzero(d)) {
+       } else if (EXPECT(zzero(d), 0)) {
                FAILURE(EDOM); /* Undefined form: Division by 0 */
-       } else if (zzero(b)) {
+       } else if (EXPECT(zzero(b), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
diff --git a/src/zmodpowu.c b/src/zmodpowu.c
index 7fb2c3d..6dcb6b7 100644
--- a/src/zmodpowu.c
+++ b/src/zmodpowu.c
@@ -8,7 +8,7 @@
 void
 zmodpowu(z_t a, z_t b, unsigned long long int c, z_t d)
 {
-       if (!c) {
+       if (EXPECT(!c, 0)) {
                if (zzero(b))
                        FAILURE(EDOM); /* Indeterminate form: 0:th power of 0 */
                else if (zzero(d))
@@ -16,9 +16,9 @@ zmodpowu(z_t a, z_t b, unsigned long long int c, z_t d)
                else
                        zsetu(a, 1);
                return;
-       } else if (zzero(d)) {
+       } else if (EXPECT(zzero(d), 0)) {
                FAILURE(EDOM); /* Undefined form: Division by 0 */
-       } else if (zzero(b)) {
+       } else if (EXPECT(zzero(b), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
diff --git a/src/zmodsqr.c b/src/zmodsqr.c
index 8a1442d..bdf0ea2 100644
--- a/src/zmodsqr.c
+++ b/src/zmodsqr.c
@@ -6,7 +6,7 @@ void
 zmodsqr(z_t a, z_t b, z_t c)
 {
        /* TODO What is the fastest way to do zmodsqr? */
-       if (a == c) {
+       if (EXPECT(a == c, 0)) {
                zset(libzahl_tmp_modsqr, c);
                zsqr(a, b);
                zmod(a, a, libzahl_tmp_modsqr);
diff --git a/src/zmul.c b/src/zmul.c
index 6023664..c2cca10 100644
--- a/src/zmul.c
+++ b/src/zmul.c
@@ -22,7 +22,7 @@ zmul(z_t a, z_t b, z_t c)
        b_sign = zsignum(b);
        c_sign = zsignum(c);
 
-       if (!b_sign || !c_sign) {
+       if (EXPECT(!b_sign || !c_sign, 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
diff --git a/src/znot.c b/src/znot.c
index d0e52c0..e1b114c 100644
--- a/src/znot.c
+++ b/src/znot.c
@@ -5,25 +5,24 @@
 void
 znot(z_t a, z_t b)
 {
-       size_t bits, n;
+       size_t bits, i;
 
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
 
        bits = zbits(b);
-       SET(a, b);
-       SET_SIGNUM(a, -zsignum(a));
+       a->used = b->used;
+       SET_SIGNUM(a, -zsignum(b));
 
-       for (n = a->used; n--;)
-               a->chars[n] = ~(a->chars[n]);
+       for (i = 0; i < a->used; i++)
+               a->chars[i] = ~(b->chars[i]);
        bits = BITS_IN_LAST_CHAR(bits);
        if (bits)
                a->chars[a->used - 1] &= ((zahl_char_t)1 << bits) - 1;
 
-       while (a->used && !a->chars[a->used - 1])
-                a->used--;
+       TRIM(a);
        if (!a->used)
                SET_SIGNUM(a, 0);
 }
diff --git a/src/zor.c b/src/zor.c
index c50f5f8..b67f5b3 100644
--- a/src/zor.c
+++ b/src/zor.c
@@ -7,13 +7,13 @@ zor(z_t a, z_t b, z_t c)
 {
        size_t n, m;
 
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                if (zzero(c))
                        SET_SIGNUM(a, 0);
                else
                        SET(a, c);
                return;
-       } else if (zzero(c)) {
+       } else if (EXPECT(zzero(c), 0)) {
                SET(a, b);
                return;
        }
@@ -28,7 +28,7 @@ zor(z_t a, z_t b, z_t c)
                        zmemcpy(a->chars + n, c->chars + n, m - n);
                while (n--)
                        a->chars[n] |= c->chars[n];
-       } else if (a == c) {
+       } else if (EXPECT(a == c, 0)) {
                if (c->used < b->used)
                        zmemcpy(a->chars + n, b->chars + n, m - n);
                while (n--)
diff --git a/src/zpow.c b/src/zpow.c
index f709ff9..9eb5724 100644
--- a/src/zpow.c
+++ b/src/zpow.c
@@ -17,7 +17,7 @@ zpow(z_t a, z_t b, z_t c)
        size_t i, j, n, bits;
        zahl_char_t x;
 
-       if (zsignum(c) <= 0) {
+       if (EXPECT(zsignum(c) <= 0, 0)) {
                if (zzero(c)) {
                        if (zzero(b))
                                FAILURE(EDOM); /* Indeterminate form: 0:th 
power of 0 */
@@ -28,7 +28,7 @@ zpow(z_t a, z_t b, z_t c)
                        SET_SIGNUM(a, 0);
                }
                return;
-       } else if (zzero(b)) {
+       } else if (EXPECT(zzero(b), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
diff --git a/src/zpowu.c b/src/zpowu.c
index f8e5897..67cdc58 100644
--- a/src/zpowu.c
+++ b/src/zpowu.c
@@ -7,12 +7,12 @@
 void
 zpowu(z_t a, z_t b, unsigned long long int c)
 {
-       if (!c) {
+       if (EXPECT(!c, 0)) {
                if (zzero(b))
                        FAILURE(EDOM); /* Indeterminate form: 0:th power of 0 */
                zsetu(a, 1);
                return;
-       } else if (zzero(b)) {
+       } else if (EXPECT(zzero(b), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
diff --git a/src/zptest.c b/src/zptest.c
index a1d112c..d6fa80d 100644
--- a/src/zptest.c
+++ b/src/zptest.c
@@ -17,7 +17,7 @@ zptest(z_t witness, z_t n, int t)
 
        size_t i, r;
 
-       if (zcmpu(n, 3) <= 0) {
+       if (EXPECT(zcmpu(n, 3) <= 0, 0)) {
                if (zcmpu(n, 1) <= 0) {
                        if (witness)
                                SET(witness, n);
@@ -26,7 +26,7 @@ zptest(z_t witness, z_t n, int t)
                        return PRIME;
                }
        }
-       if (zeven(n)) {
+       if (EXPECT(zeven(n), 0)) {
                if (witness)
                        SET(witness, n);
                return NONPRIME;
diff --git a/src/zrand.c b/src/zrand.c
index 30dc61d..48d4802 100644
--- a/src/zrand.c
+++ b/src/zrand.c
@@ -65,7 +65,7 @@ zrand(z_t r, enum zranddev dev, enum zranddist dist, z_t n)
                abort();
        }
 
-       if (zzero(n)) {
+       if (EXPECT(zzero(n), 0)) {
                SET_SIGNUM(r, 0);
                return;
        }
@@ -76,7 +76,7 @@ zrand(z_t r, enum zranddev dev, enum zranddist dist, z_t n)
 
        switch (dist) {
        case QUASIUNIFORM:
-               if (zsignum(n) < 0)
+               if (EXPECT(zsignum(n) < 0, 0))
                        FAILURE(EDOM); /* n must be non-negative. */
                bits = zbits(n);
                zrand_get_random_bits(r, bits, fd);
@@ -86,7 +86,7 @@ zrand(z_t r, enum zranddev dev, enum zranddist dist, z_t n)
                break;
 
        case UNIFORM:
-               if (zsignum(n) < 0)
+               if (EXPECT(zsignum(n) < 0, 0))
                        FAILURE(EDOM); /* n must be non-negative. */
                bits = zbits(n);
                do
diff --git a/src/zrsh.c b/src/zrsh.c
index a7b886b..cc83ae5 100644
--- a/src/zrsh.c
+++ b/src/zrsh.c
@@ -7,14 +7,14 @@ zrsh(z_t a, z_t b, size_t bits)
 {
        size_t i, chars, cbits;
 
-       if (!bits) {
+       if (EXPECT(!bits, 0)) {
                SET(a, b);
                return;
        }
 
        chars = FLOOR_BITS_TO_CHARS(bits);
 
-       if (zzero(b) || chars >= b->used || zbits(b) <= bits) {
+       if (EXPECT(zzero(b) || chars >= b->used || zbits(b) <= bits, 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
@@ -22,23 +22,22 @@ zrsh(z_t a, z_t b, size_t bits)
        bits = BITS_IN_LAST_CHAR(bits);
        cbits = BITS_PER_CHAR - bits;
 
-       if (chars && a == b) {
+       if (EXPECT(chars, 1) && EXPECT(a == b, 1)) {
                a->used -= chars;
                zmemmove(a->chars, a->chars + chars, a->used);
-       } else if (a != b) {
+       } else if (EXPECT(a != b, 0)) {
                a->used = b->used - chars;
                ENSURE_SIZE(a, a->used);
                zmemcpy(a->chars, b->chars + chars, a->used);
        }
 
-       if (bits) { /* This if statement is very important in C. */
+       if (EXPECT(bits, 0)) { /* This if statement is very important in C. */
                a->chars[0] >>= bits;
                for (i = 1; i < a->used; i++) {
                        a->chars[i - 1] |= a->chars[i] << cbits;
                        a->chars[i] >>= bits;
                }
-               while (!a->chars[a->used - 1])
-                       a->used--;
+               TRIM_NONZERO(a);
        }
 
        SET_SIGNUM(a, zsignum(b));
diff --git a/src/zsave.c b/src/zsave.c
index 9a171d6..07d43fa 100644
--- a/src/zsave.c
+++ b/src/zsave.c
@@ -7,10 +7,10 @@ zsave(z_t a, void *buffer)
 {
        if (buffer) {
                char *buf = buffer;
-               *((int *)buf)    = a->sign,    buf += sizeof(int);
-               *((size_t *)buf) = a->used,    buf += sizeof(size_t);
-               if (!zzero(a))
-                       zmemcpy(buf, a->chars, a->used);
+               *((int *)buf)    = a->sign, buf += sizeof(int);
+               *((size_t *)buf) = a->used, buf += sizeof(size_t);
+               if (EXPECT(!zzero(a), 1))
+                       zmemcpy((zahl_char_t *)buf, a->chars, a->used);
        }
        return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * 
sizeof(zahl_char_t));
 }
diff --git a/src/zset.c b/src/zset.c
index f7fe52e..9bb2778 100644
--- a/src/zset.c
+++ b/src/zset.c
@@ -5,12 +5,12 @@
 void
 zset(z_t a, z_t b)
 {
-       if (zzero(b)) {
-               SET_SIGNUM(a, 0);
+       if (EXPECT(b->sign == 0, 0)) {
+               a->sign = 0;
        } else {
-               ENSURE_SIZE(a, b->used);
                a->sign = b->sign;
                a->used = b->used;
+               ENSURE_SIZE(a, b->used);
                zmemcpy(a->chars, b->chars, b->used);
        }
 }
diff --git a/src/zseti.c b/src/zseti.c
index 81a7a13..d3514fb 100644
--- a/src/zseti.c
+++ b/src/zseti.c
@@ -5,7 +5,7 @@
 void
 zseti(z_t a, long long int b)
 {
-       if (b >= 0) {
+       if (EXPECT(b >= 0, 0)) {
                zsetu(a, (unsigned long long int)b);
        } else {
                zsetu(a, (unsigned long long int)-b);
diff --git a/src/zsets.c b/src/zsets.c
index d4bedc0..24ca082 100644
--- a/src/zsets.c
+++ b/src/zsets.c
@@ -13,12 +13,12 @@ zsets(z_t a, const char *str)
 
        str += neg || (*str == '+');
 
-       if (!*str) {
+       if (EXPECT(!*str, 0)) {
                errno = EINVAL;
                return -1;
        }
        for (str_end = str; *str_end; str_end++) {
-               if (!isdigit(*str_end)) {
+               if (EXPECT(!isdigit(*str_end), 0)) {
                        errno = EINVAL;
                        return -1;
                }
diff --git a/src/zsplit.c b/src/zsplit.c
index 8f51721..68810d2 100644
--- a/src/zsplit.c
+++ b/src/zsplit.c
@@ -5,13 +5,13 @@
 void
 zsplit(z_t high, z_t low, z_t a, size_t delim)
 {
-       if (zzero(a)) {
+       if (EXPECT(zzero(a), 0)) {
                SET_SIGNUM(high, 0);
                SET_SIGNUM(low, 0);
                return;
        }
 
-       if (high == a) {
+       if (EXPECT(high == a, 0)) {
                ztrunc(low, a, delim);
                zrsh(high, a, delim);
        } else {
diff --git a/src/zsqr.c b/src/zsqr.c
index 39f5591..9601eb6 100644
--- a/src/zsqr.c
+++ b/src/zsqr.c
@@ -13,7 +13,7 @@ zsqr(z_t a, z_t b)
        z_t z0, z1, z2, high, low;
        int sign;
 
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
diff --git a/src/zsub.c b/src/zsub.c
index cdb13f4..bf918da 100644
--- a/src/zsub.c
+++ b/src/zsub.c
@@ -10,11 +10,11 @@ zsub_unsigned(z_t a, z_t b, z_t c)
        size_t i, n;
        int magcmp;
 
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                zabs(a, c);
                zneg(a, a);
                return;
-       } else if (zzero(c)) {
+       } else if (EXPECT(zzero(c), 0)) {
                zabs(a, b);
                return;
        }
@@ -61,13 +61,13 @@ zsub_unsigned(z_t a, z_t b, z_t c)
 void
 zsub(z_t a, z_t b, z_t c)
 {
-       if (b == c) {
+       if (EXPECT(b == c, 0)) {
                SET_SIGNUM(a, 0);
-       } else if (zzero(b)) {
+       } else if (EXPECT(zzero(b), 0)) {
                zneg(a, c);
-       } else if (zzero(c)) {
+       } else if (EXPECT(zzero(c), 0)) {
                SET(a, b);
-       } else if ((zsignum(b) | zsignum(c)) < 0) {
+       } else if (EXPECT((zsignum(b) | zsignum(c)) < 0, 0)) {
                if (zsignum(b) < 0) {
                        if (zsignum(c) < 0) {
                                zsub_unsigned(a, c, b);
diff --git a/src/ztrunc.c b/src/ztrunc.c
index c634707..7dcafd9 100644
--- a/src/ztrunc.c
+++ b/src/ztrunc.c
@@ -6,9 +6,9 @@ void
 ztrunc(z_t a, z_t b, size_t bits)
 {
        zahl_char_t mask = 1;
-       size_t chars, i;
+       size_t chars;
 
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                SET_SIGNUM(a, 0);
                return;
        }
@@ -16,21 +16,20 @@ ztrunc(z_t a, z_t b, size_t bits)
        chars = CEILING_BITS_TO_CHARS(bits);
        a->sign = b->sign;
        a->used = MIN(chars, b->used);
-       if (a->used < chars)
+       if (EXPECT(a->used < chars, 0))
                bits = 0;
-       if (a != b) {
+       if (EXPECT(a != b, 1)) {
                ENSURE_SIZE(a, a->used);
                zmemcpy(a->chars, b->chars, a->used);
        }
        bits = BITS_IN_LAST_CHAR(bits);
-       if (bits) {
+       if (EXPECT(!!bits, 1)) {
                mask <<= bits;
                mask -= 1;
                a->chars[a->used - 1] &= mask;
        }
 
-       for (i = a->used; i--;)
-               if (a->chars[i])
-                       return;
-       SET_SIGNUM(a, 0);
+       TRIM(a);
+       if (!a->used)
+               SET_SIGNUM(a, 0);
 }
diff --git a/src/zxor.c b/src/zxor.c
index d152ed7..da56a24 100644
--- a/src/zxor.c
+++ b/src/zxor.c
@@ -7,13 +7,13 @@ zxor(z_t a, z_t b, z_t c)
 {
        size_t n, m;
 
-       if (zzero(b)) {
+       if (EXPECT(zzero(b), 0)) {
                if (zzero(c))
                        SET_SIGNUM(a, 0);
                else
                        SET(a, c);
                return;
-       } else if (zzero(c)) {
+       } else if (EXPECT(zzero(c), 0)) {
                SET(a, b);
                return;
        }
@@ -28,7 +28,7 @@ zxor(z_t a, z_t b, z_t c)
                        zmemcpy(a->chars + n, c->chars + n, m - n);
                while (n--)
                        a->chars[n] ^= c->chars[n];
-       } else if (a == c) {
+       } else if (EXPECT(a == c, 0)) {
                if (c->used < b->used)
                        zmemcpy(a->chars + n, b->chars + n, m - n);
                while (n--)
@@ -44,8 +44,7 @@ zxor(z_t a, z_t b, z_t c)
        }
 
        a->used = m;
-       while (a->used && !a->chars[a->used - 1])
-               a->used--;
+       TRIM(a);
        if (a->used)
                SET_SIGNUM(a, 1 - 2 * ((zsignum(b) ^ zsignum(c)) < 0));
        else
diff --git a/zahl.h b/zahl.h
index 9d2e654..89fc6f8 100644
--- a/zahl.h
+++ b/zahl.h
@@ -98,8 +98,6 @@ void zrsh(z_t, z_t, size_t);           /* a := b >> c */
 void ztrunc(z_t, z_t, size_t);         /* a := b & ((1 << c) - 1) */
 int zbtest(z_t, size_t);               /* (a >> b) & 1 */
 void zsplit(z_t, z_t, z_t, size_t);    /* a := c >> d, b := c - (a << d) */
-size_t zbits(z_t);                     /* ⌊log₂ |a|⌋ + 1, 1 if a = 0 */
-size_t zlsb(z_t);                      /* Index of first set bit, SIZE_MAX if 
none are set. */
 
 /* If d > 0: a := b | (1 << c), f d = 0: a := b & ~(1 << c), if d < 0: a := b 
^ (1 << c) */
 void zbset(z_t, z_t, size_t, int);
@@ -147,3 +145,66 @@ static inline int zzero(z_t a)         { return !a->sign; }
 static inline int zsignum(z_t a)       { return a->sign; }                     
     /* a/|a|, 0 if a is zero. */
 static inline void zabs(z_t a, z_t b)  { if (a != b) zset(a, b); a->sign = 
!!a->sign; }  /* a := |b| */
 static inline void zneg(z_t a, z_t b)  { if (a != b) zset(a, b); a->sign = 
-a->sign; }   /* a := -b */
+
+
+/* Bitwise inline functions. */
+
+
+/* Index of first set bit, SIZE_MAX if none are set. */
+#if defined(__GNUC__) || defined(__clang__)
+static inline size_t
+zlsb(z_t a)
+{
+       size_t i = 0;
+       if (__builtin_expect(zzero(a), 0))
+               return SIZE_MAX;
+       for (; !a->chars[i]; i++);
+       i *= 8 * sizeof(zahl_char_t);
+       i += __builtin_ctzll(a->chars[i]);
+       return i;
+}
+#else
+static inline size_t
+zlsb(z_t a)
+{
+       size_t i = 0;
+       zahl_char_t x;
+       if (zzero(a))
+               return SIZE_MAX;
+       for (; !a->chars[i]; i++);
+       i *= 8 * sizeof(zahl_char_t);
+       x = ~(a->chars[i]);
+       for (; x & 1; x >>= 1, i++);
+       return i;
+}
+#endif
+
+
+/* ⌊log₂ |a|⌋ + 1, 1 if a = 0 */
+#if defined(__GNUC__) || defined(__clang__)
+static inline size_t
+zbits(z_t a)
+{
+       size_t rc;
+       if (__builtin_expect(zzero(a), 0))
+               return 1;
+       while (!a->chars[a->used - 1])  a->used--; /* TODO should not be 
necessary */
+       rc = a->used * 8 * sizeof(zahl_char_t);
+       rc -= __builtin_clzll(a->chars[a->used - 1]);
+       return rc;
+}
+#else
+static inline size_t
+zbits(z_t a)
+{
+       size_t rc;
+       zahl_char_t x;
+       if (zzero(a))
+               return 1;
+       while (!a->chars[a->used - 1])  a->used--; /* TODO should not be 
necessary */
+       x = a->chars[a->used - 1];
+       rc = (a->used - 1) * 8 * sizeof(zahl_char_t);
+       for (; x; x >>= 1, rc++);
+       return rc;
+}
+#endif

Reply via email to