commit 4bba6e7176632b4d760ba9362a1515552471d741
Author:     Mattias Andrée <[email protected]>
AuthorDate: Fri Apr 29 21:54:39 2016 +0200
Commit:     Mattias Andrée <[email protected]>
CommitDate: Fri Apr 29 21:55:15 2016 +0200

    Some optimisations, fix refsheet, and disable const/pure attributes in gmp 
in benchmark
    
    Signed-off-by: Mattias Andrée <[email protected]>

diff --git a/Makefile b/Makefile
index 406209d..5e4ace9 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@ include config.mk
 
 HDR_PUBLIC =\
        zahl.h\
-       zahl-internals.h\
-       zahl-inlines.h
+       zahl-inlines.h\
+       zahl-internals.h
 
 HDR_PRIVATE =\
        src/internals.h
@@ -33,12 +33,9 @@ FUN =\
        zptest\
        zrand\
        zrsh\
-       zsave\
        zset\
        zsets\
-       zsetu\
        zsetup\
-       zsplit\
        zsqr\
        zstr\
        zstr_length\
@@ -48,24 +45,27 @@ FUN =\
        zxor
 
 INLINE_FUN =\
-       zinit\
-       zswap\
-       zeven\
-       zodd\
-       zeven_nonzero\
-       zodd_nonzero\
-       zzero\
-       zsignum\
        zabs\
-       zneg\
-       zlsb\
        zbits\
-       zseti\
+       zbtest\
        zcmp\
        zcmpi\
        zcmpmag\
        zcmpu\
-       zbtest
+       zeven\
+       zeven_nonzero\
+       zinit\
+       zlsb\
+       zneg\
+       zodd\
+       zodd_nonzero\
+       zsave\
+       zseti\
+       zsetu\
+       zsignum\
+       zsplit\
+       zswap\
+       zzero
 
 DOC =\
        refsheet.pdf
diff --git a/STATUS b/STATUS
index aa8d510..5e705ee 100644
--- a/STATUS
+++ b/STATUS
@@ -16,7 +16,7 @@ zadd_unsigned ........... fastest (faster than all others' 
zadd)
 zsub_unsigned ........... fastest (faster than all others' zsub)
 zadd .................... 87 % of tomsfastmath, 83 % libtommath, 80 % of 
hebimath
 zsub .................... 97 % of tomsfastmath, 95 % hebimath, 93 % of 
libtommath
-zand .................... 93 % of gmp, 49 % of tomsfastmath
+zand .................... 49 % of tomsfastmath
 zor ..................... 36 % of tomsfastmath
 zxor .................... 51 % of tomsfastmath
 znot .................... fastest
@@ -26,16 +26,16 @@ zeven_nonzero ........... fastest (shared with gmp, 
libtommath, and tomsfastmath
 zodd_nonzero ............ fastest (shared with gmp, libtommath, and 
tomsfastmath)
 zzero ................... fastest (shared with gmp and libtommath)
 zsignum ................. fastest (shared with gmp)
-zbits ................... gmp is faster, because of bug in libzahl
-zlsb .................... fastest (shared with gmp)
+zbits ................... fastest
+zlsb .................... fastest
 zswap ................... fastest
 zlsh .................... fastest
 zrsh .................... fastest
 ztrunc(a, b, c) ......... fastest
 ztrunc(a, a, b) ......... fastest
-zsplit .................. 95 % of gmp
-zcmpmag ................. gmp is faster
-zcmp .................... 94 % of tomsfastmath, 81 % of hebimath, gmp is even 
faster (zcmpmag)
+zsplit .................. fastest
+zcmpmag ................. fastest
+zcmp .................... 94 % of tomsfastmath, 81 % of hebimath (zcmpmag)
 zcmpi ................... fastest
 zcmpu ................... fastest
 zbset(a, b, 1) .......... fastest
@@ -44,8 +44,8 @@ zbset(a, b, 0) .......... fastest
 zbset(a, a, 0) .......... fastest
 zbset(a, b, -1) ......... fastest
 zbset(a, a, -1) ......... fastest
-zbtest .................. fastest (shared with gmp)
-zgcd .................... 26 % of gmp (zcmpmag)
+zbtest .................. fastest
+zgcd .................... 17 % of gmp (zcmpmag)
 zmul .................... slowest
 zsqr .................... slowest (zmul)
 zmodmul(big mod) ........ slowest ((zmul, zmod))
diff --git a/bench/libgmp.h b/bench/libgmp.h
index de59602..7b5e1c8 100644
--- a/bench/libgmp.h
+++ b/bench/libgmp.h
@@ -1,3 +1,5 @@
+#define __GMP_NO_ATTRIBUTE_CONST_PURE
+
 #include <gmp.h>
 
 #include <setjmp.h>
diff --git a/doc/refsheet.tex b/doc/refsheet.tex
index e39c509..d0c31f7 100644
--- a/doc/refsheet.tex
+++ b/doc/refsheet.tex
@@ -106,10 +106,10 @@ Get string length of $a$      & {\tt zstr\_length(a, b)}  
 & returns {\tt size\_
 
 \textbf{Marshallisation}      & {}                         & {}                
                                \\
 Marshal $a$ into $b$          & {\tt zsave(a, b)}          & returns {\tt 
size\_t} number of saved bytes,      \\
-{}                            & {}                         & $~~~~~$ {\tt b} 
is a {\tt char *\_t}              \\
+{}                            & {}                         & $~~~~~$ {\tt b} 
is a {\tt void *\_t}              \\
 Get marshal-size of $a$       & {\tt zsave(a, NULL)}       & returns {\tt 
size\_t}                             \\
 Unmarshal $a$ from $b$        & {\tt zload(a, b)}          & returns {\tt 
size\_t} number of read bytes,       \\
-{}                            & {}                         & $~~~~~$ {\tt b} 
is a {\tt const char *\_t}        \\
+{}                            & {}                         & $~~~~~$ {\tt b} 
is a {\tt const void *\_t}        \\
 \\
 
 \textbf{Number theory}        & {}                         & {}                
                                \\
diff --git a/src/internals.h b/src/internals.h
index 6faa254..a049e1a 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -105,11 +105,12 @@ extern void *libzahl_temp_allocation;
 
 #define SET_SIGNUM(a, signum)        ZAHL_SET_SIGNUM(a, signum)
 #define SET(a, b)                    ZAHL_SET(a, b)
-#define ENSURE_SIZE(a, n)            do { if ((a)->alloced < (n)) 
libzahl_realloc(a, (n)); } while (0)
+#define ENSURE_SIZE(a, n)            ZAHL_ENSURE_SIZE(a, n)
 #define TRIM(a)                      ZAHL_TRIM(a)
 #define TRIM_NONZERO(a)              ZAHL_TRIM_NONZERO(a)
 #define TRIM_AND_ZERO(a)             ZAHL_TRIM_AND_ZERO(a)
 #define TRIM_AND_SIGN(a, s)          ZAHL_TRIM_AND_SIGN(a, s)
+#define SWAP(a, b, t, m)             ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m 
= (t)->m)
 #define MIN(a, b)                    ((a) < (b) ? (a) : (b))
 #define MAX(a, b)                    ((a) > (b) ? (a) : (b))
 #define znegative(a)                 (zsignum(a) < 0)
@@ -120,8 +121,9 @@ extern void *libzahl_temp_allocation;
 #define zpositive2(a, b)             (zsignum(a) + zsignum(b) == 2)
 #define zzero2(a, b)                 (!(zsignum(a) | zsignum(b)))
 #define zmemmove(d, s, n)            memmove((d), (s), (n) * 
sizeof(zahl_char_t))
+#define zmemcpy(d, s, n)             libzahl_memcpy(d, s, n)
+#define zmemset(a, v, n)             libzahl_memset(a, v, n)
 
-void libzahl_realloc(z_t a, size_t need);
 void zmul_impl(z_t a, z_t b, z_t c);
 void zsqr_impl(z_t a, z_t b);
 
@@ -151,20 +153,6 @@ libzahl_memfailure(void)
        libzahl_failure(errno);
 }
 
-static inline void
-zmemcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register 
size_t n)
-{
-       while (n--)
-               d[n] = s[n];
-}
-
-static inline void
-zmemset(zahl_char_t *a, register zahl_char_t v, register size_t n)
-{
-       while (n--)
-               a[n] = v;
-}
-
 /*
  * libzahl_msb_nz_zu
  *         ^^^ ^^ ^^
@@ -218,6 +206,17 @@ libzahl_add_overflow(zahl_char_t *rp, zahl_char_t a, 
zahl_char_t b)
 #endif
 
 static inline void
+zsplit_pz(z_t high, z_t low, z_t a, size_t delim)
+{
+       if (unlikely(zzero(a))) {
+               SET_SIGNUM(high, 0);
+               SET_SIGNUM(low, 0);
+       } else {
+               zsplit(high, low, a, delim);
+       }
+}
+
+static inline void
 zrsh_taint(z_t a, size_t bits)
 {
        size_t i, chars, cbits;
diff --git a/src/zbset.c b/src/zbset.c
index 3d93fd4..223b2d5 100644
--- a/src/zbset.c
+++ b/src/zbset.c
@@ -23,14 +23,14 @@
 
 
 void
-zbset_impl_set(z_t a, size_t bit)
+zbset_ll_set(z_t a, size_t bit)
 {
        PROLOGUE(1);
        a->chars[chars] |= mask;
 }
 
 void
-zbset_impl_clear(z_t a, size_t bit)
+zbset_ll_clear(z_t a, size_t bit)
 {
        PROLOGUE(0);
        a->chars[chars] &= ~mask;
@@ -38,7 +38,7 @@ zbset_impl_clear(z_t a, size_t bit)
 }
 
 void
-zbset_impl_flip(z_t a, size_t bit)
+zbset_ll_flip(z_t a, size_t bit)
 {
        PROLOGUE(1);
        a->chars[chars] ^= mask;
diff --git a/src/zmul.c b/src/zmul.c
index 6ea6d21..3129061 100644
--- a/src/zmul.c
+++ b/src/zmul.c
@@ -53,8 +53,8 @@ zmul_impl(z_t a, z_t b, z_t c)
        zinit_temp(c_high);
        zinit_temp(c_low);
 
-       zsplit(b_high, b_low, b, m2);
-       zsplit(c_high, c_low, c, m2);
+       zsplit_pz(b_high, b_low, b, m2);
+       zsplit_pz(c_high, c_low, c, m2);
 
 
        zmul_impl(z0, b_low, c_low);
diff --git a/src/zsave.c b/src/zsave.c
deleted file mode 100644
index 8b08f2a..0000000
--- a/src/zsave.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-size_t
-zsave(z_t a, void *buffer)
-{
-       if (buffer) {
-               char *buf = buffer;
-               *((int *)buf)    = a->sign, buf += sizeof(int);
-               *((size_t *)buf) = a->used, buf += sizeof(size_t);
-               if (likely(!zzero(a)))
-                       zmemcpy((zahl_char_t *)buf, a->chars, a->used);
-       }
-       return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * 
sizeof(zahl_char_t));
-}
diff --git a/src/zsetu.c b/src/zsetu.c
deleted file mode 100644
index 42e8cec..0000000
--- a/src/zsetu.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zsetu(z_t a, uint64_t b)
-{
-       if (!b) {
-               SET_SIGNUM(a, 0);
-               return;
-       }
-       ENSURE_SIZE(a, 1);
-       SET_SIGNUM(a, 1);
-       a->chars[0] = (zahl_char_t)b;
-       a->used = 1;
-}
diff --git a/src/zsplit.c b/src/zsplit.c
deleted file mode 100644
index afb8a33..0000000
--- a/src/zsplit.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zsplit(z_t high, z_t low, z_t a, size_t delim)
-{
-       if (unlikely(zzero(a))) {
-               /* This is for performance. */
-               SET_SIGNUM(high, 0);
-               SET_SIGNUM(low, 0);
-               return;
-       }
-
-       if (unlikely(high == a)) {
-               ztrunc(low, a, delim);
-               zrsh(high, a, delim);
-       } else {
-               zrsh(high, a, delim);
-               ztrunc(low, a, delim);
-       }
-}
diff --git a/zahl-inlines.h b/zahl-inlines.h
index 7c06270..abea865 100644
--- a/zahl-inlines.h
+++ b/zahl-inlines.h
@@ -14,19 +14,12 @@ ZAHL_INLINE void zneg(z_t a, z_t b)   { ZAHL_SET(a, b); 
a->sign = -a->sign; }
 ZAHL_INLINE void
 zswap(z_t a, z_t b)
 {
+       /* Almost three times faster than the naïve method. */
        z_t t;
-       t->sign = a->sign;
-       a->sign = b->sign;
-       b->sign = t->sign;
-       t->used = b->used;
-       b->used = a->used;
-       a->used = t->used;
-       t->alloced = a->alloced;
-       a->alloced = b->alloced;
-       b->alloced = t->alloced;
-       t->chars = b->chars;
-       b->chars = a->chars;
-       a->chars = t->chars;
+       ZAHL_SWAP(a, b, t, sign);
+       ZAHL_SWAP(b, a, t, used);
+       ZAHL_SWAP(a, b, t, alloced);
+       ZAHL_SWAP(b, a, t, chars);
 }
 
 
@@ -42,6 +35,20 @@ zseti(z_t a, int64_t b)
 }
 
 
+ZAHL_INLINE void
+zsetu(z_t a, uint64_t b)
+{
+       if (!b) {
+               ZAHL_SET_SIGNUM(a, 0);
+               return;
+       }
+       ZAHL_ENSURE_SIZE(a, 1);
+       ZAHL_SET_SIGNUM(a, 1);
+       a->chars[0] = (zahl_char_t)b;
+       a->used = 1;
+}
+
+
 ZAHL_INLINE size_t
 zlsb(z_t a)
 {
@@ -140,10 +147,6 @@ zcmpi(z_t a, int64_t b)
 }
 
 
-void zbset_impl_set(z_t a, size_t bit);
-void zbset_impl_clear(z_t a, size_t bit);
-void zbset_impl_flip(z_t a, size_t bit);
-
 ZAHL_INLINE void
 zbset(z_t a, z_t b, size_t bit, int action)
 {
@@ -174,11 +177,11 @@ fallback:
 #endif
 
        if (action > 0)
-               zbset_impl_set(a, bit);
+               zbset_ll_set(a, bit);
        else if (action < 0)
-               zbset_impl_flip(a, bit);
+               zbset_ll_flip(a, bit);
        else
-               zbset_impl_clear(a, bit);
+               zbset_ll_clear(a, bit);
 }
 
 
@@ -196,3 +199,30 @@ zbtest(z_t a, size_t bit)
        bit &= ZAHL_BITS_IN_LAST_CHAR(bit);
        return (a->chars[chars] >> bit) & 1;
 }
+
+
+ZAHL_INLINE void
+zsplit(z_t high, z_t low, z_t a, size_t delim)
+{
+       if (ZAHL_UNLIKELY(high == a)) {
+               ztrunc(low, a, delim);
+               zrsh(high, a, delim);
+       } else {
+               zrsh(high, a, delim);
+               ztrunc(low, a, delim);
+       }
+}
+
+
+ZAHL_INLINE size_t
+zsave(z_t a, void *buffer)
+{
+       if (ZAHL_LIKELY(buffer)) {
+               char *buf = buffer;
+               *((int *)buf)    = a->sign, buf += sizeof(int);
+               *((size_t *)buf) = a->used, buf += sizeof(size_t);
+               if (ZAHL_LIKELY(!zzero(a)))
+                       libzahl_memcpy((zahl_char_t *)buf, a->chars, a->used);
+       }
+       return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * 
sizeof(zahl_char_t));
+}
diff --git a/zahl-internals.h b/zahl-internals.h
index 9d6df24..4adbdfe 100644
--- a/zahl-internals.h
+++ b/zahl-internals.h
@@ -32,10 +32,12 @@
 
 #define ZAHL_SET_SIGNUM(a, signum)        ((a)->sign = (signum))
 #define ZAHL_SET(a, b)                    do { if ((a) != (b)) zset(a, b); } 
while (0)
+#define ZAHL_ENSURE_SIZE(a, n)            do { if ((a)->alloced < (n)) 
libzahl_realloc(a, (n)); } while (0)
 #define ZAHL_TRIM(a)                      for (; (a)->used && 
!(a)->chars[(a)->used - 1]; (a)->used--)
 #define ZAHL_TRIM_NONZERO(a)              for (; !(a)->chars[(a)->used - 1]; 
(a)->used--)
 #define ZAHL_TRIM_AND_ZERO(a)             do { ZAHL_TRIM(a); if (!(a)->used) 
ZAHL_SET_SIGNUM(a, 0); } while (0)
 #define ZAHL_TRIM_AND_SIGN(a, s)          do { ZAHL_TRIM(a); 
ZAHL_SET_SIGNUM(a, (a)->used ? (s) : 0); } while (0)
+#define ZAHL_SWAP(a, b, t, m)             ((t)->m = (a)->m, (a)->m = (b)->m, 
(b)->m = (t)->m)
 
 
 #if defined(__GNUC__) || defined(__clang__)
@@ -72,3 +74,20 @@ struct zahl {
         size_t alloced;
         zahl_char_t *chars;
 };
+
+
+void libzahl_realloc(struct zahl *, size_t);
+
+static inline void
+libzahl_memcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, 
register size_t n)
+{
+       while (n--)
+               d[n] = s[n];
+}
+
+static inline void
+libzahl_memset(zahl_char_t *a, register zahl_char_t v, register size_t n)
+{
+       while (n--)
+               a[n] = v;
+}
diff --git a/zahl.h b/zahl.h
index f3d3d69..1e27882 100644
--- a/zahl.h
+++ b/zahl.h
@@ -66,14 +66,14 @@ void zunsetup(void);                    /* Free resources 
used by libzahl */
 ZAHL_INLINE void zinit(z_t);            /* Prepare a for use. */
 ZAHL_INLINE void zswap(z_t, z_t);       /* (a, b) := (b, a) */
 void zfree(z_t);                        /* Free resources in a. */
-size_t zsave(z_t, void *);              /* Store a into b (if !!b), and return 
number of written bytes. */
+ZAHL_INLINE size_t zsave(z_t, void *);  /* Store a into b (if !!b), and return 
number of written bytes. */
 size_t zload(z_t, const void *);        /* Restore a from b, and return number 
of read bytes. */
 
 
 /* Assignment functions. */
 
 void zset(z_t, z_t);                    /* a := b */
-void zsetu(z_t, uint64_t);              /* a := b */
+ZAHL_INLINE void zsetu(z_t, uint64_t);  /* a := b */
 ZAHL_INLINE void zseti(z_t, int64_t);   /* a := b */
 
 /* Comparison functions. */
@@ -119,7 +119,8 @@ void znot(z_t, z_t);                    /* a := ~b */
 void zlsh(z_t, z_t, size_t);            /* a := b << c */
 void zrsh(z_t, z_t, size_t);            /* a := b >> c */
 void ztrunc(z_t, z_t, size_t);          /* a := b & ((1 << c) - 1) */
-void zsplit(z_t, z_t, z_t, size_t);     /* a := c >> d, b := c - (a << d) */
+ZAHL_INLINE void zsplit(z_t, z_t, z_t, size_t);
+                                        /* a := c >> d, b := c - (a << d) */
 ZAHL_INLINE int zbtest(z_t, size_t);    /* (a >> b) & 1 */
 ZAHL_INLINE size_t zlsb(z_t);           /* Index of first set bit, SIZE_MAX if 
none are set. */
 ZAHL_INLINE size_t zbits(z_t);          /* ⌊log₂ |a|⌋ + 1, 1 if a = 0 */
@@ -165,6 +166,14 @@ void zperror(const char *);             /* Identical to 
perror(3p) except it sup
 
 
 
+/* Low-level functions. [Do not count on these to be retained between 
different versions of libzahl.] */
+
+void zbset_ll_set(z_t, size_t);         /* zbset(a, a, b, 1) */
+void zbset_ll_clear(z_t, size_t);       /* zbset(a, a, b, 0) */
+void zbset_ll_flip(z_t, size_t);        /* zbset(a, a, b, -1) */
+
+
+
 #include "zahl-inlines.h"
 
 #endif

Reply via email to