Preemptively fix an out-of-bounds write in the zmul patch, which occurs in this call chain:

zsplit(c_high, c_low, c, m2)
 zrsh(c_high, c, m2)
  zmemcpy(c_high, &c[m2>>6], c->used - (m2>>6))

zmemcpy assumes that alloced is a multiple of 4 and copies in whole groups of 4
elements, so it can write past the end of c_high->chars into b_low->chars,
causing disaster.

In this case &c_high->chars[c_high->alloced] == &b_low->chars[0], i.e. the
buffer of b_low starts directly after the buffer of c_high; see
zmul_bump_alloc_temps() in part 2.
---
 zahl/memory.h | 99 ++++++++++++++++++---------------------------------
 1 file changed, 35 insertions(+), 64 deletions(-)

diff --git a/zahl/memory.h b/zahl/memory.h
index 797beab..43ba0a1 100644
--- a/zahl/memory.h
+++ b/zahl/memory.h
@@ -40,34 +40,20 @@ libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *res
        LIBZAHL_SMALL_INPUT_BEGIN(n);
        {
 #if defined(__x86_64__) && !defined(ZAHL_NO_ASM)
-               /* This crap is needed for clang. */
-               register zahl_char_t t;
-               __asm__ __volatile__ (
-# if defined(ZAHL_ISA_MISSING_INDIRECT_JUMP)
-                       "\n    testq %[e], %[e]"
-                       "\n    jz 2f"
-# endif
-                       "\n    shlq $3, %[e]"
-                       "\n    addq %[d], %[e]"
-                       "\n 1:"
-                       "\n    movq 0(%[s]), %[t]"
-                       "\n    movq %[t], 0(%[d])"
-                       "\n    movq 8(%[s]), %[t]"
-                       "\n    movq %[t], 8(%[d])"
-                       "\n    movq 16(%[s]), %[t]"
-                       "\n    movq %[t], 16(%[d])"
-                       "\n    movq 24(%[s]), %[t]"
-                       "\n    movq %[t], 24(%[d])"
-                       "\n    addq $32, %[s]"
-                       "\n    addq $32, %[d]"
-                       "\n    cmpq %[e], %[d]"
-                       "\n    jl 1b"
-# if defined(ZAHL_ISA_MISSING_INDIRECT_JUMP)
-                       "\n 2:"
-# endif
-                       : [t]"=r"(t), [d]"+r"(d), [s]"+r"(s), [e]"+r"(n));
+               __asm__ volatile (
+                       "pushf"     "\n"
+                       "cld"       "\n"
+                       "rep movsq" "\n"
+                       "popf"      "\n"
+                       : "+c" (n), "+D" (d), "+S" (s)
+                       :
+                       : "memory"
+               );
 #else
                size_t i;
+               for (; n&3; n--)
+                       *d++ = *s++;
+
                for (i = 0; i < n; i += 4) {
                        d[i + 0] = s[i + 0];
                        d[i + 1] = s[i + 1];
@@ -85,6 +71,9 @@ ZAHL_INLINE void
 libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n)
 {
        size_t i;
+       for (; n&3; n--)
+               *a++ = v;
+
        for (i = 0; i < n; i += 4) {
                a[i + 0] = v;
                a[i + 1] = v;
@@ -96,45 +85,22 @@ libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n)
 ZAHL_INLINE void
 libzahl_memset_precise(register zahl_char_t *a, register zahl_char_t v, size_t n)
 {
-       size_t i;
-       if (n <= 4) {
-               if (n >= 1)
-                       a[0] = v;
-               if (n >= 2)
-                       a[1] = v;
-               if (n >= 3)
-                       a[2] = v;
-               if (n >= 4)
-                       a[3] = v;
-       } else {
-               for (i = 0; (i += 4) <= n;) {
-                       a[i - 1] = v;
-                       a[i - 2] = v;
-                       a[i - 3] = v;
-                       a[i - 4] = v;
-               }
-               if (i > n)
-                       for (i -= 4; i < n; i++)
-                               a[i] = v;
-       }
+       libzahl_memset(a, v, n);
 }
 
 
 ZAHL_INLINE void
 libzahl_memmovef(register zahl_char_t *d, register const zahl_char_t *s, size_t n)
 {
-       if (n && n < 4) {
-               d[0] = s[0];
-               d[1] = s[1];
-               d[2] = s[2];
-       } else {
-               size_t i;
-               for (i = 0; i < n; i += 4) {
-                       d[i + 0] = s[i + 0];
-                       d[i + 1] = s[i + 1];
-                       d[i + 2] = s[i + 2];
-                       d[i + 3] = s[i + 3];
-               }
+       size_t i;
+       for (; n&3; n--)
+               *d++ = *s++;
+
+       for (i = 0; i < n; i += 4) {
+               d[i + 0] = s[i + 0];
+               d[i + 1] = s[i + 1];
+               d[i + 2] = s[i + 2];
+               d[i + 3] = s[i + 3];
        }
 }
 
@@ -144,11 +110,16 @@ libzahl_memmoveb(register zahl_char_t *d, register const zahl_char_t *s, size_t
        ssize_t i;
 #define LIBZAHL_X(I)  case I:  d[I - 1] = s[I - 1];
        LIBZAHL_SMALL_INPUT_BEGIN(n);
-       for (i = ((ssize_t)n + 3) & ~3; (i -= 4) >= 0;) {
-               d[i + 3] = s[i + 3];
-               d[i + 2] = s[i + 2];
-               d[i + 1] = s[i + 1];
-               d[i + 0] = s[i + 0];
+       {
+               for (; n&3; n--)
+                       d[n - 1] = s[n - 1];
+               for (i = n; (i -= 4) >= 0;) {
+                       d[i + 3] = s[i + 3];
+                       d[i + 2] = s[i + 2];
+                       d[i + 1] = s[i + 1];
+                       d[i + 0] = s[i + 0];
+               }
+               break;
        }
        LIBZAHL_SMALL_INPUT_END;
 #undef LIBZAHL_X
-- 
2.53.0



Reply via email to