[Chicken-hackers] [PATCH] Fix undefined behaviour of shifting left signed values

Peter Bex Sun, 21 Jun 2015 06:45:00 -0700

Hello all,

I ran another test with the sanitizer, this time using the undefined
behaviour checks (-fsanitize=undefined).  This found a few problems:


- We are using left shifts on signed types.  According to the C spec
   this behaviour is undefined (which is completely retarded, IMHO).
   The even more retarded fix is to simply cast to unsigned and back to
   signed whereever this is done.  The attached patch does this for
   master and chicken-5.

- We are dividing floating-point numbers by zero, which is apparently
   undefined.  However, we do this to obtain NaN and Inf values.
   As far as I can determine, there is no really portable way to do
   this.  See also for example this Stack Overflow thread:
   http://stackoverflow.com/questions/1923837/how-to-use-nan-and-inf-in-c
   It looks like INFINITY is defined in C99, and NAN *may* be.

Fixing the signed shift did not fix the build on Macppc, so effectively
this patch "does nothing" and I wasn't sure I should even be sending
this mail.  However, it might make the code a little more robust if
there are C compilers that abuse this undefined behaviour in strange
ways.

Any thoughts on whether we should, and if so, how to, fix the undefined
division by zero behaviour?  In CHICKEN 5, all these uses seem to be in
functions marked OBSOLETE, so that "problem" may go away by itself.

Cheers,
Peter

From c9635d922430717ed4ccdb1d3df41d6beed44aeb Mon Sep 17 00:00:00 2001
From: Peter Bex <[email protected]>
Date: Sat, 20 Jun 2015 21:04:01 +0200
Subject: [PATCH] Avoid shifting signed values, which is undefined behaviour in
 C.

---
 chicken.h | 16 ++++++++--------
 runtime.c | 30 +++++++++++++++---------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/chicken.h b/chicken.h
index f53fb3d..c0f437c 100644
--- a/chicken.h
+++ b/chicken.h
@@ -910,7 +910,7 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 /* This is word-size dependent: */
 #ifdef C_SIXTY_FOUR
 # define C_align(n)                C_align8(n)
-# define C_wordstobytes(n)         ((n) << 3)
+# define C_wordstobytes(n)         ((C_uword)(n) << 3)
 # define C_bytestowords(n)         (((n) + 7) >> 3)
 # define C_wordsperdouble(n)       (n)
 # define C_WORD_MIN                LONG_MIN
@@ -918,9 +918,9 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 # define C_UWORD_MAX               ULONG_MAX
 #else
 # define C_align(n)                C_align4(n)
-# define C_wordstobytes(n)         ((n) << 2)
+# define C_wordstobytes(n)         ((C_uword)(n) << 2)
 # define C_bytestowords(n)         (((n) + 3) >> 2)
-# define C_wordsperdouble(n)       ((n) << 1)
+# define C_wordsperdouble(n)       ((C_uword)(n) << 1)
 # define C_WORD_MIN                INT_MIN
 # define C_WORD_MAX                INT_MAX
 # define C_UWORD_MAX               UINT_MAX
@@ -1134,7 +1134,7 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 #endif
 #define C_stack_pointer_test       ((C_word *)C_alloca(1))
 #define C_demand_2(n)              (((C_word *)C_fromspace_top + (n)) < (C_word *)C_fromspace_limit)
-#define C_fix(n)                   (((C_word)(n) << C_FIXNUM_SHIFT) | C_FIXNUM_BIT)
+#define C_fix(n)                   ((C_word)((C_uword)(n) << C_FIXNUM_SHIFT) | C_FIXNUM_BIT)
 #define C_unfix(x)                 C_CHECKp(x,C_fixnump(C_VAL1(x)),((C_VAL1(x)) >> C_FIXNUM_SHIFT))
 #define C_make_character(c)        (((((C_uword)(c)) & C_CHAR_BIT_MASK) << C_CHAR_SHIFT) | C_CHARACTER_BITS)
 #define C_character_code(x)        C_CHECKp(x,C_charp(C_VAL1(x)),((C_word)(C_VAL1(x)) >> C_CHAR_SHIFT) & C_CHAR_BIT_MASK)
@@ -1155,7 +1155,7 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 #define C_fitsinbignumhalfdigitp(n)(C_BIGNUM_DIGIT_HI_HALF(n) == 0)
 #define C_bignum_negated_fitsinfixnump(b) (C_bignum_size(b) == 1 && (C_bignum_negativep(b) ? C_ufitsinfixnump(*C_bignum_digits(b)) : !(*C_bignum_digits(b) & C_INT_SIGN_BIT) && C_fitsinfixnump(-(C_word)*C_bignum_digits(b))))
 #define C_bignum_mutate_size(b, s) (C_block_header(C_internal_bignum_vector(b)) = (C_STRING_TYPE | C_wordstobytes((s)+1)))
-#define C_fitsinfixnump(n)         (((n) & C_INT_SIGN_BIT) == (((n) & C_INT_TOP_BIT) << 1))
+#define C_fitsinfixnump(n)         (((n) & C_INT_SIGN_BIT) == (((C_uword)(n) & C_INT_TOP_BIT) << 1))
 #define C_ufitsinfixnump(n)        (((n) & (C_INT_SIGN_BIT | (C_INT_SIGN_BIT >> 1))) == 0)
 #define C_and(x, y)                (C_truep(x) ? (y) : C_SCHEME_FALSE)
 #define C_c_bytevector(x)          ((unsigned char *)C_data_pointer(x))
@@ -1284,7 +1284,7 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 #define C_fixnum_or(n1, n2)             (C_u_fixnum_or(n1, n2) | C_FIXNUM_BIT)
 #define C_fixnum_xor(n1, n2)            (((n1) ^ (n2)) | C_FIXNUM_BIT)
 #define C_fixnum_not(n)                 ((~(n)) | C_FIXNUM_BIT)
-#define C_fixnum_shift_left(n1, n2)     (C_fix(C_unfix(n1) << C_unfix(n2)))
+#define C_fixnum_shift_left(n1, n2)     (C_fix(((C_uword)C_unfix(n1) << (C_uword)C_unfix(n2))))
 #define C_fixnum_shift_right(n1, n2)    (((n1) >> C_unfix(n2)) | C_FIXNUM_BIT)
 /* XXX TODO OBSOLETE, but still used by C_fixnum_negate, which is fxneg */
 #define C_u_fixnum_negate(n)            (-(n) + 2 * C_FIXNUM_BIT)
@@ -2561,7 +2561,7 @@ C_inline C_s64 C_num_to_int64(C_word x)
   } else if (C_truep(C_bignump(x))) {
     C_s64 num = C_bignum_digits(x)[0];
 #ifndef C_SIXTY_FOUR
-    if (C_bignum_size(x) > 1) num |= ((C_s64)C_bignum_digits(x)[1]) << 32;
+    if (C_bignum_size(x) > 1) num |= (C_s64)(((C_u64)C_bignum_digits(x)[1]) << 32);
 #endif
     if (C_bignum_negativep(x)) return -num;
     else return num;
@@ -3068,7 +3068,7 @@ C_inline C_word C_i_fixnum_bit_setp(C_word n, C_word i)
     } else {
       i = C_unfix(i);
       if (i >= C_WORD_SIZE) return C_mk_bool(n & C_INT_SIGN_BIT);
-      else return C_mk_bool((C_unfix(n) & ((C_word)1 << i)) != 0);
+      else return C_mk_bool((C_unfix(n) & (C_word)((C_uword)1 << i)) != 0);
     }
 }
 
diff --git a/runtime.c b/runtime.c
index 6393896..ce2d80f 100644
--- a/runtime.c
+++ b/runtime.c
@@ -244,7 +244,7 @@ extern void _C_do_apply_hack(void *proc, C_word *args, int count) C_noret;
 #define free_tmp_bignum(b)           C_free((void *)(b))
 #define is_fptr(x)                   (((x) & C_GC_FORWARDING_BIT) != 0)
 #define ptr_to_fptr(x)               ((((x) >> FORWARDING_BIT_SHIFT) & 1) | C_GC_FORWARDING_BIT | ((x) & ~1))
-#define fptr_to_ptr(x)               (((x) << FORWARDING_BIT_SHIFT) | ((x) & ~(C_GC_FORWARDING_BIT | 1)))
+#define fptr_to_ptr(x)               (((C_uword)(x) << FORWARDING_BIT_SHIFT) | ((x) & ~(C_GC_FORWARDING_BIT | 1)))
 
 #define C_check_real(x, w, v)       if(((x) & C_FIXNUM_BIT) != 0) v = C_unfix(x); \
                                      else if(C_immediatep(x) || C_block_header(x) != C_FLONUM_TAG) \
@@ -2583,7 +2583,7 @@ C_regparm C_word C_fcall C_static_bignum(C_word **ptr, int len, C_char *str)
     negp = ((*str++) == '-') ? 1 : 0;
     --len;
   }
-  size = C_BIGNUM_BITS_TO_DIGITS(len << 2);
+  size = C_BIGNUM_BITS_TO_DIGITS((unsigned int)len << 2);
 
   dptr = (C_word *)C_malloc(C_wordstobytes(C_SIZEOF_INTERNAL_BIGNUM_VECTOR(size)));
   if(dptr == NULL)
@@ -6337,7 +6337,7 @@ C_regparm C_word C_fcall C_a_i_arithmetic_shift(C_word **a, int c, C_word n1, C_
 
   if(sgn < 0) {
     if(s < 0) nn >>= -s;
-    else nn <<= s;
+    else nn = (C_word)((C_uword)nn << s);
 
     if(C_fitsinfixnump(nn)) return C_fix(nn);
     else return C_flonum(a, nn);
@@ -6374,7 +6374,7 @@ C_s_a_i_arithmetic_shift(C_word **ptr, C_word n, C_word x, C_word y)
     } else if (y > 0 && y < C_WORD_SIZE-2 &&
                /* After shifting, the length still fits a fixnum */
                (C_ilen(C_unfix(x)) + y) < C_WORD_SIZE-2) {
-      return C_fix(C_unfix(x) << y);
+      return C_fix((C_uword)C_unfix(x) << y);
     } else {
       x = C_a_u_i_fix_to_big(&a, x);
     }
@@ -7829,7 +7829,7 @@ bignum_times_bignum_karatsuba(C_word **ptr, C_word x, C_word y, C_word negp)
 
    /* top(x) = a << (bits - 1)  and  bottom(y) = ((b + (a - c)) << bits) + b */
    bits = C_unfix(n) * C_BIGNUM_DIGIT_LENGTH;
-   x = o[i++] = C_s_a_i_arithmetic_shift(&ka, 2, a, C_fix(bits << 1));
+   x = o[i++] = C_s_a_i_arithmetic_shift(&ka, 2, a, C_fix((C_uword)bits << 1));
    c = o[i++] = C_s_a_u_i_integer_minus(&ka, 2, a, c);
    c = o[i++] = C_s_a_u_i_integer_plus(&ka, 2, b, c);
    c = o[i++] = C_s_a_i_arithmetic_shift(&ka, 2, c, C_fix(bits));
@@ -9306,8 +9306,8 @@ C_regparm double C_fcall C_bignum_to_double(C_word bignum)
   C_uword *start = C_bignum_digits(bignum),
           *scan = start + C_bignum_size(bignum);
   while (start < scan) {
-    accumulator *= (C_word)1 << C_BIGNUM_HALF_DIGIT_LENGTH;
-    accumulator *= (C_word)1 << C_BIGNUM_HALF_DIGIT_LENGTH;
+    accumulator *= (C_uword)1 << C_BIGNUM_HALF_DIGIT_LENGTH;
+    accumulator *= (C_uword)1 << C_BIGNUM_HALF_DIGIT_LENGTH;
     accumulator += (*--scan);
   }
   return(C_bignum_negativep(bignum) ? -accumulator : accumulator);
@@ -10358,9 +10358,9 @@ bignum_destructive_divide_normalized(C_word big_u, C_word big_v, C_word big_q)
     rhat = hat % vn_1;
 
     /* Two whiles is faster than one big check with an OR.  Thanks, Gauche! */
-    while(qhat >= (1L << C_BIGNUM_HALF_DIGIT_LENGTH)) { qhat--; rhat += vn_1; }
+    while(qhat >= (1UL << C_BIGNUM_HALF_DIGIT_LENGTH)) { qhat--; rhat += vn_1; }
     while(qhat * vn_2 > C_BIGNUM_DIGIT_COMBINE(rhat, C_uhword_ref(u, j+n-2))
-	  && rhat < (1L << C_BIGNUM_HALF_DIGIT_LENGTH)) {
+	  && rhat < (1UL << C_BIGNUM_HALF_DIGIT_LENGTH)) {
       qhat--;
       rhat += vn_1;
     }
@@ -11043,7 +11043,7 @@ C_regparm C_word C_fcall convert_string_to_number(C_char *str, int radix, C_word
 
     return 0;
   }
-  else if((n & C_INT_SIGN_BIT) != ((n << 1) & C_INT_SIGN_BIT)) { /* doesn't fit into fixnum? */
+  else if((n & C_INT_SIGN_BIT) != (((C_uword)n << 1) & C_INT_SIGN_BIT)) { /* doesn't fit into fixnum? */
     if(*eptr == '\0' || !C_strncmp(eptr, ".0", C_strlen(eptr))) {
       *flo = (double)n;
       return 2;
@@ -11265,7 +11265,7 @@ bignum_to_str_2(C_word c, C_word self, C_word string)
         assert(index >= buf);
 	radix_digit = big_digit;
         big_digit = *scan++;
-	radix_digit |= (big_digit << big_digit_len) & radix_mask;
+	radix_digit |= ((unsigned int)big_digit << big_digit_len) & radix_mask;
 	big_digit >>= (radix_shift - big_digit_len);
         big_digit_len = C_BIGNUM_DIGIT_LENGTH - big_digit_len;
       }
@@ -12413,10 +12413,10 @@ static C_regparm C_word C_fcall decode_literal2(C_word **ptr, C_char **str,
       return (C_word)(*(*str - 1));
 
     case C_FIXNUM_BIT:
-      val = *((*str)++) << 24; /* always big endian */
-      val |= (*((*str)++) & 0xff) << 16;
-      val |= (*((*str)++) & 0xff) << 8;
-      val |= (*((*str)++) & 0xff);
+      val = (C_uword)*((*str)++) << 24; /* always big endian */
+      val |= ((C_uword)*((*str)++) & 0xff) << 16;
+      val |= ((C_uword)*((*str)++) & 0xff) << 8;
+      val |= ((C_uword)*((*str)++) & 0xff);
       return C_fix(val); 
 
 #ifdef C_SIXTY_FOUR
-- 
2.1.4

From 32bdbece9ff928f7d06e2e3d9c99f740b12689d4 Mon Sep 17 00:00:00 2001
From: Peter Bex <[email protected]>
Date: Sat, 20 Jun 2015 20:55:23 +0200
Subject: [PATCH] Avoid shifting signed values, which is undefined behaviour in
 C.

---
 chicken.h | 12 ++++++------
 runtime.c | 18 +++++++++---------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/chicken.h b/chicken.h
index 8f8d82e..66c276a 100644
--- a/chicken.h
+++ b/chicken.h
@@ -864,7 +864,7 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 /* This is word-size dependent: */
 #ifdef C_SIXTY_FOUR
 # define C_align(n)                C_align8(n)
-# define C_wordstobytes(n)         ((n) << 3)
+# define C_wordstobytes(n)         ((C_uword)(n) << 3)
 # define C_bytestowords(n)         (((n) + 7) >> 3)
 # define C_wordsperdouble(n)       (n)
 # define C_WORD_MIN                LONG_MIN
@@ -872,9 +872,9 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 # define C_UWORD_MAX               ULONG_MAX
 #else
 # define C_align(n)                C_align4(n)
-# define C_wordstobytes(n)         ((n) << 2)
+# define C_wordstobytes(n)         ((C_uword)(n) << 2)
 # define C_bytestowords(n)         (((n) + 3) >> 2)
-# define C_wordsperdouble(n)       ((n) << 1)
+# define C_wordsperdouble(n)       ((C_uword)(n) << 1)
 # define C_WORD_MIN                INT_MIN
 # define C_WORD_MAX                INT_MAX
 # define C_UWORD_MAX               UINT_MAX
@@ -1086,7 +1086,7 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 #endif
 #define C_stack_pointer_test       ((C_word *)C_alloca(1))
 #define C_demand_2(n)              (((C_word *)C_fromspace_top + (n)) < (C_word *)C_fromspace_limit)
-#define C_fix(n)                   (((C_word)(n) << C_FIXNUM_SHIFT) | C_FIXNUM_BIT)
+#define C_fix(n)                   ((C_word)((C_uword)(n) << C_FIXNUM_SHIFT) | C_FIXNUM_BIT)
 #define C_unfix(x)                 C_CHECKp(x,C_fixnump(C_VAL1(x)),((C_VAL1(x)) >> C_FIXNUM_SHIFT))
 #define C_make_character(c)        (((((C_uword)(c)) & C_CHAR_BIT_MASK) << C_CHAR_SHIFT) | C_CHARACTER_BITS)
 #define C_character_code(x)        C_CHECKp(x,C_charp(C_VAL1(x)),((C_word)(C_VAL1(x)) >> C_CHAR_SHIFT) & C_CHAR_BIT_MASK)
@@ -1102,7 +1102,7 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 #define C_mk_nbool(x)              ((x) ? C_SCHEME_FALSE : C_SCHEME_TRUE)
 #define C_port_file(p)             C_CHECKp(p,C_portp(C_VAL1(p)),(C_FILEPTR)C_block_item(C_VAL1(p), 0))
 #define C_data_pointer(b)          C_CHECKp(b,C_blockp((C_word)C_VAL1(b)),(void *)(((C_SCHEME_BLOCK *)(C_VAL1(b)))->data))
-#define C_fitsinfixnump(n)         (((n) & C_INT_SIGN_BIT) == (((n) & C_INT_TOP_BIT) << 1))
+#define C_fitsinfixnump(n)         (((n) & C_INT_SIGN_BIT) == ((C_uword)((n) & C_INT_TOP_BIT) << 1))
 #define C_ufitsinfixnump(n)        (((n) & (C_INT_SIGN_BIT | (C_INT_SIGN_BIT >> 1))) == 0)
 #define C_quickflonumtruncate(n)   (C_fix((C_word)C_flonum_magnitude(n)))
 #define C_and(x, y)                (C_truep(x) ? (y) : C_SCHEME_FALSE)
@@ -1220,7 +1220,7 @@ DECL_C_PROC_p0 (128,  1,0,0,0,0,0,0,0)
 #define C_fixnum_or(n1, n2)             (C_u_fixnum_or(n1, n2) | C_FIXNUM_BIT)
 #define C_fixnum_xor(n1, n2)            (((n1) ^ (n2)) | C_FIXNUM_BIT)
 #define C_fixnum_not(n)                 ((~(n)) | C_FIXNUM_BIT)
-#define C_fixnum_shift_left(n1, n2)     (C_fix(C_unfix(n1) << C_unfix(n2)))
+#define C_fixnum_shift_left(n1, n2)     (C_fix(((C_uword)C_unfix(n1) << (C_uword)C_unfix(n2))))
 #define C_fixnum_shift_right(n1, n2)    (((n1) >> C_unfix(n2)) | C_FIXNUM_BIT)
 #define C_u_fixnum_negate(n)            (-(n) + 2 * C_FIXNUM_BIT)
 #define C_fixnum_negate(n)              (C_u_fixnum_negate(n) | C_FIXNUM_BIT)
diff --git a/runtime.c b/runtime.c
index 75ca654..f592f93 100644
--- a/runtime.c
+++ b/runtime.c
@@ -218,7 +218,7 @@ extern void _C_do_apply_hack(void *proc, C_word *args, int count) C_noret;
 
 #define is_fptr(x)                   (((x) & C_GC_FORWARDING_BIT) != 0)
 #define ptr_to_fptr(x)               ((((x) >> FORWARDING_BIT_SHIFT) & 1) | C_GC_FORWARDING_BIT | ((x) & ~1))
-#define fptr_to_ptr(x)               (((x) << FORWARDING_BIT_SHIFT) | ((x) & ~(C_GC_FORWARDING_BIT | 1)))
+#define fptr_to_ptr(x)               (((C_uword)(x) << FORWARDING_BIT_SHIFT) | ((x) & ~(C_GC_FORWARDING_BIT | 1)))
 
 #define C_check_flonum(x, w)        if(C_immediatep(x) || C_block_header(x) != C_FLONUM_TAG) \
                                        barf(C_BAD_ARGUMENT_TYPE_NO_FLONUM_ERROR, w, x);
@@ -5316,7 +5316,7 @@ C_regparm C_word C_fcall C_a_i_arithmetic_shift(C_word **a, int c, C_word n1, C_
 
   if(sgn < 0) {
     if(s < 0) nn >>= -s;
-    else nn <<= s;
+    else nn = (C_word)((C_uword)nn << s);
 
     if(C_fitsinfixnump(nn)) return C_fix(nn);
     else return C_flonum(a, nn);
@@ -7633,7 +7633,7 @@ C_regparm C_word C_fcall convert_string_to_number(C_char *str, int radix, C_word
 
     return 0;
   }
-  else if((n & C_INT_SIGN_BIT) != ((n << 1) & C_INT_SIGN_BIT)) { /* doesn't fit into fixnum? */
+  else if((n & C_INT_SIGN_BIT) != (((C_uword)n << 1) & C_INT_SIGN_BIT)) { /* doesn't fit into fixnum? */
     if(*eptr == '\0' || !C_strncmp(eptr, ".0", C_strlen(eptr))) {
       *flo = (double)n;
       return 2;
@@ -7943,7 +7943,7 @@ void C_ccall C_peek_signed_integer(C_word c, C_word closure, C_word k, C_word v,
   C_word x = C_block_item(v, C_unfix(index));
   C_alloc_flonum;
 
-  if((x & C_INT_SIGN_BIT) != ((x << 1) & C_INT_SIGN_BIT)) {
+  if((x & C_INT_SIGN_BIT) != (((C_uword)x << 1) & C_INT_SIGN_BIT)) {
     C_kontinue_flonum(k, (double)x);
   }
 
@@ -7956,7 +7956,7 @@ void C_ccall C_peek_unsigned_integer(C_word c, C_word closure, C_word k, C_word
   C_word x = C_block_item(v, C_unfix(index));
   C_alloc_flonum;
 
-  if((x & C_INT_SIGN_BIT) || ((x << 1) & C_INT_SIGN_BIT)) {
+  if((x & C_INT_SIGN_BIT) || (((C_uword)x << 1) & C_INT_SIGN_BIT)) {
     C_kontinue_flonum(k, (double)(C_uword)x);
   }
 
@@ -8845,10 +8845,10 @@ static C_regparm C_word C_fcall decode_literal2(C_word **ptr, C_char **str,
       return (C_word)(*(*str - 1));
 
     case C_FIXNUM_BIT:
-      val = *((*str)++) << 24; /* always big endian */
-      val |= (*((*str)++) & 0xff) << 16;
-      val |= (*((*str)++) & 0xff) << 8;
-      val |= (*((*str)++) & 0xff);
+      val = (C_uword)*((*str)++) << 24; /* always big endian */
+      val |= ((C_uword)*((*str)++) & 0xff) << 16;
+      val |= ((C_uword)*((*str)++) & 0xff) << 8;
+      val |= ((C_uword)*((*str)++) & 0xff);
       return C_fix(val); 
 
 #ifdef C_SIXTY_FOUR
-- 
2.1.4

signature.asc
Description: Digital signature

_______________________________________________
Chicken-hackers mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/chicken-hackers

[Chicken-hackers] [PATCH] Fix undefined behaviour of shifting left signed values

Reply via email to