IMPALA-5031: undefined behavior: codegen signed overflow

This patch changes the way signed integer arithmetic is performed in
generated code. It uses unsigned arithmetic instead, because overflow
causes undefined behavior according to the standard.

Compiling with -full_ubsan, the following types of errors are avoided
by this patch:

    exprs/operators-ir.cc:167:803: runtime error: signed integer
    overflow: -9223372036854775807 + -9223372036854775807 cannot be
    represented in type 'long'

    exprs/operators-ir.cc:168:823: runtime error: signed integer
    overflow: -9223372036854775807 - 9223372036854775807 cannot be
    represented in type 'long'

    exprs/operators-ir.cc:169:823: runtime error: signed integer
    overflow: -9223372036854775807 * -9223372036854775807 cannot be
    represented in type 'long'

This is visible when running be/build/latest/exprs/expr-test
--gtest_filter=ExprTest.ArithmeticExprs

To check for performance regressions, I ran TPCH and TPCDS at scale
factor 20 with three minicluster nodes and observed no performance
changes. I also checked the LLVM IR to see that the generated code
after this commit is identical to the generated code at the previous
commit, except that the new code doesn't use the 'nsw' modifier. That
modifier normally indicates that signed wrap does not occur; in this
case the operation happens on unsigned representation of the same
bits, that modifier is no longer relevant.

Change-Id: I79ec3a5ed974709e5e47be6b074d39ee89461f7f
Reviewed-on: http://gerrit.cloudera.org:8080/11406
Reviewed-by: Jim Apple <jbapple-imp...@apache.org>
Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b727bb03
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b727bb03
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b727bb03

Branch: refs/heads/master
Commit: b727bb03fda35e4c955277a55e81bf57bfb99690
Parents: e563a8d
Author: Jim Apple <jbapple-imp...@apache.org>
Authored: Fri Sep 7 16:57:32 2018 -0700
Committer: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
Committed: Thu Sep 20 21:38:21 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/operators-ir.cc | 47 ++++++++++++++++++++++++++------
 be/src/util/bit-util.h       | 56 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/b727bb03/be/src/exprs/operators-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/operators-ir.cc b/be/src/exprs/operators-ir.cc
index 6ef41a8..a95cd7f 100644
--- a/be/src/exprs/operators-ir.cc
+++ b/be/src/exprs/operators-ir.cc
@@ -23,6 +23,7 @@
 #include "gutil/strings/substitute.h"
 #include "runtime/string-value.inline.h"
 #include "runtime/timestamp-value.h"
+#include "util/bit-util.h"
 
 #include "common/names.h"
 
@@ -33,6 +34,30 @@
     return TYPE(v1.val OP v2.val);\
   }
 
+// Operations on signed integers that overflow are undefined in C++. From the 
standard:
+// [expr] "If during the evaluation of an expression, the result is not 
mathematically
+// defined or not in the range of representable values for its type, the 
behavior is
+// undefined."
+//
+// Unsigned integers do not suffer the same fate: [basic.fundamental] 
"unsigned arithmetic
+// does not overflow because a result that cannot be represented by the 
resulting unsigned
+// integer type is reduced modulo the number that is one greater than the 
largest value
+// that can be represented by the resulting unsigned integer type."
+//
+// If we take the bits in two signed values and treat them as unsigned values, 
the result
+// of multiplication, addition, and subtraction are identical to the version 
of the signed
+// operation on a two's complement machine in which overflowing is not 
undefined, but
+// wraps.
+#define BINARY_OP_AS_UNSIGNED_FN(NAME, TYPE, OP)                               
        \
+  TYPE Operators::NAME##_##TYPE##_##TYPE(                                      
        \
+      FunctionContext* c, const TYPE& v1, const TYPE& v2) {                    
        \
+    if (v1.is_null || v2.is_null) return TYPE::null();                         
        \
+    using UNSIGNED = UnsignedType<decltype(TYPE::val)>;                        
        \
+    const UNSIGNED u1 = BitUtil::ToUnsigned(v1.val), u2 = 
BitUtil::ToUnsigned(v2.val); \
+    const UNSIGNED u0 = u1 OP u2;                                              
        \
+    return TYPE(u0);                                                           
        \
+  }
+
 #define BINARY_OP_CHECK_ZERO_FN(NAME, TYPE, OP) \
   TYPE Operators::NAME##_##TYPE##_##TYPE(\
       FunctionContext* c, const TYPE& v1, const TYPE& v2) {\
@@ -118,11 +143,13 @@
     BINARY_PREDICATE_CHAR_NONNULL(OP, v1, v2);\
   }
 
-#define BINARY_OP_NUMERIC_TYPES(NAME, OP) \
-  BINARY_OP_FN(NAME, TinyIntVal, OP); \
-  BINARY_OP_FN(NAME, SmallIntVal, OP);\
-  BINARY_OP_FN(NAME, IntVal, OP);\
-  BINARY_OP_FN(NAME, BigIntVal, OP);\
+#define BINARY_OP_AS_UNSIGNED_TYPES(NAME, OP) \
+  BINARY_OP_AS_UNSIGNED_FN(NAME, TinyIntVal, OP); \
+  BINARY_OP_AS_UNSIGNED_FN(NAME, SmallIntVal, OP);\
+  BINARY_OP_AS_UNSIGNED_FN(NAME, IntVal, OP);\
+  BINARY_OP_AS_UNSIGNED_FN(NAME, BigIntVal, OP);
+
+#define BINARY_OP_FLOAT_TYPES(NAME, OP) \
   BINARY_OP_FN(NAME, FloatVal, OP);\
   BINARY_OP_FN(NAME, DoubleVal, OP);
 
@@ -164,9 +191,13 @@
 
 namespace impala {
 
-BINARY_OP_NUMERIC_TYPES(Add, +);
-BINARY_OP_NUMERIC_TYPES(Subtract, -);
-BINARY_OP_NUMERIC_TYPES(Multiply, *);
+BINARY_OP_AS_UNSIGNED_TYPES(Add, +);
+BINARY_OP_AS_UNSIGNED_TYPES(Subtract, -);
+BINARY_OP_AS_UNSIGNED_TYPES(Multiply, *);
+
+BINARY_OP_FLOAT_TYPES(Add, +);
+BINARY_OP_FLOAT_TYPES(Subtract, -);
+BINARY_OP_FLOAT_TYPES(Multiply, *);
 
 BINARY_OP_FN(Divide, DoubleVal, /);
 

http://git-wip-us.apache.org/repos/asf/impala/blob/b727bb03/be/src/util/bit-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h
index d07dd9d..61b72b2 100644
--- a/be/src/util/bit-util.h
+++ b/be/src/util/bit-util.h
@@ -52,6 +52,20 @@ struct MakeUnsigned<int128_t> {
 template <typename T>
 using UnsignedType = typename MakeUnsigned<T>::type;
 
+/// Nested 'type' corresponds to the signed version of T.
+template <typename T>
+struct MakeSigned {
+  using type = std::make_signed_t<T>;
+};
+
+template <>
+struct MakeSigned<__uint128_t> {
+  using type = __int128_t;
+};
+
+template <typename T>
+using SignedType = typename MakeSigned<T>::type;
+
 // Doubles the width of integer types (e.g. int32_t -> int64_t).
 // Currently only works with a few signed types.
 // Feel free to extend it to other types as well.
@@ -406,8 +420,50 @@ class BitUtil {
       return floor + 1;
     }
   }
+
+  // There are times when it is useful to treat the bits in a signed integer 
as if they
+  // represent an unsigned integer, or vice versa. This is known as "type 
punning".
+  //
+  // For type punning signed values to unsigned values we can use the 
assurance in the
+  // standard's [conv.integral] to convert by simply returning the value: "A 
prvalue of an
+  // integer type can be converted to a prvalue of another integer type. ... 
If the
+  // destination type is unsigned, the resulting value is the least unsigned 
integer
+  // congruent to the source integer (modulo 2n where n is the number of bits 
used to
+  // represent the unsigned type). [Note: In a two's complement 
representation, this
+  // conversion is conceptual and there is no change in the bit pattern (if 
there is no
+  // truncation).]"
+  //
+  // For the other direction, the conversion is implementation-defined: "If the
+  // destination type is signed, the value is unchanged if it can be 
represented in the
+  // destination type (and bit-field width); otherwise, the value is
+  // implementation-defined."
+  //
+  // In GCC, the docs promise that when "[t]he result of, or the signal raised 
by,
+  // converting an integer to a signed integer type when the value cannot be 
represented
+  // in an object of that type ... For conversion to a type of width N, the 
value is
+  // reduced modulo 2^N to be within range of the type". As such, the same 
method of
+  // converting by simply returning works.
+  //
+  // Note that Clang does not document its implementation-defined behavior,
+  // https://bugs.llvm.org/show_bug.cgi?id=11272, so the static_asserts below 
are
+  // important
+  template <typename T>
+  constexpr static inline SignedType<T> ToSigned(T x) {
+    return x;
+  }
+  template <typename T>
+  constexpr static inline UnsignedType<T> ToUnsigned(T x) {
+    return x;
+  }
 };
 
+static_assert(BitUtil::ToSigned<uint16_t>(0xffff) == -1
+        && BitUtil::ToSigned<uint16_t>(0x8000) == -0x8000,
+    "ToSigned is not a two's complement no-op");
+static_assert(BitUtil::ToUnsigned<int16_t>(-1) == 0xffff
+        && BitUtil::ToUnsigned<int16_t>(-0x8000) == 0x8000,
+    "ToUnsigned is not a two's complement no-op");
+
 template<>
 inline int256_t BitUtil::Sign(int256_t value) {
   return value < 0 ? -1 : 1;

Reply via email to