IMPALA-5031: undefined behavior: codegen signed overflow This patch changes the way signed integer arithmetic is performed in generated code. It uses unsigned arithmetic instead, because overflow causes undefined behavior according to the standard.
Compiling with -full_ubsan, the following types of errors are avoided by this patch: exprs/operators-ir.cc:167:803: runtime error: signed integer overflow: -9223372036854775807 + -9223372036854775807 cannot be represented in type 'long' exprs/operators-ir.cc:168:823: runtime error: signed integer overflow: -9223372036854775807 - 9223372036854775807 cannot be represented in type 'long' exprs/operators-ir.cc:169:823: runtime error: signed integer overflow: -9223372036854775807 * -9223372036854775807 cannot be represented in type 'long' This is visible when running be/build/latest/exprs/expr-test --gtest_filter=ExprTest.ArithmeticExprs To check for performance regressions, I ran TPCH and TPCDS at scale factor 20 with three minicluster nodes and observed no performance changes. I also checked the LLVM IR to see that the generated code after this commit is identical to the generated code at the previous commit, except that the new code doesn't use the 'nsw' modifier. That modifier normally indicates that signed wrap does not occur; in this case the operation happens on unsigned representation of the same bits, that modifier is no longer relevant. Change-Id: I79ec3a5ed974709e5e47be6b074d39ee89461f7f Reviewed-on: http://gerrit.cloudera.org:8080/11406 Reviewed-by: Jim Apple <jbapple-imp...@apache.org> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b727bb03 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b727bb03 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b727bb03 Branch: refs/heads/master Commit: b727bb03fda35e4c955277a55e81bf57bfb99690 Parents: e563a8d Author: Jim Apple <jbapple-imp...@apache.org> Authored: Fri Sep 7 16:57:32 2018 -0700 Committer: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Committed: Thu Sep 20 21:38:21 2018 +0000 ---------------------------------------------------------------------- be/src/exprs/operators-ir.cc | 47 ++++++++++++++++++++++++++------ be/src/util/bit-util.h | 56 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/b727bb03/be/src/exprs/operators-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/operators-ir.cc b/be/src/exprs/operators-ir.cc index 6ef41a8..a95cd7f 100644 --- a/be/src/exprs/operators-ir.cc +++ b/be/src/exprs/operators-ir.cc @@ -23,6 +23,7 @@ #include "gutil/strings/substitute.h" #include "runtime/string-value.inline.h" #include "runtime/timestamp-value.h" +#include "util/bit-util.h" #include "common/names.h" @@ -33,6 +34,30 @@ return TYPE(v1.val OP v2.val);\ } +// Operations on signed integers that overflow are undefined in C++. From the standard: +// [expr] "If during the evaluation of an expression, the result is not mathematically +// defined or not in the range of representable values for its type, the behavior is +// undefined." +// +// Unsigned integers do not suffer the same fate: [basic.fundamental] "unsigned arithmetic +// does not overflow because a result that cannot be represented by the resulting unsigned +// integer type is reduced modulo the number that is one greater than the largest value +// that can be represented by the resulting unsigned integer type." +// +// If we take the bits in two signed values and treat them as unsigned values, the result +// of multiplication, addition, and subtraction are identical to the version of the signed +// operation on a two's complement machine in which overflowing is not undefined, but +// wraps. +#define BINARY_OP_AS_UNSIGNED_FN(NAME, TYPE, OP) \ + TYPE Operators::NAME##_##TYPE##_##TYPE( \ + FunctionContext* c, const TYPE& v1, const TYPE& v2) { \ + if (v1.is_null || v2.is_null) return TYPE::null(); \ + using UNSIGNED = UnsignedType<decltype(TYPE::val)>; \ + const UNSIGNED u1 = BitUtil::ToUnsigned(v1.val), u2 = BitUtil::ToUnsigned(v2.val); \ + const UNSIGNED u0 = u1 OP u2; \ + return TYPE(u0); \ + } + #define BINARY_OP_CHECK_ZERO_FN(NAME, TYPE, OP) \ TYPE Operators::NAME##_##TYPE##_##TYPE(\ FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ @@ -118,11 +143,13 @@ BINARY_PREDICATE_CHAR_NONNULL(OP, v1, v2);\ } -#define BINARY_OP_NUMERIC_TYPES(NAME, OP) \ - BINARY_OP_FN(NAME, TinyIntVal, OP); \ - BINARY_OP_FN(NAME, SmallIntVal, OP);\ - BINARY_OP_FN(NAME, IntVal, OP);\ - BINARY_OP_FN(NAME, BigIntVal, OP);\ +#define BINARY_OP_AS_UNSIGNED_TYPES(NAME, OP) \ + BINARY_OP_AS_UNSIGNED_FN(NAME, TinyIntVal, OP); \ + BINARY_OP_AS_UNSIGNED_FN(NAME, SmallIntVal, OP);\ + BINARY_OP_AS_UNSIGNED_FN(NAME, IntVal, OP);\ + BINARY_OP_AS_UNSIGNED_FN(NAME, BigIntVal, OP); + +#define BINARY_OP_FLOAT_TYPES(NAME, OP) \ BINARY_OP_FN(NAME, FloatVal, OP);\ BINARY_OP_FN(NAME, DoubleVal, OP); @@ -164,9 +191,13 @@ namespace impala { -BINARY_OP_NUMERIC_TYPES(Add, +); -BINARY_OP_NUMERIC_TYPES(Subtract, -); -BINARY_OP_NUMERIC_TYPES(Multiply, *); +BINARY_OP_AS_UNSIGNED_TYPES(Add, +); +BINARY_OP_AS_UNSIGNED_TYPES(Subtract, -); +BINARY_OP_AS_UNSIGNED_TYPES(Multiply, *); + +BINARY_OP_FLOAT_TYPES(Add, +); +BINARY_OP_FLOAT_TYPES(Subtract, -); +BINARY_OP_FLOAT_TYPES(Multiply, *); BINARY_OP_FN(Divide, DoubleVal, /); http://git-wip-us.apache.org/repos/asf/impala/blob/b727bb03/be/src/util/bit-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h index d07dd9d..61b72b2 100644 --- a/be/src/util/bit-util.h +++ b/be/src/util/bit-util.h @@ -52,6 +52,20 @@ struct MakeUnsigned<int128_t> { template <typename T> using UnsignedType = typename MakeUnsigned<T>::type; +/// Nested 'type' corresponds to the signed version of T. +template <typename T> +struct MakeSigned { + using type = std::make_signed_t<T>; +}; + +template <> +struct MakeSigned<__uint128_t> { + using type = __int128_t; +}; + +template <typename T> +using SignedType = typename MakeSigned<T>::type; + // Doubles the width of integer types (e.g. int32_t -> int64_t). // Currently only works with a few signed types. // Feel free to extend it to other types as well. @@ -406,8 +420,50 @@ class BitUtil { return floor + 1; } } + + // There are times when it is useful to treat the bits in a signed integer as if they + // represent an unsigned integer, or vice versa. This is known as "type punning". + // + // For type punning signed values to unsigned values we can use the assurance in the + // standard's [conv.integral] to convert by simply returning the value: "A prvalue of an + // integer type can be converted to a prvalue of another integer type. ... If the + // destination type is unsigned, the resulting value is the least unsigned integer + // congruent to the source integer (modulo 2n where n is the number of bits used to + // represent the unsigned type). [Note: In a two's complement representation, this + // conversion is conceptual and there is no change in the bit pattern (if there is no + // truncation).]" + // + // For the other direction, the conversion is implementation-defined: "If the + // destination type is signed, the value is unchanged if it can be represented in the + // destination type (and bit-field width); otherwise, the value is + // implementation-defined." + // + // In GCC, the docs promise that when "[t]he result of, or the signal raised by, + // converting an integer to a signed integer type when the value cannot be represented + // in an object of that type ... For conversion to a type of width N, the value is + // reduced modulo 2^N to be within range of the type". As such, the same method of + // converting by simply returning works. + // + // Note that Clang does not document its implementation-defined behavior, + // https://bugs.llvm.org/show_bug.cgi?id=11272, so the static_asserts below are + // important + template <typename T> + constexpr static inline SignedType<T> ToSigned(T x) { + return x; + } + template <typename T> + constexpr static inline UnsignedType<T> ToUnsigned(T x) { + return x; + } }; +static_assert(BitUtil::ToSigned<uint16_t>(0xffff) == -1 + && BitUtil::ToSigned<uint16_t>(0x8000) == -0x8000, + "ToSigned is not a two's complement no-op"); +static_assert(BitUtil::ToUnsigned<int16_t>(-1) == 0xffff + && BitUtil::ToUnsigned<int16_t>(-0x8000) == 0x8000, + "ToUnsigned is not a two's complement no-op"); + template<> inline int256_t BitUtil::Sign(int256_t value) { return value < 0 ? -1 : 1;