Fix undefined calls to __builtin_ctz. GCC's __builtin_ctz[l[l]] functions return undefined results when the argument is 0. This patch handles that 0 case, which could otherwise produce results that depend on the architecture.
Change-Id: I8460bc3f7e510ce07b8673387c9440accc432abe Reviewed-on: http://gerrit.cloudera.org:8080/5004 Reviewed-by: Jim Apple <[email protected]> Reviewed-by: Dan Hecht <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6d8fd7e7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6d8fd7e7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6d8fd7e7 Branch: refs/heads/master Commit: 6d8fd7e79c44250ccd7fc88ade44b1bdd6ae2528 Parents: 352833b Author: Jim Apple <[email protected]> Authored: Mon Nov 7 09:36:41 2016 -0800 Committer: Impala Public Jenkins <[email protected]> Committed: Thu Dec 1 05:22:45 2016 +0000 ---------------------------------------------------------------------- be/src/exprs/aggregate-functions-ir.cc | 25 ++++++++++++------------- be/src/udf_samples/hyperloglog-uda.cc | 16 +++++++++------- be/src/util/bit-util.h | 25 ++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6d8fd7e7/be/src/exprs/aggregate-functions-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc index 090a905..63f06bf 100644 --- a/be/src/exprs/aggregate-functions-ir.cc +++ b/be/src/exprs/aggregate-functions-ir.cc @@ -780,8 +780,7 @@ void AggregateFunctions::PcUpdate(FunctionContext* c, const T& input, StringVal* // different seed). for (int i = 0; i < NUM_PC_BITMAPS; ++i) { uint32_t hash_value = AnyValUtil::Hash(input, *c->GetArgType(0), i); - int bit_index = __builtin_ctz(hash_value); - if (UNLIKELY(hash_value == 0)) bit_index = PC_BITMAP_LENGTH - 1; + const int bit_index = BitUtil::CountTrailingZeros(hash_value, PC_BITMAP_LENGTH - 1); // Set bitmap[i, bit_index] to 1 SetDistinctEstimateBit(dst->ptr, i, bit_index); } @@ -798,10 +797,10 @@ void AggregateFunctions::PcsaUpdate(FunctionContext* c, const T& input, StringVa uint32_t row_index = hash_value % NUM_PC_BITMAPS; // We want the zero-based position of the least significant 1-bit in binary - // representation of hash_value. __builtin_ctz does exactly this because it returns - // the number of trailing 0-bits in x (or undefined if x is zero). - int bit_index = __builtin_ctz(hash_value / NUM_PC_BITMAPS); - if (UNLIKELY(hash_value == 0)) bit_index = PC_BITMAP_LENGTH - 1; + // representation of hash_value. BitUtil::CountTrailingZeros(x,y) does exactly this + // because it returns the number of trailing 0-bits in x (or y if x is zero). + const int bit_index = + BitUtil::CountTrailingZeros(hash_value / NUM_PC_BITMAPS, PC_BITMAP_LENGTH - 1); // Set bitmap[row_index, bit_index] to 1 SetDistinctEstimateBit(dst->ptr, row_index, bit_index); @@ -1185,13 +1184,13 @@ void AggregateFunctions::HllUpdate(FunctionContext* ctx, const T& src, StringVal DCHECK_EQ(dst->len, HLL_LEN); uint64_t hash_value = AnyValUtil::Hash64(src, *ctx->GetArgType(0), HashUtil::FNV64_SEED); - if (hash_value != 0) { - // Use the lower bits to index into the number of streams and then - // find the first 1 bit after the index bits. - int idx = hash_value & (HLL_LEN - 1); - uint8_t first_one_bit = __builtin_ctzl(hash_value >> HLL_PRECISION) + 1; - dst->ptr[idx] = ::max(dst->ptr[idx], first_one_bit); - } + // Use the lower bits to index into the number of streams and then find the first 1 bit + // after the index bits. + int idx = hash_value & (HLL_LEN - 1); + const uint8_t first_one_bit = + 1 + BitUtil::CountTrailingZeros( + hash_value >> HLL_PRECISION, sizeof(hash_value) * CHAR_BIT - HLL_PRECISION); + dst->ptr[idx] = ::max(dst->ptr[idx], first_one_bit); } // Specialize for DecimalVal to allow substituting decimal size. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6d8fd7e7/be/src/udf_samples/hyperloglog-uda.cc ---------------------------------------------------------------------- diff --git a/be/src/udf_samples/hyperloglog-uda.cc b/be/src/udf_samples/hyperloglog-uda.cc index c149a0f..d6db712 100644 --- a/be/src/udf_samples/hyperloglog-uda.cc +++ b/be/src/udf_samples/hyperloglog-uda.cc @@ -16,6 +16,7 @@ // under the License. #include <assert.h> +#include <limits.h> #include <math.h> #include <algorithm> #include <sstream> @@ -71,13 +72,14 @@ void HllUpdate(FunctionContext* ctx, const IntVal& src, StringVal* dst) { assert(!dst->is_null); assert(dst->len == pow(2, HLL_PRECISION)); uint64_t hash_value = Hash(src); - if (hash_value != 0) { - // Use the lower bits to index into the number of streams and then - // find the first 1 bit after the index bits. - int idx = hash_value % dst->len; - uint8_t first_one_bit = __builtin_ctzl(hash_value >> HLL_PRECISION) + 1; - dst->ptr[idx] = ::max(dst->ptr[idx], first_one_bit); - } + // Use the lower bits to index into the number of streams and then find the first 1 bit + // after the index bits. + int idx = hash_value % dst->len; + const uint64_t hash_top_bits = hash_value >> HLL_PRECISION; + uint8_t first_one_bit = + 1 + ((hash_top_bits != 0) ? __builtin_ctzll(hash_top_bits) : + (sizeof(hash_value) * CHAR_BIT - HLL_PRECISION)); + dst->ptr[idx] = ::max(dst->ptr[idx], first_one_bit); } void HllMerge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6d8fd7e7/be/src/util/bit-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h index deb16a9..bc9bcaf 100644 --- a/be/src/util/bit-util.h +++ b/be/src/util/bit-util.h @@ -25,9 +25,12 @@ #include <endian.h> #endif -#include <boost/type_traits/make_unsigned.hpp> +#include <climits> + #include <limits> +#include <boost/type_traits/make_unsigned.hpp> + #include "common/compiler-util.h" #include "util/cpu-info.h" #include "util/sse-util.h" @@ -239,6 +242,26 @@ class BitUtil { constexpr static T UnsetBit(T v, int bitpos) { return v & ~(static_cast<T>(0x1) << bitpos); } + + /// Wrappers around __builtin_ctz, which returns an undefined value when the argument is + /// zero. + static inline int CountTrailingZeros( + unsigned int v, int otherwise = sizeof(unsigned int) * CHAR_BIT) { + if (UNLIKELY(v == 0)) return otherwise; + return __builtin_ctz(v); + } + + static inline int CountTrailingZeros( + unsigned long v, int otherwise = sizeof(unsigned long) * CHAR_BIT) { + if (UNLIKELY(v == 0)) return otherwise; + return __builtin_ctzl(v); + } + + static inline int CountTrailingZeros( + unsigned long long v, int otherwise = sizeof(unsigned long long) * CHAR_BIT) { + if (UNLIKELY(v == 0)) return otherwise; + return __builtin_ctzll(v); + } }; /// An encapsulation class of SIMD byteswap functions
