This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit e54c636385282a7f1453d022adc932438a49b032 Author: Daniel Becker <daniel.bec...@cloudera.com> AuthorDate: Thu Apr 27 15:08:12 2023 +0200 IMPALA-12035: Impala accepts very big numbers but fails to store them correctly The statement select cast(9999999999999999999999 as BIGINT) correctly fails with the following message: ERROR: UDF ERROR: Decimal expression overflowed However, if the number is much bigger, no error is produced: select cast(999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 as BIGINT) The root cause of the problem is this: - In the first case, the literal (9999999999999999999999) can fit into a DECIMAL and therefore is interpreted as such. This DECIMAL can't fit into a BIGINT, and this is checked, so the cast fails. - In the second case, the literal can't fit in a DECIMAL, so it is interpreted as a DOUBLE instead. This is not unreasonable as it fits in the range of DOUBLE, although of course there may be loss of precision. However, there is no check when casting from DOUBLE to BIGINT, and because the value is out of range for BIGINT this invokes undefined behaviour (probably leading to an incorrect value). This change adds checks to casts - from floating point types to integer types - from double to float These casts can invoke undefined behaviour in C++ and were not checked before this change. If the checks fail, an ERROR is raised. Testing: - Added unit tests in expr-tests.cc for the new checks Change-Id: Ibd97dee3c793a5caa95b1fe5d22b27b8c0f8275d Reviewed-on: http://gerrit.cloudera.org:8080/19856 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/exprs/cast-functions-ir.cc | 154 +++++++++++++++++++ be/src/exprs/cast-functions.h | 7 + be/src/exprs/expr-test.cc | 168 ++++++++++++++++----- be/src/exprs/literal.cc | 18 ++- .../java/org/apache/impala/analysis/CastExpr.java | 21 ++- 5 files changed, 328 insertions(+), 40 deletions(-) diff --git a/be/src/exprs/cast-functions-ir.cc b/be/src/exprs/cast-functions-ir.cc index 0835ae5fc..84246ada5 100644 --- a/be/src/exprs/cast-functions-ir.cc +++ b/be/src/exprs/cast-functions-ir.cc @@ -18,8 +18,10 @@ #include "exprs/cast-functions.h" #include <cmath> +#include <limits> #include <sstream> #include <string> +#include <type_traits> #include <gutil/strings/numbers.h> #include <gutil/strings/substitute.h> @@ -55,9 +57,161 @@ const int MAX_TINYINT_CHARS = 4; // 1 0 const int MAX_BOOLEAN_CHARS = 1; +namespace { + +// This struct is used as a helper in the validation of casts. For a cast from 'FROM_TYPE' +// to 'TO_TYPE' it provides compile time constants about what type of conversion it is. +// These constants are used in the template specifications of the 'Validate()' function to +// group together the relevant cases. +template <class FROM_TYPE, class TO_TYPE> +struct ConversionType { + // std::is_integral includes 'bool' but 'bool' behaves differently from integers in + // conversions. + template <class T> + static constexpr bool is_non_bool_integer = + std::is_integral_v<T> && + !std::is_same_v<T, bool>; + + /////////////////////////////////// + // Conversions that cannot fail. // + /////////////////////////////////// + static constexpr bool same_type_conversion = std::is_same_v<FROM_TYPE, TO_TYPE>; + + static constexpr bool integer_to_integer_conversion = + is_non_bool_integer<FROM_TYPE> && + is_non_bool_integer<TO_TYPE>; + + static constexpr bool boolean_conversion = + std::is_same_v<FROM_TYPE, bool> || + std::is_same_v<TO_TYPE, bool>; + + // Integer to floating point cannot fail because a 32-bit float can represent the min + // and max value of a 64-bit integer. + static constexpr bool integer_to_floatingpoint_conversion = + is_non_bool_integer<FROM_TYPE> && + std::is_floating_point_v<TO_TYPE>; + + static constexpr bool float_to_double_conversion = + std::is_same_v<FROM_TYPE, float> && + std::is_same_v<TO_TYPE, double>; + + static constexpr bool conversion_always_succeeds = + same_type_conversion || + integer_to_integer_conversion || + boolean_conversion || + integer_to_floatingpoint_conversion || + float_to_double_conversion; + + //////////////////////////////// + // Conversions that can fail. // + //////////////////////////////// + static constexpr bool floatingpoint_to_integer_conversion = + std::is_floating_point_v<FROM_TYPE> && + is_non_bool_integer<TO_TYPE>; + + static constexpr bool double_to_float_conversion = + std::is_same_v<FROM_TYPE, double> && + std::is_same_v<TO_TYPE, float>; +}; + +template <class FROM_TYPE, class TO_TYPE, typename std::enable_if_t< + ConversionType<FROM_TYPE, TO_TYPE>::conversion_always_succeeds + >* = nullptr> +bool Validate(FROM_TYPE from, FunctionContext* ctx) { + return true; +} + +template <class FROM_TYPE, class TO_TYPE, typename std::enable_if_t< + ConversionType<FROM_TYPE, TO_TYPE>::floatingpoint_to_integer_conversion + >* = nullptr> +bool Validate(FROM_TYPE from, FunctionContext* ctx) { + // Rules for conversion from floating point types to integral types: + // 1. the fractional part is discarded + // 2. if the destination type can hold the integer part, that is used as the result, + // otherwise the behaviour is undefined. + // See https://en.cppreference.com/w/cpp/language/implicit_conversion + // + // We need to check whether the floating point number 'from' fits within the range of + // the integer type 'TO_TYPE'. We convert the max and min value of 'TO_TYPE' to + // 'FROM_TYPE'. This conversion may not be exact but in this case either the closest + // higher or the closest lower value is selected. This guarentees that if 'from' is + // within these values, it can be converted to 'TO_TYPE'. + // + // Note that we cannot allow equality in the general case because the maximal value of + // 'TO_TYPE' converted to floating point may be larger than the maximal value of + // 'TO_TYPE' because of inexact conversion (and similarly for the minimal value). + // + // A 'double' can exactly represent the maximal and minimal values of 32-bit and smaller + // integers, but not of 64-bit integers. For a 'float' 32-bit integers are too big but + // 16-bit and smaller integers are ok. + constexpr FROM_TYPE upper_limit = static_cast<FROM_TYPE>( + std::numeric_limits<TO_TYPE>::max()); + constexpr FROM_TYPE lower_limit = static_cast<FROM_TYPE>( + std::numeric_limits<TO_TYPE>::lowest()); + + constexpr int INT_SIZE_LIMIT = std::is_same_v<FROM_TYPE, double> ? 4 : 2; + bool is_ok = false; + if (sizeof(TO_TYPE) <= INT_SIZE_LIMIT) { + is_ok = lower_limit <= from && from <= upper_limit; + } else { + is_ok = lower_limit < from && from < upper_limit; + } + + if (UNLIKELY(!is_ok)) { + if (std::isnan(from)) { + ctx->SetError("NaN value cannot be converted to integer type."); + } else if (!std::isfinite(from)) { + ctx->SetError("Non-finite value cannot be converted to integer type."); + } else { + ctx->SetError("Out-of-range floating point value cannot be converted to " + "integer type."); + } + } + + return is_ok; +} + +template <class FROM_TYPE, class TO_TYPE, typename std::enable_if_t< + ConversionType<FROM_TYPE, TO_TYPE>::double_to_float_conversion + >* = nullptr> +bool Validate(FROM_TYPE from, FunctionContext* ctx) { + // Rules for conversion from floating point types to integral types: + // 1. If the source value can be represented exactly in the destination type, it does + // not change. + // 2. If the source value is between two representable values of the destination type, + // the result is one of those two values (it is implementation-defined which one). + // 3. Otherwise, the behavior is undefined. + // See https://en.cppreference.com/w/cpp/language/implicit_conversion + // + // The algorithm is similar to the case of floating point to integer conversion. We + // check whether the double value 'from' is within the range of float. The min and max + // values of float can be exactly represented as double, so we allow equality. + constexpr FROM_TYPE upper_limit = std::numeric_limits<TO_TYPE>::max(); + constexpr FROM_TYPE lower_limit = std::numeric_limits<TO_TYPE>::lowest(); + const bool in_range = lower_limit <= from && from <= upper_limit; + + // In-range values, NaNs and infinite values can be converted to float. + const bool is_ok = in_range || std::isnan(from) || !std::isfinite(from); + + if (UNLIKELY(!is_ok)) { + ctx->SetError( + "Out-of-range double value cannot be converted to float."); + } + + return is_ok; +} + +} // anonymous namespace + #define CAST_FUNCTION(from_type, to_type) \ to_type CastFunctions::CastTo##to_type(FunctionContext* ctx, const from_type& val) { \ if (val.is_null) return to_type::null(); \ + using from_underlying_type = decltype(from_type::val); \ + using to_underlying_type = decltype(to_type::val); \ + const bool valid = Validate<from_underlying_type, to_underlying_type>(val.val, ctx); \ + /* The query will be aborted because 'Validate()' sets an error but we have to return + * something so we return NULL. */\ + if (UNLIKELY(!valid)) return to_type::null(); \ return to_type(val.val); \ } diff --git a/be/src/exprs/cast-functions.h b/be/src/exprs/cast-functions.h index ea9bfa591..61389e00b 100644 --- a/be/src/exprs/cast-functions.h +++ b/be/src/exprs/cast-functions.h @@ -37,6 +37,13 @@ using impala_udf::TimestampVal; using impala_udf::StringVal; using impala_udf::DecimalVal; +/// Some of the numeric (number to number) conversion functions always succeed and have +/// some defined way of handling overflow (e.g. integer to integer conversions). Some of +/// them may fail, such as floating point to integer conversions as well as double to +/// float conversion. See https://en.cppreference.com/w/cpp/language/implicit_conversion. +/// If the conversion fails, an error is raised. +/// +/// Other conversion functions either raise an error or return NULL on failure. class CastFunctions { public: static BooleanVal CastToBooleanVal(FunctionContext* context, const TinyIntVal& val); diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index aa34332d3..a0be31492 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -691,6 +691,16 @@ class ExprTest : public testing::TestWithParam<std::tuple<bool, bool>> { EXPECT_TRUE(GetValue(expr, expr_type) == "NULL") << expr; } + template <class T> + void TestValueOrError(const string& expr, PrimitiveType expr_type, + const T& expected_result, bool expect_error) { + if (expect_error) { + TestError(expr); + } else { + TestValue(expr, expr_type, expected_result); + } + } + void TestIsNotNull(const string& expr, PrimitiveType expr_type) { return TestIsNotNull(expr, ColumnType(expr_type)); } @@ -1185,19 +1195,30 @@ class ExprTest : public testing::TestWithParam<std::tuple<bool, bool>> { void TestSingleLiteralConstruction( PrimitiveType type, const T& value, const string& string_val); - // Test casting stmt to all types. Expected result is val. + // Test casting stmt to all types. 'min_integer_size' is the byte size of the smallest + // signed integer expected to be able to hold the value. 'float_out_of_range' should be + // set to true if the value does not fit in a single precision float. The expected + // result is 'val' for the types that can hold the value and error for other types. template<typename T> - void TestCast(const string& stmt, T val, bool timestamp_out_of_range = false) { + void TestCast(const string& stmt, T val, int min_integer_size = 1, + bool float_out_of_range = false, bool timestamp_out_of_range = false) { TestValue("cast(" + stmt + " as boolean)", TYPE_BOOLEAN, static_cast<bool>(val)); - TestValue("cast(" + stmt + " as tinyint)", TYPE_TINYINT, static_cast<int8_t>(val)); - TestValue("cast(" + stmt + " as smallint)", TYPE_SMALLINT, static_cast<int16_t>(val)); - TestValue("cast(" + stmt + " as int)", TYPE_INT, static_cast<int32_t>(val)); - TestValue("cast(" + stmt + " as integer)", TYPE_INT, static_cast<int32_t>(val)); - TestValue("cast(" + stmt + " as bigint)", TYPE_BIGINT, static_cast<int64_t>(val)); - TestValue("cast(" + stmt + " as float)", TYPE_FLOAT, static_cast<float>(val)); TestValue("cast(" + stmt + " as double)", TYPE_DOUBLE, static_cast<double>(val)); TestValue("cast(" + stmt + " as real)", TYPE_DOUBLE, static_cast<double>(val)); TestStringValue("cast(" + stmt + " as string)", lexical_cast<string>(val)); + + TestValueOrError("cast(" + stmt + " as tinyint)", TYPE_TINYINT, + static_cast<int8_t>(val), min_integer_size > sizeof(int8_t)); + TestValueOrError("cast(" + stmt + " as smallint)", TYPE_SMALLINT, + static_cast<int16_t>(val), min_integer_size > sizeof(int16_t)); + TestValueOrError("cast(" + stmt + " as int)", TYPE_INT, + static_cast<int32_t>(val), min_integer_size > sizeof(int32_t)); + TestValueOrError("cast(" + stmt + " as integer)", TYPE_INT, + static_cast<int32_t>(val), min_integer_size > sizeof(int32_t)); + TestValueOrError("cast(" + stmt + " as bigint)", TYPE_BIGINT, + static_cast<int64_t>(val), min_integer_size > sizeof(int64_t)); + TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT, + static_cast<float>(val), float_out_of_range); if (!timestamp_out_of_range) { TestTimestampValue("cast(" + stmt + " as timestamp)", CreateTestTimestamp(val)); } else { @@ -1242,23 +1263,32 @@ TimestampValue ExprTest::CreateTestTimestamp(int64_t val) { return TimestampValue::FromUnixTime(val, UTCPTR); } -// Test casting 'stmt' to each of the native types. The result should be 'val' -// 'stmt' is a partial stmt that could be of any valid type. +// Test casting 'stmt' to each of the native types. See the general template definition +// for more information. template<> -void ExprTest::TestCast(const string& stmt, const char* val, - bool timestamp_out_of_range) { +void ExprTest::TestCast(const string& stmt, const char* val, int min_integer_size, + bool float_out_of_range, bool timestamp_out_of_range) { try { int8_t val8 = static_cast<int8_t>(lexical_cast<int16_t>(val)); #if 0 // Hive has weird semantics. What do we want to do? TestValue(stmt + " as boolean)", TYPE_BOOLEAN, lexical_cast<bool>(val)); #endif - TestValue("cast(" + stmt + " as tinyint)", TYPE_TINYINT, val8); - TestValue("cast(" + stmt + " as smallint)", TYPE_SMALLINT, lexical_cast<int16_t>(val)); - TestValue("cast(" + stmt + " as int)", TYPE_INT, lexical_cast<int32_t>(val)); - TestValue("cast(" + stmt + " as bigint)", TYPE_BIGINT, lexical_cast<int64_t>(val)); - TestValue("cast(" + stmt + " as float)", TYPE_FLOAT, lexical_cast<float>(val)); + TestValueOrError("cast(" + stmt + " as tinyint)", TYPE_TINYINT, + val8, min_integer_size > sizeof(int8_t)); + TestValueOrError("cast(" + stmt + " as smallint)", TYPE_SMALLINT, + lexical_cast<int16_t>(val), min_integer_size > sizeof(int16_t)); + TestValueOrError("cast(" + stmt + " as int)", TYPE_INT, + lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t)); + TestValueOrError("cast(" + stmt + " as integer)", TYPE_INT, + lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t)); + TestValueOrError("cast(" + stmt + " as bigint)", TYPE_BIGINT, + lexical_cast<int64_t>(val), min_integer_size > sizeof(int64_t)); + TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT, + lexical_cast<float>(val), float_out_of_range); + TestValue("cast(" + stmt + " as double)", TYPE_DOUBLE, lexical_cast<double>(val)); + TestValue("cast(" + stmt + " as real)", TYPE_DOUBLE, lexical_cast<double>(val)); TestStringValue("cast(" + stmt + " as string)", lexical_cast<string>(val)); } catch (bad_lexical_cast& e) { EXPECT_TRUE(false) << e.what(); @@ -3217,21 +3247,20 @@ TEST_P(ExprTest, CastExprs) { TestCast("cast(0.1234567890123 as float)", 0.1234567890123f); TestCast("cast(0.1234567890123 as float)", 0.123456791f); // same as above TestCast("cast(0.00000000001234567890123 as float)", 0.00000000001234567890123f); - TestCast("cast(123456 as float)", 123456.0f); + TestCast("cast(123456 as float)", 123456.0f, 4, false, false); // From http://en.wikipedia.org/wiki/Single-precision_floating-point_format // Min positive normal value TestCast("cast(1.1754944e-38 as float)", 1.1754944e-38f); // Max representable value - TestCast("cast(3.4028234e38 as float)", 3.4028234e38f, true); - + TestCast("cast(3.4028234e38 as float)", 3.4028234e38f, 32, false, true); // From Double TestCast("cast(0.0 as double)", 0.0); TestCast("cast(5.0 as double)", 5.0); TestCast("cast(-5.0 as double)", -5.0); - TestCast("cast(0.123e10 as double)", 0.123e10); - TestCast("cast(123.123e10 as double)", 123.123e10, true); + TestCast("cast(0.123e10 as double)", 0.123e10, 4, false, false); + TestCast("cast(123.123e10 as double)", 123.123e10, 8, false, true); TestCast("cast(1.01234567890123456789 as double)", 1.01234567890123456789); TestCast("cast(1.01234567890123456789 as double)", 1.0123456789012346); // same as above TestCast("cast(0.01234567890123456789 as double)", 0.01234567890123456789); @@ -3240,8 +3269,8 @@ TEST_P(ExprTest, CastExprs) { // casting string to double TestCast("cast('0.43149576573887316' as double)", 0.43149576573887316); TestCast("cast('-0.43149576573887316' as double)", -0.43149576573887316); - TestCast("cast('0.123e10' as double)", 0.123e10); - TestCast("cast('123.123e10' as double)", 123.123e10, true); + TestCast("cast('0.123e10' as double)", 0.123e10, 4, false, false); + TestCast("cast('123.123e10' as double)", 123.123e10, 8, false, true); TestCast("cast('1.01234567890123456789' as double)", 1.01234567890123456789); // From http://en.wikipedia.org/wiki/Double-precision_floating-point_format @@ -3255,8 +3284,10 @@ TEST_P(ExprTest, CastExprs) { TestCast("cast(2.2250738585072014e-308 as double)", 2.2250738585072014e-308); TestCast("cast('2.2250738585072014e-308' as double)", 2.2250738585072014e-308); // Max Double - TestCast("cast(1.7976931348623157e+308 as double)", 1.7976931348623157e308, true); - TestCast("cast('1.7976931348623157e+308' as double)", 1.7976931348623157e308, true); + TestCast("cast(1.7976931348623157e+308 as double)", 1.7976931348623157e308, + 128, true, true); + TestCast("cast('1.7976931348623157e+308' as double)", 1.7976931348623157e308, + 128, true, true); // From String TestCast("'0'", "0"); @@ -3517,6 +3548,81 @@ TEST_P(ExprTest, CastExprs) { #endif } +// Test overflow and boundaries in case of narrowing conversions: from floating point to +// integer types or from double to float. +TEST_P(ExprTest, CastFloatingPointNarrowing) { + // TINYINT + // Valid values on the boundary. + constexpr int64_t int8_max = std::numeric_limits<int8_t>::max(); + constexpr int64_t int8_min = std::numeric_limits<int8_t>::min(); + TestValue(Substitute("cast(cast($0 as float) as tinyint)", int8_max), + TYPE_TINYINT, int8_max); + TestValue(Substitute("cast(cast($0 as float) as tinyint)", -int8_max), + TYPE_TINYINT, -int8_max); + TestValue(Substitute("cast(cast($0 as float) as tinyint)", int8_min), + TYPE_TINYINT, int8_min); + + // Values outside the valid range. + TestError(Substitute("cast(cast($0 as float) as tinyint)", int8_max + 1)); + TestError(Substitute("cast(cast($0 as float) as tinyint)", int8_min - 1)); + + // SMALLINT + // Valid values on the boundary. + constexpr int64_t int16_max = std::numeric_limits<int16_t>::max(); + constexpr int64_t int16_min = std::numeric_limits<int16_t>::min(); + TestValue(Substitute("cast(cast($0 as float) as smallint)", int16_max), + TYPE_SMALLINT, int16_max); + TestValue(Substitute("cast(cast($0 as float) as smallint)", -int16_max), + TYPE_SMALLINT, -int16_max); + TestValue(Substitute("cast(cast($0 as float) as smallint)", int16_min), + TYPE_SMALLINT, int16_min); + + // Values outside the valid range. + TestError(Substitute("cast(cast($0 as float) as smallint)", int16_max + 1)); + TestError(Substitute("cast(cast($0 as float) as smallint)", int16_min - 1)); + + // INT + // Valid values on the boundary. + constexpr int64_t int32_max = std::numeric_limits<int32_t>::max(); + constexpr int64_t int32_min = std::numeric_limits<int32_t>::min(); + TestValue(Substitute("cast(cast($0 as double) as int)", int32_max), + TYPE_INT, int32_max); + TestValue(Substitute("cast(cast($0 as double) as int)", -int32_max), + TYPE_INT, -int32_max); + TestValue(Substitute("cast(cast($0 as double) as int)", int32_min), + TYPE_INT, int32_min); + + // Values outside the valid range. + TestError(Substitute("cast(cast($0 as double) as int)", int32_max + 1)); + TestError(Substitute("cast(cast($0 as double) as int)", int32_min - 1)); + + // BIGINT + // Values outside the valid range: the limit values of BIGINT cannot be represented + // exactly as DOUBLE, and casting them to DOUBLE produces a value that is outside the + // range of BIGINT. + constexpr int64_t int64_max = std::numeric_limits<int64_t>::max(); + constexpr int64_t int64_min = std::numeric_limits<int64_t>::min(); + TestError(Substitute("cast(cast($0 as double) as bigint)", int64_max)); + TestError(Substitute("cast(cast($0 as double) as bigint)", int64_min)); + + // DOUBLE to FLOAT + // Valid values on the boundary. + constexpr double float_max = std::numeric_limits<float>::max(); + constexpr double float_min = std::numeric_limits<float>::lowest(); + TestValue(Substitute("cast(cast($0 as double) as float)", float_max), + TYPE_FLOAT, float_max); + TestValue(Substitute("cast(cast($0 as double) as float)", float_min), + TYPE_FLOAT, float_min); + + // Values outside the valid range. + // 'float_max + 1' is not representable as a float or double. We find the next + // representable double that is greater than 'float_max'. + const double next_double = std::nextafter( + float_max, std::numeric_limits<double>::max()); + TestError(Substitute("cast(cast($0 as double) as float)", next_double)); + TestError(Substitute("cast(cast($0 as double) as float)", -next_double)); +} + // Test casting from/to Date. TEST_P(ExprTest, CastDateExprs) { // From Date to Date @@ -5978,14 +6084,8 @@ TEST_P(ExprTest, MathFunctions) { TestValue("abs(-32768)", TYPE_INT, 32768); TestValue("abs(32767)", TYPE_INT, 32767); TestValue("abs(32768)", TYPE_BIGINT, 32768); -#ifndef __aarch64__ - TestValue("abs(-1 * cast(pow(2, 31) as int))", TYPE_BIGINT, 2147483648); - TestValue("abs(cast(pow(2, 31) as int))", TYPE_BIGINT, 2147483648); -#else - TestValue("abs(-1 * cast(pow(2, 31) as int))", TYPE_BIGINT, 2147483647); - TestValue("abs(cast(pow(2, 31) as int))", TYPE_BIGINT, 2147483647); -#endif - TestValue("abs(2147483647)", TYPE_BIGINT, 2147483647); + TestError("abs(-1 * cast(pow(2, 31) as int))"); + TestError("abs(cast(pow(2, 31) as int))"); TestValue("abs(2147483647)", TYPE_BIGINT, 2147483647); TestValue("abs(-9223372036854775807)", TYPE_BIGINT, 9223372036854775807); TestValue("abs(9223372036854775807)", TYPE_BIGINT, 9223372036854775807); diff --git a/be/src/exprs/literal.cc b/be/src/exprs/literal.cc index 63efb967c..d6d4285d1 100644 --- a/be/src/exprs/literal.cc +++ b/be/src/exprs/literal.cc @@ -17,6 +17,8 @@ #include "literal.h" +#include <cmath> +#include <limits> #include <sstream> #include <boost/date_time/posix_time/posix_time_types.hpp> @@ -64,11 +66,23 @@ Literal::Literal(const TExprNode& node) DCHECK(node.__isset.int_literal); value_.bigint_val = node.int_literal.value; break; - case TYPE_FLOAT: + case TYPE_FLOAT: { DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL); DCHECK(node.__isset.float_literal); - value_.float_val = node.float_literal.value; + // 'node.float_literal.value' is actually a double. Casting from double to float + // invokes undefined behaviour if the value is out of range for float. + double literal_value = node.float_literal.value; + const bool out_of_range = literal_value < std::numeric_limits<float>::lowest() + || literal_value > std::numeric_limits<float>::max(); + if (UNLIKELY(out_of_range && std::isfinite(literal_value))) { + DCHECK(false) << "Value out of range for FLOAT."; + value_.float_val = literal_value > 0 ? std::numeric_limits<float>::infinity() + : -std::numeric_limits<float>::infinity(); + } else { + value_.float_val = literal_value; + } break; + } case TYPE_DOUBLE: DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL); DCHECK(node.__isset.float_literal); diff --git a/fe/src/main/java/org/apache/impala/analysis/CastExpr.java b/fe/src/main/java/org/apache/impala/analysis/CastExpr.java index 81b7e8b36..7e31da5ec 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CastExpr.java +++ b/fe/src/main/java/org/apache/impala/analysis/CastExpr.java @@ -335,10 +335,23 @@ public class CastExpr extends Expr { } if (children_.get(0) instanceof NumericLiteral && type_.isFloatingPointType()) { - // Special case casting a decimal literal to a floating point number. The - // decimal literal can be interpreted as either and we want to avoid casts - // since that can result in loss of accuracy. - ((NumericLiteral)children_.get(0)).explicitlyCastToFloat(type_); + // Special case casting a decimal literal to a floating point number. The decimal + // literal can be interpreted as either and we want to avoid casts since that can + // result in loss of accuracy. However, if 'type_' is FLOAT and the value does not + // fit in a FLOAT, we do not do an unchecked conversion + // ('NumericLiteral.explicitlyCastToFloat()') here but let the conversion fail in + // the BE. + NumericLiteral child = (NumericLiteral) children_.get(0); + final boolean isOverflow = NumericLiteral.isOverflow(child.getValue(), type_); + if (type_.isScalarType(PrimitiveType.FLOAT)) { + if (!isOverflow) { + ((NumericLiteral)children_.get(0)).explicitlyCastToFloat(type_); + } + } else { + Preconditions.checkState(type_.isScalarType(PrimitiveType.DOUBLE)); + Preconditions.checkState(!isOverflow); + ((NumericLiteral)children_.get(0)).explicitlyCastToFloat(type_); + } } if (children_.get(0).getType().isNull()) {