[impala] 01/03: IMPALA-12035: Impala accepts very big numbers but fails to store them correctly

joemcdonnell Thu, 11 May 2023 22:27:47 -0700

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit e54c636385282a7f1453d022adc932438a49b032
Author: Daniel Becker <daniel.bec...@cloudera.com>
AuthorDate: Thu Apr 27 15:08:12 2023 +0200

    IMPALA-12035: Impala accepts very big numbers but fails to store them 
correctly
    
    The statement
     select cast(9999999999999999999999 as BIGINT)
    correctly fails with the following message:
     ERROR: UDF ERROR: Decimal expression overflowed
    
    However, if the number is much bigger, no error is produced:
     select 
cast(999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
       as BIGINT)
    
    The root cause of the problem is this:
    - In the first case, the literal (9999999999999999999999) can fit into a
      DECIMAL and therefore is interpreted as such. This DECIMAL can't fit
      into a BIGINT, and this is checked, so the cast fails.
    
    - In the second case, the literal can't fit in a DECIMAL, so it is
      interpreted as a DOUBLE instead. This is not unreasonable as it fits
      in the range of DOUBLE, although of course there may be loss of
      precision. However, there is no check when casting from DOUBLE to
      BIGINT, and because the value is out of range for BIGINT this invokes
      undefined behaviour (probably leading to an incorrect value).
    
    This change adds checks to casts
     - from floating point types to integer types
     - from double to float
    
    These casts can invoke undefined behaviour in C++ and were not checked
    before this change.
    
    If the checks fail, an ERROR is raised.
    
    Testing:
     - Added unit tests in expr-tests.cc for the new checks
    
    Change-Id: Ibd97dee3c793a5caa95b1fe5d22b27b8c0f8275d
    Reviewed-on: http://gerrit.cloudera.org:8080/19856
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 be/src/exprs/cast-functions-ir.cc                  | 154 +++++++++++++++++++
 be/src/exprs/cast-functions.h                      |   7 +
 be/src/exprs/expr-test.cc                          | 168 ++++++++++++++++-----
 be/src/exprs/literal.cc                            |  18 ++-
 .../java/org/apache/impala/analysis/CastExpr.java  |  21 ++-
 5 files changed, 328 insertions(+), 40 deletions(-)

diff --git a/be/src/exprs/cast-functions-ir.cc 
b/be/src/exprs/cast-functions-ir.cc
index 0835ae5fc..84246ada5 100644
--- a/be/src/exprs/cast-functions-ir.cc
+++ b/be/src/exprs/cast-functions-ir.cc
@@ -18,8 +18,10 @@
 #include "exprs/cast-functions.h"
 
 #include <cmath>
+#include <limits>
 #include <sstream>
 #include <string>
+#include <type_traits>
 
 #include <gutil/strings/numbers.h>
 #include <gutil/strings/substitute.h>
@@ -55,9 +57,161 @@ const int MAX_TINYINT_CHARS = 4;
 // 1 0
 const int MAX_BOOLEAN_CHARS = 1;
 
+namespace {
+
+// This struct is used as a helper in the validation of casts. For a cast from 
'FROM_TYPE'
+// to 'TO_TYPE' it provides compile time constants about what type of 
conversion it is.
+// These constants are used in the template specifications of the 'Validate()' 
function to
+// group together the relevant cases.
+template <class FROM_TYPE, class TO_TYPE>
+struct ConversionType {
+  // std::is_integral includes 'bool' but 'bool' behaves differently from 
integers in
+  // conversions.
+  template <class T>
+  static constexpr bool is_non_bool_integer =
+      std::is_integral_v<T> &&
+      !std::is_same_v<T, bool>;
+
+  ///////////////////////////////////
+  // Conversions that cannot fail. //
+  ///////////////////////////////////
+  static constexpr bool same_type_conversion = std::is_same_v<FROM_TYPE, 
TO_TYPE>;
+
+  static constexpr bool integer_to_integer_conversion =
+      is_non_bool_integer<FROM_TYPE> &&
+      is_non_bool_integer<TO_TYPE>;
+
+  static constexpr bool boolean_conversion =
+    std::is_same_v<FROM_TYPE, bool> ||
+    std::is_same_v<TO_TYPE, bool>;
+
+  // Integer to floating point cannot fail because a 32-bit float can 
represent the min
+  // and max value of a 64-bit integer.
+  static constexpr bool integer_to_floatingpoint_conversion =
+      is_non_bool_integer<FROM_TYPE> &&
+      std::is_floating_point_v<TO_TYPE>;
+
+  static constexpr bool float_to_double_conversion =
+      std::is_same_v<FROM_TYPE, float> &&
+      std::is_same_v<TO_TYPE, double>;
+
+  static constexpr bool conversion_always_succeeds =
+      same_type_conversion ||
+      integer_to_integer_conversion ||
+      boolean_conversion ||
+      integer_to_floatingpoint_conversion ||
+      float_to_double_conversion;
+
+  ////////////////////////////////
+  // Conversions that can fail. //
+  ////////////////////////////////
+  static constexpr bool floatingpoint_to_integer_conversion =
+      std::is_floating_point_v<FROM_TYPE> &&
+      is_non_bool_integer<TO_TYPE>;
+
+  static constexpr bool double_to_float_conversion =
+      std::is_same_v<FROM_TYPE, double> &&
+      std::is_same_v<TO_TYPE, float>;
+};
+
+template <class FROM_TYPE, class TO_TYPE, typename std::enable_if_t<
+    ConversionType<FROM_TYPE, TO_TYPE>::conversion_always_succeeds
+    >* = nullptr>
+bool Validate(FROM_TYPE from, FunctionContext* ctx) {
+  return true;
+}
+
+template <class FROM_TYPE, class TO_TYPE, typename std::enable_if_t<
+    ConversionType<FROM_TYPE, TO_TYPE>::floatingpoint_to_integer_conversion
+    >* = nullptr>
+bool Validate(FROM_TYPE from, FunctionContext* ctx) {
+  // Rules for conversion from floating point types to integral types:
+  //  1. the fractional part is discarded
+  //  2. if the destination type can hold the integer part, that is used as 
the result,
+  //     otherwise the behaviour is undefined.
+  // See https://en.cppreference.com/w/cpp/language/implicit_conversion
+  //
+  // We need to check whether the floating point number 'from' fits within the 
range of
+  // the integer type 'TO_TYPE'. We convert the max and min value of 'TO_TYPE' 
to
+  // 'FROM_TYPE'. This conversion may not be exact but in this case either the 
closest
+  // higher or the closest lower value is selected. This guarentees that if 
'from' is
+  // within these values, it can be converted to 'TO_TYPE'.
+  //
+  // Note that we cannot allow equality in the general case because the 
maximal value of
+  // 'TO_TYPE' converted to floating point may be larger than the maximal 
value of
+  // 'TO_TYPE' because of inexact conversion (and similarly for the minimal 
value).
+  //
+  // A 'double' can exactly represent the maximal and minimal values of 32-bit 
and smaller
+  // integers, but not of 64-bit integers. For a 'float' 32-bit integers are 
too big but
+  // 16-bit and smaller integers are ok.
+  constexpr FROM_TYPE upper_limit = static_cast<FROM_TYPE>(
+      std::numeric_limits<TO_TYPE>::max());
+  constexpr FROM_TYPE lower_limit = static_cast<FROM_TYPE>(
+      std::numeric_limits<TO_TYPE>::lowest());
+
+  constexpr int INT_SIZE_LIMIT = std::is_same_v<FROM_TYPE, double> ? 4 : 2;
+  bool is_ok = false;
+  if (sizeof(TO_TYPE) <= INT_SIZE_LIMIT) {
+    is_ok = lower_limit <= from && from <= upper_limit;
+  } else {
+    is_ok = lower_limit < from && from < upper_limit;
+  }
+
+  if (UNLIKELY(!is_ok)) {
+    if (std::isnan(from)) {
+      ctx->SetError("NaN value cannot be converted to integer type.");
+    } else if (!std::isfinite(from)) {
+      ctx->SetError("Non-finite value cannot be converted to integer type.");
+    } else {
+      ctx->SetError("Out-of-range floating point value cannot be converted to "
+          "integer type.");
+    }
+  }
+
+  return is_ok;
+}
+
+template <class FROM_TYPE, class TO_TYPE, typename std::enable_if_t<
+    ConversionType<FROM_TYPE, TO_TYPE>::double_to_float_conversion
+    >* = nullptr>
+bool Validate(FROM_TYPE from, FunctionContext* ctx) {
+  // Rules for conversion from floating point types to integral types:
+  //  1. If the source value can be represented exactly in the destination 
type, it does
+  //     not change.
+  //  2. If the source value is between two representable values of the 
destination type,
+  //     the result is one of those two values (it is implementation-defined 
which one).
+  //  3. Otherwise, the behavior is undefined.
+  // See https://en.cppreference.com/w/cpp/language/implicit_conversion
+  //
+  // The algorithm is similar to the case of floating point to integer 
conversion. We
+  // check whether the double value 'from' is within the range of float. The 
min and max
+  // values of float can be exactly represented as double, so we allow 
equality.
+  constexpr FROM_TYPE upper_limit = std::numeric_limits<TO_TYPE>::max();
+  constexpr FROM_TYPE lower_limit = std::numeric_limits<TO_TYPE>::lowest();
+  const bool in_range = lower_limit <= from && from <= upper_limit;
+
+  // In-range values, NaNs and infinite values can be converted to float.
+  const bool is_ok = in_range || std::isnan(from) || !std::isfinite(from);
+
+  if (UNLIKELY(!is_ok)) {
+      ctx->SetError(
+          "Out-of-range double value cannot be converted to float.");
+  }
+
+  return is_ok;
+}
+
+} // anonymous namespace
+
 #define CAST_FUNCTION(from_type, to_type) \
   to_type CastFunctions::CastTo##to_type(FunctionContext* ctx, const 
from_type& val) { \
     if (val.is_null) return to_type::null(); \
+    using from_underlying_type = decltype(from_type::val); \
+    using to_underlying_type = decltype(to_type::val); \
+    const bool valid = Validate<from_underlying_type, 
to_underlying_type>(val.val, ctx); \
+    /* The query will be aborted because 'Validate()' sets an error but we 
have to return
+     * something so we return NULL. */\
+    if (UNLIKELY(!valid)) return to_type::null(); \
     return to_type(val.val); \
   }
 
diff --git a/be/src/exprs/cast-functions.h b/be/src/exprs/cast-functions.h
index ea9bfa591..61389e00b 100644
--- a/be/src/exprs/cast-functions.h
+++ b/be/src/exprs/cast-functions.h
@@ -37,6 +37,13 @@ using impala_udf::TimestampVal;
 using impala_udf::StringVal;
 using impala_udf::DecimalVal;
 
+/// Some of the numeric (number to number) conversion functions always succeed 
and have
+/// some defined way of handling overflow (e.g. integer to integer 
conversions). Some of
+/// them may fail, such as floating point to integer conversions as well as 
double to
+/// float conversion. See 
https://en.cppreference.com/w/cpp/language/implicit_conversion.
+/// If the conversion fails, an error is raised.
+///
+/// Other conversion functions either raise an error or return NULL on failure.
 class CastFunctions {
  public:
   static BooleanVal CastToBooleanVal(FunctionContext* context, const 
TinyIntVal& val);
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index aa34332d3..a0be31492 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -691,6 +691,16 @@ class ExprTest : public 
testing::TestWithParam<std::tuple<bool, bool>> {
     EXPECT_TRUE(GetValue(expr, expr_type) == "NULL") << expr;
   }
 
+  template <class T>
+  void TestValueOrError(const string& expr, PrimitiveType expr_type,
+      const T& expected_result, bool expect_error) {
+    if (expect_error) {
+      TestError(expr);
+    } else {
+      TestValue(expr, expr_type, expected_result);
+    }
+  }
+
   void TestIsNotNull(const string& expr, PrimitiveType expr_type) {
     return TestIsNotNull(expr, ColumnType(expr_type));
   }
@@ -1185,19 +1195,30 @@ class ExprTest : public 
testing::TestWithParam<std::tuple<bool, bool>> {
   void TestSingleLiteralConstruction(
       PrimitiveType type, const T& value, const string& string_val);
 
-  // Test casting stmt to all types.  Expected result is val.
+  // Test casting stmt to all types. 'min_integer_size' is the byte size of 
the smallest
+  // signed integer expected to be able to hold the value. 
'float_out_of_range' should be
+  // set to true if the value does not fit in a single precision float. The 
expected
+  // result is 'val' for the types that can hold the value and error for other 
types.
   template<typename T>
-  void TestCast(const string& stmt, T val, bool timestamp_out_of_range = 
false) {
+  void TestCast(const string& stmt, T val, int min_integer_size = 1,
+      bool float_out_of_range = false, bool timestamp_out_of_range = false) {
     TestValue("cast(" + stmt + " as boolean)", TYPE_BOOLEAN, 
static_cast<bool>(val));
-    TestValue("cast(" + stmt + " as tinyint)", TYPE_TINYINT, 
static_cast<int8_t>(val));
-    TestValue("cast(" + stmt + " as smallint)", TYPE_SMALLINT, 
static_cast<int16_t>(val));
-    TestValue("cast(" + stmt + " as int)", TYPE_INT, 
static_cast<int32_t>(val));
-    TestValue("cast(" + stmt + " as integer)", TYPE_INT, 
static_cast<int32_t>(val));
-    TestValue("cast(" + stmt + " as bigint)", TYPE_BIGINT, 
static_cast<int64_t>(val));
-    TestValue("cast(" + stmt + " as float)", TYPE_FLOAT, 
static_cast<float>(val));
     TestValue("cast(" + stmt + " as double)", TYPE_DOUBLE, 
static_cast<double>(val));
     TestValue("cast(" + stmt + " as real)", TYPE_DOUBLE, 
static_cast<double>(val));
     TestStringValue("cast(" + stmt + " as string)", lexical_cast<string>(val));
+
+    TestValueOrError("cast(" + stmt + " as tinyint)", TYPE_TINYINT,
+        static_cast<int8_t>(val), min_integer_size > sizeof(int8_t));
+    TestValueOrError("cast(" + stmt + " as smallint)", TYPE_SMALLINT,
+        static_cast<int16_t>(val), min_integer_size > sizeof(int16_t));
+    TestValueOrError("cast(" + stmt + " as int)", TYPE_INT,
+        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+    TestValueOrError("cast(" + stmt + " as integer)", TYPE_INT,
+        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+    TestValueOrError("cast(" + stmt + " as bigint)", TYPE_BIGINT,
+        static_cast<int64_t>(val), min_integer_size > sizeof(int64_t));
+    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT,
+        static_cast<float>(val), float_out_of_range);
     if (!timestamp_out_of_range) {
       TestTimestampValue("cast(" + stmt + " as timestamp)", 
CreateTestTimestamp(val));
     } else {
@@ -1242,23 +1263,32 @@ TimestampValue ExprTest::CreateTestTimestamp(int64_t 
val) {
   return TimestampValue::FromUnixTime(val, UTCPTR);
 }
 
-// Test casting 'stmt' to each of the native types.  The result should be 'val'
-// 'stmt' is a partial stmt that could be of any valid type.
+// Test casting 'stmt' to each of the native types. See the general template 
definition
+// for more information.
 template<>
-void ExprTest::TestCast(const string& stmt, const char* val,
-    bool timestamp_out_of_range) {
+void ExprTest::TestCast(const string& stmt, const char* val, int 
min_integer_size,
+    bool float_out_of_range, bool timestamp_out_of_range) {
   try {
     int8_t val8 = static_cast<int8_t>(lexical_cast<int16_t>(val));
 #if 0
     // Hive has weird semantics.  What do we want to do?
     TestValue(stmt + " as boolean)", TYPE_BOOLEAN, lexical_cast<bool>(val));
 #endif
-    TestValue("cast(" + stmt + " as tinyint)", TYPE_TINYINT, val8);
-    TestValue("cast(" + stmt + " as smallint)", TYPE_SMALLINT, 
lexical_cast<int16_t>(val));
-    TestValue("cast(" + stmt + " as int)", TYPE_INT, 
lexical_cast<int32_t>(val));
-    TestValue("cast(" + stmt + " as bigint)", TYPE_BIGINT, 
lexical_cast<int64_t>(val));
-    TestValue("cast(" + stmt + " as float)", TYPE_FLOAT, 
lexical_cast<float>(val));
+    TestValueOrError("cast(" + stmt + " as tinyint)", TYPE_TINYINT,
+        val8, min_integer_size > sizeof(int8_t));
+    TestValueOrError("cast(" + stmt + " as smallint)", TYPE_SMALLINT,
+        lexical_cast<int16_t>(val), min_integer_size > sizeof(int16_t));
+    TestValueOrError("cast(" + stmt + " as int)", TYPE_INT,
+        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+    TestValueOrError("cast(" + stmt + " as integer)", TYPE_INT,
+        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+    TestValueOrError("cast(" + stmt + " as bigint)", TYPE_BIGINT,
+        lexical_cast<int64_t>(val), min_integer_size > sizeof(int64_t));
+    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT,
+        lexical_cast<float>(val), float_out_of_range);
+
     TestValue("cast(" + stmt + " as double)", TYPE_DOUBLE, 
lexical_cast<double>(val));
+    TestValue("cast(" + stmt + " as real)", TYPE_DOUBLE, 
lexical_cast<double>(val));
     TestStringValue("cast(" + stmt + " as string)", lexical_cast<string>(val));
   } catch (bad_lexical_cast& e) {
     EXPECT_TRUE(false) << e.what();
@@ -3217,21 +3247,20 @@ TEST_P(ExprTest, CastExprs) {
   TestCast("cast(0.1234567890123 as float)", 0.1234567890123f);
   TestCast("cast(0.1234567890123 as float)", 0.123456791f); // same as above
   TestCast("cast(0.00000000001234567890123 as float)", 
0.00000000001234567890123f);
-  TestCast("cast(123456 as float)", 123456.0f);
+  TestCast("cast(123456 as float)", 123456.0f, 4, false, false);
 
   // From http://en.wikipedia.org/wiki/Single-precision_floating-point_format
   // Min positive normal value
   TestCast("cast(1.1754944e-38 as float)", 1.1754944e-38f);
   // Max representable value
-  TestCast("cast(3.4028234e38 as float)", 3.4028234e38f, true);
-
+  TestCast("cast(3.4028234e38 as float)", 3.4028234e38f, 32, false, true);
 
   // From Double
   TestCast("cast(0.0 as double)", 0.0);
   TestCast("cast(5.0 as double)", 5.0);
   TestCast("cast(-5.0 as double)", -5.0);
-  TestCast("cast(0.123e10 as double)", 0.123e10);
-  TestCast("cast(123.123e10 as double)", 123.123e10, true);
+  TestCast("cast(0.123e10 as double)", 0.123e10, 4, false, false);
+  TestCast("cast(123.123e10 as double)", 123.123e10, 8, false, true);
   TestCast("cast(1.01234567890123456789 as double)", 1.01234567890123456789);
   TestCast("cast(1.01234567890123456789 as double)", 1.0123456789012346); // 
same as above
   TestCast("cast(0.01234567890123456789 as double)", 0.01234567890123456789);
@@ -3240,8 +3269,8 @@ TEST_P(ExprTest, CastExprs) {
   // casting string to double
   TestCast("cast('0.43149576573887316' as double)", 0.43149576573887316);
   TestCast("cast('-0.43149576573887316' as double)", -0.43149576573887316);
-  TestCast("cast('0.123e10' as double)", 0.123e10);
-  TestCast("cast('123.123e10' as double)", 123.123e10, true);
+  TestCast("cast('0.123e10' as double)", 0.123e10, 4, false, false);
+  TestCast("cast('123.123e10' as double)", 123.123e10, 8, false, true);
   TestCast("cast('1.01234567890123456789' as double)", 1.01234567890123456789);
 
   // From http://en.wikipedia.org/wiki/Double-precision_floating-point_format
@@ -3255,8 +3284,10 @@ TEST_P(ExprTest, CastExprs) {
   TestCast("cast(2.2250738585072014e-308 as double)", 2.2250738585072014e-308);
   TestCast("cast('2.2250738585072014e-308' as double)", 
2.2250738585072014e-308);
   // Max Double
-  TestCast("cast(1.7976931348623157e+308 as double)", 1.7976931348623157e308, 
true);
-  TestCast("cast('1.7976931348623157e+308' as double)", 
1.7976931348623157e308, true);
+  TestCast("cast(1.7976931348623157e+308 as double)", 1.7976931348623157e308,
+      128, true, true);
+  TestCast("cast('1.7976931348623157e+308' as double)", 1.7976931348623157e308,
+      128, true, true);
 
   // From String
   TestCast("'0'", "0");
@@ -3517,6 +3548,81 @@ TEST_P(ExprTest, CastExprs) {
 #endif
 }
 
+// Test overflow and boundaries in case of narrowing conversions: from 
floating point to
+// integer types or from double to float.
+TEST_P(ExprTest, CastFloatingPointNarrowing) {
+  // TINYINT
+  // Valid values on the boundary.
+  constexpr int64_t int8_max = std::numeric_limits<int8_t>::max();
+  constexpr int64_t int8_min = std::numeric_limits<int8_t>::min();
+  TestValue(Substitute("cast(cast($0 as float) as tinyint)", int8_max),
+      TYPE_TINYINT, int8_max);
+  TestValue(Substitute("cast(cast($0 as float) as tinyint)", -int8_max),
+      TYPE_TINYINT, -int8_max);
+  TestValue(Substitute("cast(cast($0 as float) as tinyint)", int8_min),
+      TYPE_TINYINT, int8_min);
+
+  // Values outside the valid range.
+  TestError(Substitute("cast(cast($0 as float) as tinyint)", int8_max + 1));
+  TestError(Substitute("cast(cast($0 as float) as tinyint)", int8_min - 1));
+
+  // SMALLINT
+  // Valid values on the boundary.
+  constexpr int64_t int16_max = std::numeric_limits<int16_t>::max();
+  constexpr int64_t int16_min = std::numeric_limits<int16_t>::min();
+  TestValue(Substitute("cast(cast($0 as float) as smallint)", int16_max),
+      TYPE_SMALLINT, int16_max);
+  TestValue(Substitute("cast(cast($0 as float) as smallint)", -int16_max),
+      TYPE_SMALLINT, -int16_max);
+  TestValue(Substitute("cast(cast($0 as float) as smallint)", int16_min),
+      TYPE_SMALLINT, int16_min);
+
+  // Values outside the valid range.
+  TestError(Substitute("cast(cast($0 as float) as smallint)", int16_max + 1));
+  TestError(Substitute("cast(cast($0 as float) as smallint)", int16_min - 1));
+
+  // INT
+  // Valid values on the boundary.
+  constexpr int64_t int32_max = std::numeric_limits<int32_t>::max();
+  constexpr int64_t int32_min = std::numeric_limits<int32_t>::min();
+  TestValue(Substitute("cast(cast($0 as double) as int)", int32_max),
+      TYPE_INT, int32_max);
+  TestValue(Substitute("cast(cast($0 as double) as int)", -int32_max),
+      TYPE_INT, -int32_max);
+  TestValue(Substitute("cast(cast($0 as double) as int)", int32_min),
+      TYPE_INT, int32_min);
+
+  // Values outside the valid range.
+  TestError(Substitute("cast(cast($0 as double) as int)", int32_max + 1));
+  TestError(Substitute("cast(cast($0 as double) as int)", int32_min - 1));
+
+  // BIGINT
+  // Values outside the valid range: the limit values of BIGINT cannot be 
represented
+  // exactly as DOUBLE, and casting them to DOUBLE produces a value that is 
outside the
+  // range of BIGINT.
+  constexpr int64_t int64_max = std::numeric_limits<int64_t>::max();
+  constexpr int64_t int64_min = std::numeric_limits<int64_t>::min();
+  TestError(Substitute("cast(cast($0 as double) as bigint)", int64_max));
+  TestError(Substitute("cast(cast($0 as double) as bigint)", int64_min));
+
+  // DOUBLE to FLOAT
+  // Valid values on the boundary.
+  constexpr double float_max = std::numeric_limits<float>::max();
+  constexpr double float_min = std::numeric_limits<float>::lowest();
+  TestValue(Substitute("cast(cast($0 as double) as float)", float_max),
+      TYPE_FLOAT, float_max);
+  TestValue(Substitute("cast(cast($0 as double) as float)", float_min),
+      TYPE_FLOAT, float_min);
+
+  // Values outside the valid range.
+  // 'float_max + 1' is not representable as a float or double. We find the 
next
+  // representable double that is greater than 'float_max'.
+  const double next_double = std::nextafter(
+      float_max, std::numeric_limits<double>::max());
+  TestError(Substitute("cast(cast($0 as double) as float)", next_double));
+  TestError(Substitute("cast(cast($0 as double) as float)", -next_double));
+}
+
 // Test casting from/to Date.
 TEST_P(ExprTest, CastDateExprs) {
   // From Date to Date
@@ -5978,14 +6084,8 @@ TEST_P(ExprTest, MathFunctions) {
   TestValue("abs(-32768)", TYPE_INT, 32768);
   TestValue("abs(32767)", TYPE_INT, 32767);
   TestValue("abs(32768)", TYPE_BIGINT, 32768);
-#ifndef __aarch64__
-  TestValue("abs(-1 * cast(pow(2, 31) as int))", TYPE_BIGINT, 2147483648);
-  TestValue("abs(cast(pow(2, 31) as int))", TYPE_BIGINT, 2147483648);
-#else
-  TestValue("abs(-1 * cast(pow(2, 31) as int))", TYPE_BIGINT, 2147483647);
-  TestValue("abs(cast(pow(2, 31) as int))", TYPE_BIGINT, 2147483647);
-#endif
-  TestValue("abs(2147483647)", TYPE_BIGINT, 2147483647);
+  TestError("abs(-1 * cast(pow(2, 31) as int))");
+  TestError("abs(cast(pow(2, 31) as int))");
   TestValue("abs(2147483647)", TYPE_BIGINT, 2147483647);
   TestValue("abs(-9223372036854775807)", TYPE_BIGINT,  9223372036854775807);
   TestValue("abs(9223372036854775807)", TYPE_BIGINT,  9223372036854775807);
diff --git a/be/src/exprs/literal.cc b/be/src/exprs/literal.cc
index 63efb967c..d6d4285d1 100644
--- a/be/src/exprs/literal.cc
+++ b/be/src/exprs/literal.cc
@@ -17,6 +17,8 @@
 
 #include "literal.h"
 
+#include <cmath>
+#include <limits>
 #include <sstream>
 #include <boost/date_time/posix_time/posix_time_types.hpp>
 
@@ -64,11 +66,23 @@ Literal::Literal(const TExprNode& node)
       DCHECK(node.__isset.int_literal);
       value_.bigint_val = node.int_literal.value;
       break;
-    case TYPE_FLOAT:
+    case TYPE_FLOAT: {
       DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
       DCHECK(node.__isset.float_literal);
-      value_.float_val = node.float_literal.value;
+      // 'node.float_literal.value' is actually a double. Casting from double 
to float
+      // invokes undefined behaviour if the value is out of range for float.
+      double literal_value = node.float_literal.value;
+      const bool out_of_range = literal_value < 
std::numeric_limits<float>::lowest()
+          || literal_value > std::numeric_limits<float>::max();
+      if (UNLIKELY(out_of_range && std::isfinite(literal_value))) {
+        DCHECK(false) << "Value out of range for FLOAT.";
+        value_.float_val = literal_value > 0 ? 
std::numeric_limits<float>::infinity()
+          : -std::numeric_limits<float>::infinity();
+      } else {
+        value_.float_val = literal_value;
+      }
       break;
+    }
     case TYPE_DOUBLE:
       DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
       DCHECK(node.__isset.float_literal);
diff --git a/fe/src/main/java/org/apache/impala/analysis/CastExpr.java 
b/fe/src/main/java/org/apache/impala/analysis/CastExpr.java
index 81b7e8b36..7e31da5ec 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CastExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CastExpr.java
@@ -335,10 +335,23 @@ public class CastExpr extends Expr {
     }
 
     if (children_.get(0) instanceof NumericLiteral && 
type_.isFloatingPointType()) {
-      // Special case casting a decimal literal to a floating point number. The
-      // decimal literal can be interpreted as either and we want to avoid 
casts
-      // since that can result in loss of accuracy.
-      ((NumericLiteral)children_.get(0)).explicitlyCastToFloat(type_);
+      // Special case casting a decimal literal to a floating point number. 
The decimal
+      // literal can be interpreted as either and we want to avoid casts since 
that can
+      // result in loss of accuracy. However, if 'type_' is FLOAT and the 
value does not
+      // fit in a FLOAT, we do not do an unchecked conversion
+      // ('NumericLiteral.explicitlyCastToFloat()') here but let the 
conversion fail in
+      // the BE.
+      NumericLiteral child = (NumericLiteral) children_.get(0);
+      final boolean isOverflow = NumericLiteral.isOverflow(child.getValue(), 
type_);
+      if (type_.isScalarType(PrimitiveType.FLOAT)) {
+        if (!isOverflow) {
+          ((NumericLiteral)children_.get(0)).explicitlyCastToFloat(type_);
+        }
+      } else {
+        Preconditions.checkState(type_.isScalarType(PrimitiveType.DOUBLE));
+        Preconditions.checkState(!isOverflow);
+        ((NumericLiteral)children_.get(0)).explicitlyCastToFloat(type_);
+      }
     }
 
     if (children_.get(0).getType().isNull()) {

[impala] 01/03: IMPALA-12035: Impala accepts very big numbers but fails to store them correctly

Reply via email to