(impala) branch master updated: IMPALA-12950: Improve error message in case of out-of-range numeric conversions

laszlog Thu, 25 Apr 2024 12:57:00 -0700

This is an automated email from the ASF dual-hosted git repository.

laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git



The following commit(s) were added to refs/heads/master by this push:
     new 4f033c775 IMPALA-12950: Improve error message in case of out-of-range 
numeric conversions
4f033c775 is described below

commit 4f033c7750e711c498fb80ff851d4dae0eed55ce
Author: Daniel Becker <[email protected]>
AuthorDate: Wed Apr 17 11:47:43 2024 +0200

    IMPALA-12950: Improve error message in case of out-of-range numeric 
conversions
    
    IMPALA-12035 introduced checks for numeric conversions that are unsafe
    and can fail (if the target type cannot store the value, the behaviour
    is undefined):
     - from floating-point types to integer types
     - from double to float
    
    However, it can be difficult to trace which part of the query caused
    this based on the error message. This change adds the source type, the
    destination type and the value to be converted to the error message.
    Unfortunately, at this point in the BE, the original SQL is not
    available, so we cannot reference that.
    
    Testing:
     - extended existing tests in expr-test.cc.
    
    Change-Id: Ieeed52e25f155818c35c11a8a6821708476ffb32
    Reviewed-on: http://gerrit.cloudera.org:8080/21331
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exprs/cast-functions-ir.cc | 41 ++++++++++++++++++++++----
 be/src/exprs/expr-test.cc         | 62 +++++++++++++++++++++++++++++----------
 be/src/udf/udf.h                  |  6 ++--
 3 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/be/src/exprs/cast-functions-ir.cc 
b/be/src/exprs/cast-functions-ir.cc
index fa5ce41f2..7f4bc0a32 100644
--- a/be/src/exprs/cast-functions-ir.cc
+++ b/be/src/exprs/cast-functions-ir.cc
@@ -59,6 +59,27 @@ const int MAX_BOOLEAN_CHARS = 1;
 
 namespace {
 
+template <class T>
+constexpr const char* TypeToName() {
+  if constexpr (std::is_same_v<T, int8_t>) {
+    return "TINYINT";
+  } else if constexpr (std::is_same_v<T, int16_t>) {
+    return "SMALLINT";
+  } else if constexpr (std::is_same_v<T, int32_t>) {
+    return "INT";
+  } else if constexpr (std::is_same_v<T, int64_t>) {
+    return "BIGINT";
+  } else if constexpr (std::is_same_v<T, float>) {
+    return "FLOAT";
+  } else if constexpr (std::is_same_v<T, double>) {
+    return "DOUBLE";
+  } else {
+    // This function doesn't support other types.
+    static_assert(!std::is_same_v<T, T>);
+    return nullptr;
+  }
+}
+
 // This struct is used as a helper in the validation of casts. For a cast from 
'FROM_TYPE'
 // to 'TO_TYPE' it provides compile time constants about what type of 
conversion it is.
 // These constants are used in the template specifications of the 'Validate()' 
function to
@@ -158,14 +179,20 @@ bool Validate(FROM_TYPE from, FunctionContext* ctx) {
   }
 
   if (UNLIKELY(!is_ok)) {
+    constexpr const char* FROM_TYPE_NAME = TypeToName<FROM_TYPE>();
+    constexpr const char* TO_TYPE_NAME = TypeToName<TO_TYPE>();
+    string err;
     if (std::isnan(from)) {
-      ctx->SetError("NaN value cannot be converted to integer type.");
+      err = Substitute("NaN value of type $0 cannot be converted to $1.",
+          FROM_TYPE_NAME, TO_TYPE_NAME);
     } else if (!std::isfinite(from)) {
-      ctx->SetError("Non-finite value cannot be converted to integer type.");
+      err = Substitute("Non-finite value of type $0 cannot be converted to 
$1.",
+          FROM_TYPE_NAME, TO_TYPE_NAME);
     } else {
-      ctx->SetError("Out-of-range floating point value cannot be converted to "
-          "integer type.");
+      err = Substitute("Converting value $0 of type $1 to $2 failed, "
+          "value out of range for destination type.", from, FROM_TYPE_NAME, 
TO_TYPE_NAME);
     }
+    ctx->SetError(err.c_str());
   }
 
   return is_ok;
@@ -194,8 +221,10 @@ bool Validate(FROM_TYPE from, FunctionContext* ctx) {
   const bool is_ok = in_range || std::isnan(from) || !std::isfinite(from);
 
   if (UNLIKELY(!is_ok)) {
-      ctx->SetError(
-          "Out-of-range double value cannot be converted to float.");
+    const string err = Substitute("Converting value $0 of type $1 to $2 
failed, "
+        "value out of range for destination type.", from, 
TypeToName<FROM_TYPE>(),
+        TypeToName<TO_TYPE>());
+    ctx->SetError(err.c_str());
   }
 
   return is_ok;
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index ec326ed5a..1cb0ee88d 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -704,9 +704,9 @@ class ExprTest : public 
testing::TestWithParam<std::tuple<bool, bool>> {
 
   template <class T>
   void TestValueOrError(const string& expr, PrimitiveType expr_type,
-      const T& expected_result, bool expect_error) {
+      const T& expected_result, bool expect_error, const std::string& error) {
     if (expect_error) {
-      TestError(expr);
+      TestErrorString(expr, error);
     } else {
       TestValue(expr, expr_type, expected_result);
     }
@@ -1220,17 +1220,22 @@ class ExprTest : public 
testing::TestWithParam<std::tuple<bool, bool>> {
     TestStringValue("cast(" + stmt + " as string)", lexical_cast<string>(val));
 
     TestValueOrError("cast(" + stmt + " as tinyint)", TYPE_TINYINT,
-        static_cast<int8_t>(val), min_integer_size > sizeof(int8_t));
+        static_cast<int8_t>(val), min_integer_size > sizeof(int8_t),
+        "value out of range for destination type.\n");
     TestValueOrError("cast(" + stmt + " as smallint)", TYPE_SMALLINT,
-        static_cast<int16_t>(val), min_integer_size > sizeof(int16_t));
+        static_cast<int16_t>(val), min_integer_size > sizeof(int16_t),
+        "value out of range for destination type.\n");
     TestValueOrError("cast(" + stmt + " as int)", TYPE_INT,
-        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t),
+        "value out of range for destination type.\n");
     TestValueOrError("cast(" + stmt + " as integer)", TYPE_INT,
-        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t),
+        "value out of range for destination type.\n");
     TestValueOrError("cast(" + stmt + " as bigint)", TYPE_BIGINT,
-        static_cast<int64_t>(val), min_integer_size > sizeof(int64_t));
-    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT,
-        static_cast<float>(val), float_out_of_range);
+        static_cast<int64_t>(val), min_integer_size > sizeof(int64_t),
+        " value out of range for destination type.\n");
+    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT, 
static_cast<float>(val),
+        float_out_of_range, "value out of range for destination type.\n");
     if (!timestamp_out_of_range) {
       TestTimestampValue("cast(" + stmt + " as timestamp)", 
CreateTestTimestamp(val));
     } else {
@@ -1287,17 +1292,22 @@ void ExprTest::TestCast(const string& stmt, const char* 
val, int min_integer_siz
     TestValue(stmt + " as boolean)", TYPE_BOOLEAN, lexical_cast<bool>(val));
 #endif
     TestValueOrError("cast(" + stmt + " as tinyint)", TYPE_TINYINT,
-        val8, min_integer_size > sizeof(int8_t));
+        val8, min_integer_size > sizeof(int8_t),
+        "value out of range for destination type.\n");
     TestValueOrError("cast(" + stmt + " as smallint)", TYPE_SMALLINT,
-        lexical_cast<int16_t>(val), min_integer_size > sizeof(int16_t));
+        lexical_cast<int16_t>(val), min_integer_size > sizeof(int16_t),
+        "value out of range for destination type.\n");
     TestValueOrError("cast(" + stmt + " as int)", TYPE_INT,
-        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t),
+        "value out of range for destination type.\n");
     TestValueOrError("cast(" + stmt + " as integer)", TYPE_INT,
-        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t),
+        "value out of range for destination type.\n");
     TestValueOrError("cast(" + stmt + " as bigint)", TYPE_BIGINT,
-        lexical_cast<int64_t>(val), min_integer_size > sizeof(int64_t));
-    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT,
-        lexical_cast<float>(val), float_out_of_range);
+        lexical_cast<int64_t>(val), min_integer_size > sizeof(int64_t),
+        "value out of range for destination type.\n");
+    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT, 
lexical_cast<float>(val),
+        float_out_of_range, "value out of range for destination type.\n");
 
     TestValue("cast(" + stmt + " as double)", TYPE_DOUBLE, 
lexical_cast<double>(val));
     TestValue("cast(" + stmt + " as real)", TYPE_DOUBLE, 
lexical_cast<double>(val));
@@ -3503,6 +3513,26 @@ TEST_P(ExprTest, CastExprs) {
   TestIsNull("cast(cast(1180591620717411303425 as decimal(38, 0)) as 
timestamp)",
       TYPE_TIMESTAMP);
 
+  // Explicitly test the error message of an out-of-range double-to-integer 
conversion.
+  TestErrorString("cast(cast(500 as DOUBLE) as TINYINT)",
+      "Converting value 500 of type DOUBLE to TINYINT failed, "
+      "value out of range for destination type.\n");
+
+  // Explicitly test the error message of an out-of-range double-to-float 
conversion.
+  TestErrorString("cast(1e40 as FLOAT)",
+      "Converting value 1e+40 of type DOUBLE to FLOAT failed, "
+      "value out of range for destination type.\n");
+
+  // Nan and non-finite floating-point values converted to int.
+  TestErrorString("cast(cast((1/0) as FLOAT) as INT)",
+      "Non-finite value of type FLOAT cannot be converted to INT.\n");
+  TestErrorString("cast((1/0) as INT)",
+      "Non-finite value of type DOUBLE cannot be converted to INT.\n");
+  TestErrorString("cast(cast((0/0) as FLOAT) as INT)",
+      "NaN value of type FLOAT cannot be converted to INT.\n");
+  TestErrorString("cast((0/0) as INT)",
+      "NaN value of type DOUBLE cannot be converted to INT.\n");
+
   // Out of range String <--> Timestamp - invalid boundary cases.
   TestIsNull("cast('1399-12-31 23:59:59' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('10000-01-01 00:00:00' as timestamp)", TYPE_TIMESTAMP);
diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h
index 2f34a44fd..6a6626c74 100644
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -162,8 +162,10 @@ class FunctionContext {
   /// Returns the query_id for the current query.
   UniqueId query_id() const;
 
-  /// Sets an error for this UDF. If this is called, this will trigger the
-  /// query to fail.
+  /// Sets an error for this UDF. The error message is copied and the copy is 
owned by
+  /// this object.
+  ///
+  /// If this is called, this will trigger the query to fail.
   void SetError(const char* error_msg);
 
   /// Adds a warning that is returned to the user. This can include things like

(impala) branch master updated: IMPALA-12950: Improve error message in case of out-of-range numeric conversions

Reply via email to