This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 19174e7665 fix: check for precision overflow when parsing float as 
decimal (#7627)
19174e7665 is described below

commit 19174e76656adec353f85ff468654ba210dc9cda
Author: Jonah Gao <[email protected]>
AuthorDate: Mon Sep 25 19:10:31 2023 +0800

    fix: check for precision overflow when parsing float as decimal (#7627)
    
    * fix: check for precision overflow when parsing float as decimal
    
    * fix hard-coded precison and move tests to options.slt
---
 datafusion/sql/src/expr/value.rs               | 36 ++++++++++----
 datafusion/sql/tests/sql_integration.rs        |  2 +-
 datafusion/sqllogictest/test_files/ddl.slt     | 27 ----------
 datafusion/sqllogictest/test_files/options.slt | 69 ++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 39 deletions(-)

diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs
index 3d5a39a13a..424aa50994 100644
--- a/datafusion/sql/src/expr/value.rs
+++ b/datafusion/sql/src/expr/value.rs
@@ -17,6 +17,7 @@
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
 use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano;
+use arrow::datatypes::DECIMAL128_MAX_PRECISION;
 use arrow_schema::DataType;
 use datafusion_common::{
     not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue,
@@ -66,23 +67,19 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 let p = str.len() - 1;
                 let s = str.len() - i - 1;
                 let str = str.replace('.', "");
-                let n = str.parse::<i128>().map_err(|_| {
-                    DataFusionError::from(ParserError(format!(
-                        "Cannot parse {str} as i128 when building decimal"
-                    )))
-                })?;
+                let n = parse_decimal_128_without_scale(&str)?;
                 Ok(Expr::Literal(ScalarValue::Decimal128(
                     Some(n),
                     p as u8,
                     s as i8,
                 )))
             } else {
-                let number = n.parse::<i128>().map_err(|_| {
-                    DataFusionError::from(ParserError(format!(
-                        "Cannot parse {n} as i128 when building decimal"
-                    )))
-                })?;
-                Ok(Expr::Literal(ScalarValue::Decimal128(Some(number), 38, 0)))
+                let number = parse_decimal_128_without_scale(str)?;
+                Ok(Expr::Literal(ScalarValue::Decimal128(
+                    Some(number),
+                    str.len() as u8,
+                    0,
+                )))
             }
         } else {
             n.parse::<f64>().map(lit).map_err(|_| {
@@ -394,6 +391,23 @@ const fn try_decode_hex_char(c: u8) -> Option<u8> {
     }
 }
 
+fn parse_decimal_128_without_scale(s: &str) -> Result<i128> {
+    let number = s.parse::<i128>().map_err(|e| {
+        DataFusionError::from(ParserError(format!(
+            "Cannot parse {s} as i128 when building decimal: {e}"
+        )))
+    })?;
+
+    const DECIMAL128_MAX_VALUE: i128 = 10_i128.pow(DECIMAL128_MAX_PRECISION as 
u32) - 1;
+    if number > DECIMAL128_MAX_VALUE {
+        return Err(DataFusionError::from(ParserError(format!(
+            "Cannot parse {s} as i128 when building decimal: precision 
overflow"
+        ))));
+    }
+
+    Ok(number)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index c1d1415dc7..88a041a661 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -54,7 +54,7 @@ fn parse_decimals() {
         ("18446744073709551615", "UInt64(18446744073709551615)"),
         (
             "18446744073709551616",
-            "Decimal128(Some(18446744073709551616),38,0)",
+            "Decimal128(Some(18446744073709551616),20,0)",
         ),
     ];
     for (a, b) in test_data {
diff --git a/datafusion/sqllogictest/test_files/ddl.slt 
b/datafusion/sqllogictest/test_files/ddl.slt
index 1f9bfa0aa2..ed4f4b4a11 100644
--- a/datafusion/sqllogictest/test_files/ddl.slt
+++ b/datafusion/sqllogictest/test_files/ddl.slt
@@ -670,33 +670,6 @@ describe TABLE_WITHOUT_NORMALIZATION
 FIELD1 Int64 YES
 FIELD2 Int64 YES
 
-query R
-select 10000000000000000000.01
-----
-10000000000000000000
-
-query T
-select arrow_typeof(10000000000000000000.01)
-----
-Float64
-
-statement ok
-set datafusion.sql_parser.parse_float_as_decimal = true;
-
-query R
-select 10000000000000000000.01
-----
-10000000000000000000.01
-
-query T
-select arrow_typeof(10000000000000000000.01)
-----
-Decimal128(22, 2)
-
-# Restore those to default value again
-statement ok
-set datafusion.sql_parser.parse_float_as_decimal = false;
-
 statement ok
 set datafusion.sql_parser.enable_ident_normalization = true;
 
diff --git a/datafusion/sqllogictest/test_files/options.slt 
b/datafusion/sqllogictest/test_files/options.slt
index 1f4cc9ab0c..412cc4a54c 100644
--- a/datafusion/sqllogictest/test_files/options.slt
+++ b/datafusion/sqllogictest/test_files/options.slt
@@ -82,3 +82,72 @@ set datafusion.execution.batch_size = 8192;
 
 statement ok
 drop table a
+
+# test datafusion.sql_parser.parse_float_as_decimal
+# default option value is false
+query R
+select 10000000000000000000.01
+----
+10000000000000000000
+
+query T
+select arrow_typeof(10000000000000000000.01)
+----
+Float64
+
+statement ok
+set datafusion.sql_parser.parse_float_as_decimal = true;
+
+query R
+select 10000000000000000000.01
+----
+10000000000000000000.01
+
+query T
+select arrow_typeof(10000000000000000000.01)
+----
+Decimal128(22, 2)
+
+query R
+select 999999999999999999999999999999999999
+----
+999999999999999999999999999999999999
+
+query T
+select arrow_typeof(999999999999999999999999999999999999)
+----
+Decimal128(36, 0)
+
+query R
+select 99999999999999999999999999999999999999
+----
+99999999999999999999999999999999999999
+
+query T
+select arrow_typeof(99999999999999999999999999999999999999)
+----
+Decimal128(38, 0)
+
+query R
+select 9999999999999999999999999999999999.9999
+----
+9999999999999999999999999999999999.9999
+
+query T
+select arrow_typeof(9999999999999999999999999999999999.9999)
+----
+Decimal128(38, 4)
+
+# precision overflow
+statement error SQL error: ParserError\("Cannot parse 
123456789012345678901234567890123456789 as i128 when building decimal: 
precision overflow"\)
+select 123456789.012345678901234567890123456789
+
+# can not fit in i128
+statement error SQL error: ParserError\("Cannot parse 
1234567890123456789012345678901234567890 as i128 when building decimal: number 
too large to fit in target type"\)
+select 123456789.0123456789012345678901234567890
+
+# Restore those to default value again
+statement ok
+set datafusion.sql_parser.parse_float_as_decimal = false;
+
+

Reply via email to