This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 32dfbb0bb5 feat: make parse_float_as_decimal work on negative numbers
(#7648)
32dfbb0bb5 is described below
commit 32dfbb0bb5c51b55ad84eed4fc140840f0a76612
Author: Jonah Gao <[email protected]>
AuthorDate: Tue Sep 26 19:38:27 2023 +0800
feat: make parse_float_as_decimal work on negative numbers (#7648)
---
datafusion/sql/src/expr/unary_op.rs | 27 +++---
datafusion/sql/src/expr/value.rs | 96 ++++++++++++-------
datafusion/sqllogictest/test_files/options.slt | 124 ++++++++++++++++++-------
3 files changed, 164 insertions(+), 83 deletions(-)
diff --git a/datafusion/sql/src/expr/unary_op.rs
b/datafusion/sql/src/expr/unary_op.rs
index 6350f7bcc9..08ff6f2c36 100644
--- a/datafusion/sql/src/expr/unary_op.rs
+++ b/datafusion/sql/src/expr/unary_op.rs
@@ -17,7 +17,7 @@
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use datafusion_common::{not_impl_err, DFSchema, DataFusionError, Result};
-use datafusion_expr::{lit, Expr};
+use datafusion_expr::Expr;
use sqlparser::ast::{Expr as SQLExpr, UnaryOperator, Value};
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -39,23 +39,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
match expr {
// optimization: if it's a number literal, we apply the
negative operator
// here directly to calculate the new literal.
- SQLExpr::Value(Value::Number(n, _)) => match
n.parse::<i64>() {
- Ok(n) => Ok(lit(-n)),
- Err(_) => Ok(lit(-n
- .parse::<f64>()
- .map_err(|_e| {
- DataFusionError::Plan(format!(
- "negative operator can be only applied to
integer and float operands, got: {n}"))
- })?)),
- },
- SQLExpr::Interval(interval) => self.sql_interval_to_expr(
- true,
- interval,
+ SQLExpr::Value(Value::Number(n, _)) => {
+ self.parse_sql_number(&n, true)
+ }
+ SQLExpr::Interval(interval) => {
+ self.sql_interval_to_expr(true, interval, schema,
planner_context)
+ }
+ // not a literal, apply negative operator on expression
+ _ =>
Ok(Expr::Negative(Box::new(self.sql_expr_to_logical_expr(
+ expr,
schema,
planner_context,
- ),
- // not a literal, apply negative operator on expression
- _ =>
Ok(Expr::Negative(Box::new(self.sql_expr_to_logical_expr(expr, schema,
planner_context)?))),
+ )?))),
}
}
_ => not_impl_err!("Unsupported SQL unary operator {op:?}"),
diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs
index 424aa50994..c949904cd8 100644
--- a/datafusion/sql/src/expr/value.rs
+++ b/datafusion/sql/src/expr/value.rs
@@ -27,6 +27,7 @@ use datafusion_expr::{lit, Expr, Operator};
use log::debug;
use sqlparser::ast::{BinaryOperator, Expr as SQLExpr, Interval, Value};
use sqlparser::parser::ParserError::ParserError;
+use std::borrow::Cow;
use std::collections::HashSet;
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -36,7 +37,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
param_data_types: &[DataType],
) -> Result<Expr> {
match value {
- Value::Number(n, _) => self.parse_sql_number(&n),
+ Value::Number(n, _) => self.parse_sql_number(&n, false),
Value::SingleQuotedString(s) | Value::DoubleQuotedString(s) =>
Ok(lit(s)),
Value::Null => Ok(Expr::Literal(ScalarValue::Null)),
Value::Boolean(n) => Ok(lit(n)),
@@ -55,35 +56,36 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}
/// Parse number in sql string, convert to Expr::Literal
- fn parse_sql_number(&self, n: &str) -> Result<Expr> {
- if let Ok(n) = n.parse::<i64>() {
- Ok(lit(n))
- } else if let Ok(n) = n.parse::<u64>() {
- Ok(lit(n))
- } else if self.options.parse_float_as_decimal {
- // remove leading zeroes
- let str = n.trim_start_matches('0');
- if let Some(i) = str.find('.') {
- let p = str.len() - 1;
- let s = str.len() - i - 1;
- let str = str.replace('.', "");
- let n = parse_decimal_128_without_scale(&str)?;
- Ok(Expr::Literal(ScalarValue::Decimal128(
- Some(n),
- p as u8,
- s as i8,
- )))
- } else {
- let number = parse_decimal_128_without_scale(str)?;
- Ok(Expr::Literal(ScalarValue::Decimal128(
- Some(number),
- str.len() as u8,
- 0,
- )))
+ pub(super) fn parse_sql_number(
+ &self,
+ unsigned_number: &str,
+ negative: bool,
+ ) -> Result<Expr> {
+ let signed_number: Cow<str> = if negative {
+ Cow::Owned(format!("-{unsigned_number}"))
+ } else {
+ Cow::Borrowed(unsigned_number)
+ };
+
+ // Try to parse as i64 first, then u64 if negative is false, then
decimal or f64
+
+ if let Ok(n) = signed_number.parse::<i64>() {
+ return Ok(lit(n));
+ }
+
+ if !negative {
+ if let Ok(n) = unsigned_number.parse::<u64>() {
+ return Ok(lit(n));
}
+ }
+
+ if self.options.parse_float_as_decimal {
+ parse_decimal_128(unsigned_number, negative)
} else {
- n.parse::<f64>().map(lit).map_err(|_| {
- DataFusionError::from(ParserError(format!("Cannot parse {n} as
f64")))
+ signed_number.parse::<f64>().map(lit).map_err(|_| {
+ DataFusionError::from(ParserError(format!(
+ "Cannot parse {signed_number} as f64"
+ )))
})
}
}
@@ -391,21 +393,45 @@ const fn try_decode_hex_char(c: u8) -> Option<u8> {
}
}
-fn parse_decimal_128_without_scale(s: &str) -> Result<i128> {
- let number = s.parse::<i128>().map_err(|e| {
+/// Parse Decimal128 from a string
+///
+/// TODO: support parsing from scientific notation
+fn parse_decimal_128(unsigned_number: &str, negative: bool) -> Result<Expr> {
+ // remove leading zeroes
+ let trimmed = unsigned_number.trim_start_matches('0');
+ // parse precision and scale, remove decimal point if exists
+ let (precision, scale, replaced_str) = if trimmed == "." {
+ // special cases for numbers such as “0.”, “000.”, and so on.
+ (1, 0, Cow::Borrowed("0"))
+ } else if let Some(i) = trimmed.find('.') {
+ (
+ trimmed.len() - 1,
+ trimmed.len() - i - 1,
+ Cow::Owned(trimmed.replace('.', "")),
+ )
+ } else {
+ // no decimal point, keep as is
+ (trimmed.len(), 0, Cow::Borrowed(trimmed))
+ };
+
+ let number = replaced_str.parse::<i128>().map_err(|e| {
DataFusionError::from(ParserError(format!(
- "Cannot parse {s} as i128 when building decimal: {e}"
+ "Cannot parse {replaced_str} as i128 when building decimal: {e}"
)))
})?;
- const DECIMAL128_MAX_VALUE: i128 = 10_i128.pow(DECIMAL128_MAX_PRECISION as
u32) - 1;
- if number > DECIMAL128_MAX_VALUE {
+ // check precision overflow
+ if precision as u8 > DECIMAL128_MAX_PRECISION {
return Err(DataFusionError::from(ParserError(format!(
- "Cannot parse {s} as i128 when building decimal: precision
overflow"
+ "Cannot parse {replaced_str} as i128 when building decimal:
precision overflow"
))));
}
- Ok(number)
+ Ok(Expr::Literal(ScalarValue::Decimal128(
+ Some(if negative { -number } else { number }),
+ precision as u8,
+ scale as i8,
+ )))
}
#[cfg(test)]
diff --git a/datafusion/sqllogictest/test_files/options.slt
b/datafusion/sqllogictest/test_files/options.slt
index 412cc4a54c..5fbb2102f4 100644
--- a/datafusion/sqllogictest/test_files/options.slt
+++ b/datafusion/sqllogictest/test_files/options.slt
@@ -84,69 +84,129 @@ statement ok
drop table a
# test datafusion.sql_parser.parse_float_as_decimal
+#
# default option value is false
-query R
-select 10000000000000000000.01
+query RR
+select 10000000000000000000.01, -10000000000000000000.01
----
-10000000000000000000
+10000000000000000000 -10000000000000000000
-query T
-select arrow_typeof(10000000000000000000.01)
+query TT
+select arrow_typeof(10000000000000000000.01),
arrow_typeof(-10000000000000000000.01)
+----
+Float64 Float64
+
+# select 0, i64::MIN, i64::MIN-1, i64::MAX, i64::MAX + 1, u64::MAX, u64::MAX +
1
+query IIRIIIR
+select 0, -9223372036854775808, -9223372036854775809, 9223372036854775807,
+ 9223372036854775808, 18446744073709551615, 18446744073709551616
----
-Float64
+0 -9223372036854775808 -9223372036854776000 9223372036854775807
9223372036854775808 18446744073709551615 18446744073709552000
+
+query TTTTTTT
+select arrow_typeof(0), arrow_typeof(-9223372036854775808),
arrow_typeof(-9223372036854775809),
+ arrow_typeof(9223372036854775807), arrow_typeof(9223372036854775808),
+ arrow_typeof(18446744073709551615), arrow_typeof(18446744073709551616)
+----
+Int64 Int64 Float64 Int64 UInt64 UInt64 Float64
+
statement ok
set datafusion.sql_parser.parse_float_as_decimal = true;
-query R
-select 10000000000000000000.01
+query RR
+select 10000000000000000000.01, -10000000000000000000.01
+----
+10000000000000000000.01 -10000000000000000000.01
+
+query TT
+select arrow_typeof(10000000000000000000.01),
arrow_typeof(-10000000000000000000.01)
----
-10000000000000000000.01
+Decimal128(22, 2) Decimal128(22, 2)
-query T
-select arrow_typeof(10000000000000000000.01)
+# select 0, i64::MIN, i64::MIN-1, i64::MAX, i64::MAX + 1, u64::MAX, u64::MAX +
1
+query IIRIIIR
+select 0, -9223372036854775808, -9223372036854775809, 9223372036854775807,
+ 9223372036854775808, 18446744073709551615, 18446744073709551616
----
-Decimal128(22, 2)
+0 -9223372036854775808 -9223372036854775809 9223372036854775807
9223372036854775808 18446744073709551615 18446744073709551616
-query R
-select 999999999999999999999999999999999999
+query TTTTTTT
+select arrow_typeof(0), arrow_typeof(-9223372036854775808),
arrow_typeof(-9223372036854775809),
+ arrow_typeof(9223372036854775807), arrow_typeof(9223372036854775808),
+ arrow_typeof(18446744073709551615), arrow_typeof(18446744073709551616)
----
-999999999999999999999999999999999999
+Int64 Int64 Decimal128(19, 0) Int64 UInt64 UInt64 Decimal128(20, 0)
-query T
-select arrow_typeof(999999999999999999999999999999999999)
+# special cases
+query RRRR
+select .0 as c1, 0. as c2, 0000. as c3, 00000.00 as c4
----
-Decimal128(36, 0)
+0 0 0 0
-query R
-select 99999999999999999999999999999999999999
+query TTTT
+select arrow_typeof(.0) as c1, arrow_typeof(0.) as c2, arrow_typeof(0000.) as
c3, arrow_typeof(00000.00) as c4
----
-99999999999999999999999999999999999999
+Decimal128(1, 1) Decimal128(1, 0) Decimal128(1, 0) Decimal128(2, 2)
-query T
-select arrow_typeof(99999999999999999999999999999999999999)
+query RR
+select 999999999999999999999999999999999999,
-999999999999999999999999999999999999
----
-Decimal128(38, 0)
+999999999999999999999999999999999999 -999999999999999999999999999999999999
-query R
-select 9999999999999999999999999999999999.9999
+query TT
+select arrow_typeof(999999999999999999999999999999999999),
arrow_typeof(-999999999999999999999999999999999999)
----
-9999999999999999999999999999999999.9999
+Decimal128(36, 0) Decimal128(36, 0)
-query T
-select arrow_typeof(9999999999999999999999999999999999.9999)
+query RR
+select 99999999999999999999999999999999999999,
-99999999999999999999999999999999999999
----
-Decimal128(38, 4)
+99999999999999999999999999999999999999 -99999999999999999999999999999999999999
+
+query TT
+select arrow_typeof(99999999999999999999999999999999999999),
arrow_typeof(-99999999999999999999999999999999999999)
+----
+Decimal128(38, 0) Decimal128(38, 0)
+
+query RR
+select 9999999999999999999999999999999999.9999,
-9999999999999999999999999999999999.9999
+----
+9999999999999999999999999999999999.9999
-9999999999999999999999999999999999.9999
+
+query TT
+select arrow_typeof(9999999999999999999999999999999999.9999),
arrow_typeof(-9999999999999999999999999999999999.9999)
+----
+Decimal128(38, 4) Decimal128(38, 4)
+
+# leading zeroes
+query RRR
+select 00009999999999999999999999999999999999.9999,
-00009999999999999999999999999999999999.9999, 0018446744073709551616
+----
+9999999999999999999999999999999999.9999
-9999999999999999999999999999999999.9999 18446744073709551616
+
+query TTT
+select arrow_typeof(00009999999999999999999999999999999999.9999),
+ arrow_typeof(-00009999999999999999999999999999999999.9999),
+ arrow_typeof(0018446744073709551616)
+----
+Decimal128(38, 4) Decimal128(38, 4) Decimal128(20, 0)
# precision overflow
-statement error SQL error: ParserError\("Cannot parse
123456789012345678901234567890123456789 as i128 when building decimal:
precision overflow"\)
+statement error DataFusion error: SQL error: ParserError\("Cannot parse
123456789012345678901234567890123456789 as i128 when building decimal:
precision overflow"\)
select 123456789.012345678901234567890123456789
+statement error SQL error: ParserError\("Cannot parse
123456789012345678901234567890123456789 as i128 when building decimal:
precision overflow"\)
+select -123456789.012345678901234567890123456789
+
# can not fit in i128
statement error SQL error: ParserError\("Cannot parse
1234567890123456789012345678901234567890 as i128 when building decimal: number
too large to fit in target type"\)
select 123456789.0123456789012345678901234567890
-# Restore those to default value again
+statement error SQL error: ParserError\("Cannot parse
1234567890123456789012345678901234567890 as i128 when building decimal: number
too large to fit in target type"\)
+select -123456789.0123456789012345678901234567890
+
+# Restore option to default value
statement ok
set datafusion.sql_parser.parse_float_as_decimal = false;