This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new c3e27c04 Fix overflow in date_parser (#529)
c3e27c04 is described below
commit c3e27c0405cd2ce3888dc1943323113735c7859e
Author: Emil Ejbyfeldt <[email protected]>
AuthorDate: Fri Jun 7 07:11:29 2024 +0200
Fix overflow in date_parser (#529)
The date_parser was introduced in #383 and is mostly a direct port of
code in Spark. Since the code uses the JVM it has defined integer
overflow as wrapping. The proposed fixed is to use std::num::Wrapping to
get the same wrapping behavior in rust. The overflown value will still
be disgarded in a later check that uses `current_segment_digits` so
allowing the overflow does not lead to correctness issues.
This resolves one of the overflows discussed in #481
---
core/src/execution/datafusion/expressions/cast.rs | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/core/src/execution/datafusion/expressions/cast.rs
b/core/src/execution/datafusion/expressions/cast.rs
index 7e8a96f2..a39587b6 100644
--- a/core/src/execution/datafusion/expressions/cast.rs
+++ b/core/src/execution/datafusion/expressions/cast.rs
@@ -19,6 +19,7 @@ use std::{
any::Any,
fmt::{Debug, Display, Formatter},
hash::{Hash, Hasher},
+ num::Wrapping,
sync::Arc,
};
@@ -1570,7 +1571,7 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) ->
CometResult<Option<i32>>
let mut date_segments = [1, 1, 1];
let mut sign = 1;
let mut current_segment = 0;
- let mut current_segment_value = 0;
+ let mut current_segment_value = Wrapping(0);
let mut current_segment_digits = 0;
let bytes = date_str.as_bytes();
@@ -1597,16 +1598,16 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) ->
CometResult<Option<i32>>
return return_result(date_str, eval_mode);
}
//if valid update corresponding segment with the current segment
value.
- date_segments[current_segment as usize] = current_segment_value;
- current_segment_value = 0;
+ date_segments[current_segment as usize] = current_segment_value.0;
+ current_segment_value = Wrapping(0);
current_segment_digits = 0;
current_segment += 1;
} else if !b.is_ascii_digit() {
return return_result(date_str, eval_mode);
} else {
//increment value of current segment by the next digit
- let parsed_value = (b - b'0') as i32;
- current_segment_value = current_segment_value * 10 + parsed_value;
+ let parsed_value = Wrapping((b - b'0') as i32);
+ current_segment_value = current_segment_value * Wrapping(10) +
parsed_value;
current_segment_digits += 1;
}
j += 1;
@@ -1622,7 +1623,7 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) ->
CometResult<Option<i32>>
return return_result(date_str, eval_mode);
}
- date_segments[current_segment as usize] = current_segment_value;
+ date_segments[current_segment as usize] = current_segment_value.0;
match NaiveDate::from_ymd_opt(
sign * date_segments[0],
@@ -1836,6 +1837,8 @@ mod tests {
Some(" 202 "),
Some("\n 2020-\r8 "),
Some("2020-01-01T"),
+ // Overflows i32
+ Some("-4607172990231812908"),
]));
for eval_mode in &[EvalMode::Legacy, EvalMode::Try] {
@@ -1857,7 +1860,8 @@ mod tests {
None,
None,
None,
- Some(18262)
+ Some(18262),
+ None
]
);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]