This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new c3e27c04 Fix overflow in date_parser (#529)
c3e27c04 is described below

commit c3e27c0405cd2ce3888dc1943323113735c7859e
Author: Emil Ejbyfeldt <[email protected]>
AuthorDate: Fri Jun 7 07:11:29 2024 +0200

    Fix overflow in date_parser (#529)
    
    The date_parser was introduced in #383 and is mostly a direct port of
    code in Spark. Since that code runs on the JVM, integer overflow is
    defined as wrapping. The proposed fix is to use std::num::Wrapping to
    get the same wrapping behavior in Rust. The overflowed value will still
    be discarded by a later check on `current_segment_digits`, so allowing
    the overflow does not lead to correctness issues.
    
    This resolves one of the overflows discussed in #481
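
A minimal standalone sketch (not part of the commit) of the wrapping behavior
that std::num::Wrapping provides here: plain i32 arithmetic panics on overflow
in debug builds, while Wrapping<i32> wraps around like a JVM int. The digits
come from the overflow test input added below.

    use std::num::Wrapping;

    fn main() {
        // Accumulate digits the way date_parser does, on a value that does
        // not fit in an i32 (taken from the new test case).
        let mut acc = Wrapping(0i32);
        let mut digits = 0;
        for b in "4607172990231812908".bytes() {
            let parsed = Wrapping((b - b'0') as i32);
            acc = acc * Wrapping(10) + parsed; // wraps instead of panicking
            digits += 1;
        }
        // The wrapped value is meaningless, but the parser later rejects the
        // segment because its digit count is out of range, so correctness is
        // unaffected.
        println!("digits = {}, wrapped value = {}", digits, acc.0);
    }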
---
 core/src/execution/datafusion/expressions/cast.rs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/core/src/execution/datafusion/expressions/cast.rs b/core/src/execution/datafusion/expressions/cast.rs
index 7e8a96f2..a39587b6 100644
--- a/core/src/execution/datafusion/expressions/cast.rs
+++ b/core/src/execution/datafusion/expressions/cast.rs
@@ -19,6 +19,7 @@ use std::{
     any::Any,
     fmt::{Debug, Display, Formatter},
     hash::{Hash, Hasher},
+    num::Wrapping,
     sync::Arc,
 };
 
@@ -1570,7 +1571,7 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
     let mut date_segments = [1, 1, 1];
     let mut sign = 1;
     let mut current_segment = 0;
-    let mut current_segment_value = 0;
+    let mut current_segment_value = Wrapping(0);
     let mut current_segment_digits = 0;
     let bytes = date_str.as_bytes();
 
@@ -1597,16 +1598,16 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
                 return return_result(date_str, eval_mode);
             }
             //if valid update corresponding segment with the current segment value.
-            date_segments[current_segment as usize] = current_segment_value;
-            current_segment_value = 0;
+            date_segments[current_segment as usize] = current_segment_value.0;
+            current_segment_value = Wrapping(0);
             current_segment_digits = 0;
             current_segment += 1;
         } else if !b.is_ascii_digit() {
             return return_result(date_str, eval_mode);
         } else {
             //increment value of current segment by the next digit
-            let parsed_value = (b - b'0') as i32;
-            current_segment_value = current_segment_value * 10 + parsed_value;
+            let parsed_value = Wrapping((b - b'0') as i32);
+            current_segment_value = current_segment_value * Wrapping(10) + parsed_value;
             current_segment_digits += 1;
         }
         j += 1;
@@ -1622,7 +1623,7 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
         return return_result(date_str, eval_mode);
     }
 
-    date_segments[current_segment as usize] = current_segment_value;
+    date_segments[current_segment as usize] = current_segment_value.0;
 
     match NaiveDate::from_ymd_opt(
         sign * date_segments[0],
@@ -1836,6 +1837,8 @@ mod tests {
             Some(" 202 "),
             Some("\n 2020-\r8 "),
             Some("2020-01-01T"),
+            // Overflows i32
+            Some("-4607172990231812908"),
         ]));
 
         for eval_mode in &[EvalMode::Legacy, EvalMode::Try] {
@@ -1857,7 +1860,8 @@ mod tests {
                     None,
                     None,
                     None,
-                    Some(18262)
+                    Some(18262),
+                    None
                 ]
             );
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
