Re: [PR] feat: Implement Spark-compatible CAST from string to integral types [datafusion-comet]

via GitHub Sat, 27 Apr 2024 11:08:36 -0700


andygrove commented on code in PR #307:
URL: https://github.com/apache/datafusion-comet/pull/307#discussion_r1581875517



##########
core/src/execution/datafusion/expressions/cast.rs:
##########
@@ -142,6 +226,311 @@ impl Cast {
     }
 }
 
+fn cast_string_to_i8(str: &str, eval_mode: EvalMode) -> 
CometResult<Option<i8>> {
+    Ok(cast_string_to_int_with_range_check(
+        str,
+        eval_mode,
+        "TINYINT",
+        i8::MIN as i32,
+        i8::MAX as i32,
+    )?
+    .map(|v| v as i8))
+}
+
+fn cast_string_to_i16(str: &str, eval_mode: EvalMode) -> 
CometResult<Option<i16>> {
+    Ok(cast_string_to_int_with_range_check(
+        str,
+        eval_mode,
+        "SMALLINT",
+        i16::MIN as i32,
+        i16::MAX as i32,
+    )?
+    .map(|v| v as i16))
+}
+
+fn cast_string_to_i32(str: &str, eval_mode: EvalMode) -> 
CometResult<Option<i32>> {
+    let mut accum = CastStringToInt32::default();
+    do_cast_string_to_int(&mut accum, str, eval_mode, "INT")?;
+    Ok(accum.result)
+}
+
+fn cast_string_to_i64(str: &str, eval_mode: EvalMode) -> 
CometResult<Option<i64>> {
+    let mut accum = CastStringToInt64::default();
+    do_cast_string_to_int(&mut accum, str, eval_mode, "BIGINT")?;
+    Ok(accum.result)
+}
+
+fn cast_string_to_int_with_range_check(
+    str: &str,
+    eval_mode: EvalMode,
+    type_name: &str,
+    min: i32,
+    max: i32,
+) -> CometResult<Option<i32>> {
+    let mut accum = CastStringToInt32::default();
+    do_cast_string_to_int(&mut accum, str, eval_mode, type_name)?;
+    match accum.result {
+        None => Ok(None),
+        Some(v) if v >= min && v <= max => Ok(Some(v)),
+        _ if eval_mode == EvalMode::Ansi => Err(invalid_value(str, "STRING", 
type_name)),
+        _ => Ok(None),
+    }
+}
+
+/// We support parsing strings to i32 and i64 to match Spark's logic. Support for i8 and i16 is
+/// implemented by first parsing as i32 and then downcasting. The CastStringToInt trait is
+/// introduced so that we can have the parsing logic delegate either to an i32 or i64 accumulator
+/// and avoid the need to use macros here.
+trait CastStringToInt {
+    fn accumulate(
+        &mut self,
+        eval_mode: EvalMode,
+        type_name: &str,
+        str: &str,
+        digit: u32,
+    ) -> CometResult<()>;
+
+    fn reset(&mut self);
+
+    fn finish(
+        &mut self,
+        eval_mode: EvalMode,
+        type_name: &str,
+        str: &str,
+        negative: bool,
+    ) -> CometResult<()>;
+}
+struct CastStringToInt32 {
+    negative: bool,
+    result: Option<i32>,
+    radix: i32,
+}
+
+impl Default for CastStringToInt32 {
+    fn default() -> Self {
+        Self {
+            negative: false,
+            result: Some(0),
+            radix: 10,
+        }
+    }
+}
+
+impl CastStringToInt for CastStringToInt32 {
+    fn accumulate(
+        &mut self,
+        eval_mode: EvalMode,
+        type_name: &str,
+        str: &str,
+        digit: u32,
+    ) -> CometResult<()> {
+        // We are going to process the new digit and accumulate the result. However, before doing
+        // this, if the result is already smaller than the stopValue(Integer.MIN_VALUE / radix),
+        // then result * 10 will definitely be smaller than minValue, and we can stop
+        if let Some(r) = self.result {
+            let stop_value = i32::MIN / self.radix;
+            if r < stop_value {
+                self.reset();
+                return none_or_err(eval_mode, type_name, str);
+            }
+        }
+        // Since the previous result is less than or equal to stopValue(Integer.MIN_VALUE / radix),
+        // we can just use `result > 0` to check overflow. If result overflows, we should stop
+        let v = self.result.unwrap_or(0) * self.radix;
+        match v.checked_sub(digit as i32) {
+            Some(x) if x <= 0 => self.result = Some(x),
+            _ => {
+                self.reset();
+                return none_or_err(eval_mode, type_name, str);
+            }
+        }
+        Ok(())
+    }
+    fn reset(&mut self) {
+        self.result = None;
+    }
+
+    fn finish(
+        &mut self,
+        eval_mode: EvalMode,
+        type_name: &str,
+        str: &str,
+        negative: bool,
+    ) -> CometResult<()> {
+        if !negative {
+            if let Some(r) = self.result {
+                let negated = r.checked_neg().unwrap_or(-1);
+                if negated < 0 {
+                    self.reset();
+                    return none_or_err(eval_mode, type_name, str);
+                }
+                self.result = Some(negated);
+            }
+        }
+        Ok(())
+    }
+}
+
+struct CastStringToInt64 {
+    negative: bool,
+    result: Option<i64>,
+    radix: i64,
+}
+
+impl Default for CastStringToInt64 {
+    fn default() -> Self {
+        Self {
+            negative: false,
+            result: Some(0),
+            radix: 10,
+        }
+    }
+}
+
+impl CastStringToInt for CastStringToInt64 {
+    fn accumulate(
+        &mut self,
+        eval_mode: EvalMode,
+        type_name: &str,
+        str: &str,
+        digit: u32,
+    ) -> CometResult<()> {

Review Comment:
   I reimplemented this, and it no longer uses `Option`s.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Re: [PR] feat: Implement Spark-compatible CAST from string to integral types [datafusion-comet]

Reply via email to