andygrove commented on code in PR #335:
URL: https://github.com/apache/datafusion-comet/pull/335#discussion_r1586766148


##########
core/src/execution/datafusion/expressions/cast.rs:
##########
@@ -510,9 +558,246 @@ impl PhysicalExpr for Cast {
     }
 }
 
+fn timestamp_parser(value: &str, eval_mode: EvalMode) -> 
CometResult<Option<i64>> {
+    let value = value.trim();
+    if value.is_empty() {
+        return Ok(None);
+    }
+
+    // Define regex patterns and corresponding parsing functions
+    let patterns = &[
+        (
+            Regex::new(r"^\d{4}$").unwrap(),
+            parse_str_to_year_timestamp as fn(&str) -> 
CometResult<Option<i64>>,
+        ),
+        (
+            Regex::new(r"^\d{4}-\d{2}$").unwrap(),
+            parse_str_to_month_timestamp,
+        ),
+        (
+            Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(),
+            parse_str_to_day_timestamp,
+        ),
+        (
+            Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{1,2}$").unwrap(),
+            parse_str_to_hour_timestamp,
+        ),
+        (
+            Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}$").unwrap(),
+            parse_str_to_minute_timestamp,
+        ),
+        (
+            Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$").unwrap(),
+            parse_str_to_second_timestamp,
+        ),
+        (
+            
Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$").unwrap(),
+            parse_str_to_microsecond_timestamp,
+        ),
+        (
+            Regex::new(r"^T\d{1,2}$").unwrap(),
+            parse_str_to_time_only_timestamp,
+        ),
+    ];
+
+    let mut timestamp = None;
+
+    // Iterate through patterns and try matching
+    for (pattern, parse_func) in patterns {
+        if pattern.is_match(value) {
+            timestamp = parse_func(value)?;
+            break;
+        }
+    }
+
+    if timestamp.is_none() {
+        if eval_mode == EvalMode::Ansi {
+            return Err(CometError::CastInvalidValue {
+                value: value.to_string(),
+                from_type: "STRING".to_string(),
+                to_type: "TIMESTAMP".to_string(),
+            });
+        } else {
+            return Ok(None);
+        }
+    }
+    Ok(Some(timestamp.unwrap()))
+}
+
+fn parse_ymd_timestamp(year: i32, month: u32, day: u32) -> 
CometResult<Option<i64>> {
+    let datetime = chrono::Utc
+        .with_ymd_and_hms(year, month, day, 0, 0, 0)
+        .unwrap()
+        .with_timezone(&chrono::Utc);
+    Ok(Some(datetime.timestamp_micros()))
+}
+
+fn parse_hms_timestamp(
+    year: i32,
+    month: u32,
+    day: u32,
+    hour: u32,
+    minute: u32,
+    second: u32,
+    microsecond: u32,
+) -> CometResult<Option<i64>> {
+    let datetime = chrono::Utc
+        .with_ymd_and_hms(year, month, day, hour, minute, second)
+        .unwrap()
+        .with_timezone(&chrono::Utc)
+        .with_nanosecond(microsecond * 1000);
+    Ok(Some(datetime.unwrap().timestamp_micros()))
+}
+
+fn get_timestamp_values(value: &str, timestamp_type: &str) -> 
CometResult<Option<i64>> {
+    let values: Vec<_> = value
+        .split(|c| c == 'T' || c == '-' || c == ':' || c == '.')
+        .collect();
+    let year = values[0].parse::<i32>().unwrap_or_default();
+    let month = values.get(1).map_or(1, |m| m.parse::<u32>().unwrap_or(1));
+    let day = values.get(2).map_or(1, |d| d.parse::<u32>().unwrap_or(1));
+    let hour = values.get(3).map_or(0, |h| h.parse::<u32>().unwrap_or(0));
+    let minute = values.get(4).map_or(0, |m| m.parse::<u32>().unwrap_or(0));
+    let second = values.get(5).map_or(0, |s| s.parse::<u32>().unwrap_or(0));
+    let microsecond = values.get(6).map_or(0, |ms| 
ms.parse::<u32>().unwrap_or(0));
+
+    match timestamp_type {
+        "year" => parse_ymd_timestamp(year, 1, 1),
+        "month" => parse_ymd_timestamp(year, month, 1),
+        "day" => parse_ymd_timestamp(year, month, day),
+        "hour" => parse_hms_timestamp(year, month, day, hour, 0, 0, 0),
+        "minute" => parse_hms_timestamp(year, month, day, hour, minute, 0, 0),
+        "second" => parse_hms_timestamp(year, month, day, hour, minute, 
second, 0),
+        "microsecond" => parse_hms_timestamp(year, month, day, hour, minute, 
second, microsecond),
+        _ => Err(CometError::CastInvalidValue {
+            value: value.to_string(),
+            from_type: "STRING".to_string(),
+            to_type: "TIMESTAMP".to_string(),
+        }),
+    }
+}
+
+fn parse_str_to_year_timestamp(value: &str) -> CometResult<Option<i64>> {
+    get_timestamp_values(value, "year")
+}
+
+fn parse_str_to_month_timestamp(value: &str) -> CometResult<Option<i64>> {
+    get_timestamp_values(value, "month")
+}
+
+fn parse_str_to_day_timestamp(value: &str) -> CometResult<Option<i64>> {
+    get_timestamp_values(value, "day")
+}
+
+fn parse_str_to_hour_timestamp(value: &str) -> CometResult<Option<i64>> {
+    get_timestamp_values(value, "hour")
+}
+
+fn parse_str_to_minute_timestamp(value: &str) -> CometResult<Option<i64>> {
+    get_timestamp_values(value, "minute")
+}
+
+fn parse_str_to_second_timestamp(value: &str) -> CometResult<Option<i64>> {
+    get_timestamp_values(value, "second")
+}
+
+fn parse_str_to_microsecond_timestamp(value: &str) -> CometResult<Option<i64>> 
{
+    get_timestamp_values(value, "microsecond")
+}
+
+fn parse_str_to_time_only_timestamp(value: &str) -> CometResult<Option<i64>> {
+    let values: Vec<&str> = value.split('T').collect();
+    let time_values: Vec<u32> = values[1]
+        .split(':')
+        .map(|v| v.parse::<u32>().unwrap_or(0))
+        .collect();
+
+    let datetime = chrono::Utc::now();
+    let timestamp = datetime
+        .with_hour(time_values.first().copied().unwrap_or_default())
+        .unwrap()

Review Comment:
   I managed to hit this `unwrap` on a `None` value, which causes a panic. Can you
   return an `Err` instead if any of these `with_` calls returns `None`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to