alamb commented on code in PR #3668:
URL: https://github.com/apache/arrow-rs/pull/3668#discussion_r1100716690


##########
arrow-cast/src/cast.rs:
##########
@@ -155,13 +154,12 @@ pub fn can_cast_types(from_type: &DataType, to_type: 
&DataType) -> bool {
         (_, Boolean) => DataType::is_numeric(from_type) || from_type == &Utf8 
|| from_type == &LargeUtf8,
         (Boolean, _) => DataType::is_numeric(to_type) || to_type == &Utf8 || 
to_type == &LargeUtf8,
 
-        (Utf8, LargeUtf8) => true,

Review Comment:
   for anyone else reviewing, these case got folded down into the other Utf8 
and LargeUtf cases



##########
arrow-cast/src/cast.rs:
##########
@@ -2171,172 +2111,25 @@ where
     from.unary_opt::<_, R>(num::cast::cast::<T::Native, R::Native>)
 }
 
-fn as_time_with_string_op<
-    A: ArrayAccessor<Item = T::Native>,
-    OffsetSize,
-    T: ArrowTemporalType,
-    F,
->(
-    iter: ArrayIter<A>,
-    mut builder: GenericStringBuilder<OffsetSize>,
-    op: F,
-) -> ArrayRef
-where
-    OffsetSize: OffsetSizeTrait,
-    F: Fn(NaiveDateTime) -> String,
-    i64: From<T::Native>,
-{
-    iter.into_iter().for_each(|value| {
-        if let Some(value) = value {
-            match as_datetime::<T>(<i64 as From<_>>::from(value)) {
-                Some(dt) => builder.append_value(op(dt)),
-                None => builder.append_null(),
+fn value_to_string<O: OffsetSizeTrait>(
+    array: &dyn Array,
+) -> Result<ArrayRef, ArrowError> {
+    let mut builder = GenericStringBuilder::<O>::new();
+    let options = FormatOptions::default();

Review Comment:
   eventually we could even pass in `FormatOptions` to the cast kernel 
(definitely not this PR)!



##########
arrow-cast/src/cast.rs:
##########
@@ -2171,172 +2111,25 @@ where
     from.unary_opt::<_, R>(num::cast::cast::<T::Native, R::Native>)
 }
 
-fn as_time_with_string_op<
-    A: ArrayAccessor<Item = T::Native>,
-    OffsetSize,
-    T: ArrowTemporalType,
-    F,
->(
-    iter: ArrayIter<A>,
-    mut builder: GenericStringBuilder<OffsetSize>,
-    op: F,
-) -> ArrayRef
-where
-    OffsetSize: OffsetSizeTrait,
-    F: Fn(NaiveDateTime) -> String,
-    i64: From<T::Native>,
-{
-    iter.into_iter().for_each(|value| {
-        if let Some(value) = value {
-            match as_datetime::<T>(<i64 as From<_>>::from(value)) {
-                Some(dt) => builder.append_value(op(dt)),
-                None => builder.append_null(),
+fn value_to_string<O: OffsetSizeTrait>(
+    array: &dyn Array,
+) -> Result<ArrayRef, ArrowError> {
+    let mut builder = GenericStringBuilder::<O>::new();
+    let options = FormatOptions::default();
+    let formatter = ArrayFormatter::try_new(array, &options)?;
+    let data = array.data();
+    for i in 0..data.len() {
+        match data.is_null(i) {
+            true => builder.append_null(),
+            false => {
+                formatter.value(i).write(&mut builder)?;
+                builder.append_value("");

Review Comment:
   ```suggestion
                   // tell the builder the row is finished
                   builder.append_value("");
   ```



##########
arrow-cast/src/cast.rs:
##########
@@ -182,11 +181,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: 
&DataType) -> bool {
             | Time64(TimeUnit::Nanosecond)
             | Timestamp(TimeUnit::Nanosecond, None)
         ) => true,
-        (LargeUtf8, _) => DataType::is_numeric(to_type) && to_type != &Float16,
-        (Timestamp(_, _), Utf8) | (Timestamp(_, _), LargeUtf8) => true,

Review Comment:
   What is the rationale for this change? Is the Timestamp nanosecond with no 
timezone seems to be covered by the cases above, but timestamp with other units 
and Non null timezone is not (obviously) covered



##########
arrow-cast/src/cast.rs:
##########
@@ -5521,8 +5237,8 @@ mod tests {
         let b = cast(&array, &DataType::Utf8).unwrap();
         let c = b.as_any().downcast_ref::<StringArray>().unwrap();
         assert_eq!(&DataType::Utf8, c.data_type());
-        assert_eq!("1997-05-19 00:00:00", c.value(0));
-        assert_eq!("2018-12-25 00:00:00", c.value(1));
+        assert_eq!("1997-05-19T00:00:00", c.value(0));

Review Comment:
   Why did these change? This seems like a non trivial change, potentially, for 
downstream crates, right? Is there some way to get the old behavior back (like 
maybe passing FormatOptions to the cast kernel) 🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to