alamb commented on a change in pull request #1301:
URL: https://github.com/apache/arrow-rs/pull/1301#discussion_r816288190



##########
File path: arrow/src/csv/reader.rs
##########
@@ -711,68 +701,6 @@ impl Parser for Int16Type {}
 
 impl Parser for Int8Type {}

Review comment:
       is there a reason to leave these `Parser` impls in `reader.rs` rather 
than moving them to `reader_parser.rs` as well?

##########
File path: arrow/src/json/reader.rs
##########
@@ -1123,6 +1148,10 @@ impl Decoder {
             .iter()
             .filter(|field| projection.is_empty() || 
projection.contains(field.name()))
             .map(|field| {
+                let format_string = match field.metadata() {
+                    Some(meta) => meta.get("format_string"),
+                    _ => None,
+                };

Review comment:
       I don't think it matters, but I think you can rewrite this a bit more 
succinctly like this (untested):
   
   ```suggestion
                   let format_string = field.metadata()
                     .map(|meta| meta.get("format_string"));
   ```

##########
File path: arrow/src/json/reader.rs
##########
@@ -942,6 +943,30 @@ impl Decoder {
         ))
     }
 
+    #[allow(clippy::unnecessary_wraps)]
+    fn build_primitive_array_using_format_string<T>(
+        &self,
+        rows: &[Value],
+        col_name: &str,
+        format_string: &str,
+    ) -> Result<ArrayRef>
+    where
+        T: ArrowNumericType + Parser,
+        T::Native: num::NumCast,

Review comment:
       Do we need this trait bound? It may be left over from 
`build_primitive_array` 🤔 
   
   It seemed to compile just fine like this for me:
   
   ```suggestion
           T: Parser,
   ```

##########
File path: arrow/src/json/reader.rs
##########
@@ -942,6 +943,30 @@ impl Decoder {
         ))
     }
 
+    #[allow(clippy::unnecessary_wraps)]

Review comment:
       I know this is just copy/pasted from `build_primitive_array`, but I 
think we could follow this clippy lint rather than ignore it (perhaps as a 
follow-on PR).

##########
File path: arrow/src/json/reader.rs
##########
@@ -1123,6 +1148,10 @@ impl Decoder {
             .iter()
             .filter(|field| projection.is_empty() || 
projection.contains(field.name()))
             .map(|field| {
+                let format_string = match field.metadata() {
+                    Some(meta) => meta.get("format_string"),

Review comment:
       I recommend making `"format_string"` a symbolic constant.

##########
File path: arrow/src/util/reader_parser.rs
##########
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::datatypes::*;
+
+/// Specialized parsing implementations
+/// used by csv and json reader
+pub trait Parser: ArrowPrimitiveType {
+    fn parse(string: &str) -> Option<Self::Native> {
+        string.parse::<Self::Native>().ok()
+    }
+
+    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
+        Self::parse(string)
+    }
+}
+
+/// Number of days between 0001-01-01 and 1970-01-01
+const EPOCH_DAYS_FROM_CE: i32 = 719_163;
+
+impl Parser for Date32Type {
+    fn parse(string: &str) -> Option<i32> {
+        use chrono::Datelike;
+
+        match Self::DATA_TYPE {
+            DataType::Date32 => {
+                let date = string.parse::<chrono::NaiveDate>().ok()?;
+                Self::Native::from_i32(date.num_days_from_ce() - 
EPOCH_DAYS_FROM_CE)
+            }
+            _ => None,
+        }
+    }
+
+    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
+        use chrono::Datelike;
+        match Self::DATA_TYPE {
+            DataType::Date32 => {
+                let date = chrono::NaiveDate::parse_from_str(string, 
format).ok()?;
+                Self::Native::from_i32(date.num_days_from_ce() - 
EPOCH_DAYS_FROM_CE)
+            }
+            _ => None,
+        }
+    }
+}
+
+impl Parser for Date64Type {
+    fn parse(string: &str) -> Option<i64> {
+        match Self::DATA_TYPE {
+            DataType::Date64 => {
+                let date_time = string.parse::<chrono::NaiveDateTime>().ok()?;
+                Self::Native::from_i64(date_time.timestamp_millis())
+            }
+            _ => None,
+        }
+    }
+
+    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
+        match Self::DATA_TYPE {
+            DataType::Date64 => {
+                use chrono::format::Fixed;
+                use chrono::format::StrftimeItems;
+                let fmt = StrftimeItems::new(format);
+                let has_zone = fmt.into_iter().any(|item| match item {

Review comment:
       I realize this PR just moves this code around, but I wonder if we could 
reuse `string_to_timestamp_nanos` as a follow on PR
   
   
https://sourcegraph.com/github.com/apache/arrow-rs/-/blob/arrow/src/compute/kernels/cast_utils.rs?L69&subtree=true

##########
File path: arrow/src/json/reader.rs
##########
@@ -1875,6 +1926,15 @@ mod tests {
             .unwrap();
         assert!(2.0 - bb.value(0) < f32::EPSILON);
         assert!(-3.5 - bb.value(1) < f32::EPSILON);
+        let ee = batch
+            .column(e.0)
+            .as_any()
+            .downcast_ref::<Date32Array>()
+            .unwrap();
+        dbg!(ee);
+        assert_eq!(1, ee.value(0));
+        assert_eq!(-1, ee.value(1));
+        assert!(!ee.is_valid(2));

Review comment:
       The test looks good to me now

##########
File path: arrow/src/util/reader_parser.rs
##########
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::datatypes::*;
+
+/// Specialized parsing implementations
+/// used by csv and json reader
+pub trait Parser: ArrowPrimitiveType {
+    fn parse(string: &str) -> Option<Self::Native> {
+        string.parse::<Self::Native>().ok()
+    }
+
+    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
+        Self::parse(string)
+    }
+}
+
+/// Number of days between 0001-01-01 and 1970-01-01
+const EPOCH_DAYS_FROM_CE: i32 = 719_163;
+
+impl Parser for Date32Type {
+    fn parse(string: &str) -> Option<i32> {
+        use chrono::Datelike;
+
+        match Self::DATA_TYPE {
+            DataType::Date32 => {
+                let date = string.parse::<chrono::NaiveDate>().ok()?;
+                Self::Native::from_i32(date.num_days_from_ce() - 
EPOCH_DAYS_FROM_CE)
+            }
+            _ => None,
+        }
+    }
+
+    fn parse_formatted(string: &str, format: &str) -> Option<i32> {

Review comment:
       Perhaps what @Dandandan  was alluding to was to try and avoid the cost 
of parsing the format string for each element (e.g. try and reuse 
`StrftimeItems` somehow rather than recreating them all the time in 
https://docs.rs/chrono/latest/src/chrono/datetime.rs.html#386-390)
   
   However, it is probably worth noting that master already does the "parse 
the format string each time" so it is probably ok to leave that behavior in 
this PR (we could file a follow-on issue for someone to look at if they are 
interested).

##########
File path: arrow/src/util/reader_parser.rs
##########
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::datatypes::*;
+
+/// Specialized parsing implementations
+/// used by csv and json reader
+pub trait Parser: ArrowPrimitiveType {
+    fn parse(string: &str) -> Option<Self::Native> {
+        string.parse::<Self::Native>().ok()
+    }
+
+    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
+        Self::parse(string)
+    }
+}
+
+/// Number of days between 0001-01-01 and 1970-01-01
+const EPOCH_DAYS_FROM_CE: i32 = 719_163;
+
+impl Parser for Date32Type {
+    fn parse(string: &str) -> Option<i32> {
+        use chrono::Datelike;
+
+        match Self::DATA_TYPE {
+            DataType::Date32 => {
+                let date = string.parse::<chrono::NaiveDate>().ok()?;
+                Self::Native::from_i32(date.num_days_from_ce() - 
EPOCH_DAYS_FROM_CE)
+            }
+            _ => None,
+        }
+    }
+
+    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
+        use chrono::Datelike;
+        match Self::DATA_TYPE {
+            DataType::Date32 => {
+                let date = chrono::NaiveDate::parse_from_str(string, 
format).ok()?;
+                Self::Native::from_i32(date.num_days_from_ce() - 
EPOCH_DAYS_FROM_CE)
+            }
+            _ => None,
+        }
+    }
+}
+
+impl Parser for Date64Type {
+    fn parse(string: &str) -> Option<i64> {
+        match Self::DATA_TYPE {
+            DataType::Date64 => {
+                let date_time = string.parse::<chrono::NaiveDateTime>().ok()?;
+                Self::Native::from_i64(date_time.timestamp_millis())
+            }
+            _ => None,
+        }
+    }
+
+    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
+        match Self::DATA_TYPE {

Review comment:
       the match is strange here -- won't Self::DATA_TYPE always be 
`DataType::Date64`?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to