alamb commented on a change in pull request #1112:
URL: https://github.com/apache/arrow-rs/pull/1112#discussion_r777108645



##########
File path: arrow/src/csv/reader.rs
##########
@@ -520,47 +554,60 @@ fn parse(
                 DataType::Decimal(precision, scale) => {
                     build_decimal_array(line_number, rows, i, *precision, 
*scale)
                 }
-                DataType::Int8 => 
build_primitive_array::<Int8Type>(line_number, rows, i),
+                DataType::Int8 => {
+                    build_primitive_array::<Int8Type>(line_number, rows, i, 
None)
+                }
                 DataType::Int16 => {
-                    build_primitive_array::<Int16Type>(line_number, rows, i)
+                    build_primitive_array::<Int16Type>(line_number, rows, i, 
None)
                 }
                 DataType::Int32 => {
-                    build_primitive_array::<Int32Type>(line_number, rows, i)
+                    build_primitive_array::<Int32Type>(line_number, rows, i, 
None)
                 }
                 DataType::Int64 => {
-                    build_primitive_array::<Int64Type>(line_number, rows, i)
+                    build_primitive_array::<Int64Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt8 => {
-                    build_primitive_array::<UInt8Type>(line_number, rows, i)
+                    build_primitive_array::<UInt8Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt16 => {
-                    build_primitive_array::<UInt16Type>(line_number, rows, i)
+                    build_primitive_array::<UInt16Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt32 => {
-                    build_primitive_array::<UInt32Type>(line_number, rows, i)
+                    build_primitive_array::<UInt32Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt64 => {
-                    build_primitive_array::<UInt64Type>(line_number, rows, i)
+                    build_primitive_array::<UInt64Type>(line_number, rows, i, 
None)
                 }
                 DataType::Float32 => {
-                    build_primitive_array::<Float32Type>(line_number, rows, i)
+                    build_primitive_array::<Float32Type>(line_number, rows, i, 
None)
                 }
                 DataType::Float64 => {
-                    build_primitive_array::<Float64Type>(line_number, rows, i)
+                    build_primitive_array::<Float64Type>(line_number, rows, i, 
None)
                 }
                 DataType::Date32 => {
-                    build_primitive_array::<Date32Type>(line_number, rows, i)
-                }
-                DataType::Date64 => {
-                    build_primitive_array::<Date64Type>(line_number, rows, i)
+                    build_primitive_array::<Date32Type>(line_number, rows, i, 
None)
                 }
-                DataType::Timestamp(TimeUnit::Microsecond, _) => 
build_primitive_array::<
-                    TimestampMicrosecondType,
-                >(
-                    line_number, rows, i
+                DataType::Date64 => build_primitive_array::<Date64Type>(
+                    line_number,
+                    rows,
+                    i,
+                    datetime_format.clone(),
                 ),
+                DataType::Timestamp(TimeUnit::Microsecond, _) => {
+                    build_primitive_array::<TimestampMicrosecondType>(
+                        line_number,
+                        rows,
+                        i,
+                        None,

Review comment:
       Should this pass `datetime_format` instead of `None`?

##########
File path: arrow/src/csv/reader.rs
##########
@@ -316,6 +323,8 @@ pub struct Reader<R: Read> {
     batch_size: usize,
     /// Vector that can hold the `StringRecord`s of the batches
     batch_records: Vec<StringRecord>,
+    /// datetime format used to parse datetime values, (format understood by 
chrono)

Review comment:
       Can you please provide the link to the appropriate chrono documentation?

##########
File path: arrow/src/csv/reader.rs
##########
@@ -520,47 +554,60 @@ fn parse(
                 DataType::Decimal(precision, scale) => {
                     build_decimal_array(line_number, rows, i, *precision, 
*scale)
                 }
-                DataType::Int8 => 
build_primitive_array::<Int8Type>(line_number, rows, i),
+                DataType::Int8 => {
+                    build_primitive_array::<Int8Type>(line_number, rows, i, 
None)
+                }
                 DataType::Int16 => {
-                    build_primitive_array::<Int16Type>(line_number, rows, i)
+                    build_primitive_array::<Int16Type>(line_number, rows, i, 
None)
                 }
                 DataType::Int32 => {
-                    build_primitive_array::<Int32Type>(line_number, rows, i)
+                    build_primitive_array::<Int32Type>(line_number, rows, i, 
None)
                 }
                 DataType::Int64 => {
-                    build_primitive_array::<Int64Type>(line_number, rows, i)
+                    build_primitive_array::<Int64Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt8 => {
-                    build_primitive_array::<UInt8Type>(line_number, rows, i)
+                    build_primitive_array::<UInt8Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt16 => {
-                    build_primitive_array::<UInt16Type>(line_number, rows, i)
+                    build_primitive_array::<UInt16Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt32 => {
-                    build_primitive_array::<UInt32Type>(line_number, rows, i)
+                    build_primitive_array::<UInt32Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt64 => {
-                    build_primitive_array::<UInt64Type>(line_number, rows, i)
+                    build_primitive_array::<UInt64Type>(line_number, rows, i, 
None)
                 }
                 DataType::Float32 => {
-                    build_primitive_array::<Float32Type>(line_number, rows, i)
+                    build_primitive_array::<Float32Type>(line_number, rows, i, 
None)
                 }
                 DataType::Float64 => {
-                    build_primitive_array::<Float64Type>(line_number, rows, i)
+                    build_primitive_array::<Float64Type>(line_number, rows, i, 
None)
                 }
                 DataType::Date32 => {
-                    build_primitive_array::<Date32Type>(line_number, rows, i)
-                }
-                DataType::Date64 => {
-                    build_primitive_array::<Date64Type>(line_number, rows, i)
+                    build_primitive_array::<Date32Type>(line_number, rows, i, 
None)

Review comment:
       should this also have `datetime_format`?

##########
File path: arrow/src/csv/reader.rs
##########
@@ -520,47 +554,60 @@ fn parse(
                 DataType::Decimal(precision, scale) => {
                     build_decimal_array(line_number, rows, i, *precision, 
*scale)
                 }
-                DataType::Int8 => 
build_primitive_array::<Int8Type>(line_number, rows, i),
+                DataType::Int8 => {
+                    build_primitive_array::<Int8Type>(line_number, rows, i, 
None)
+                }
                 DataType::Int16 => {
-                    build_primitive_array::<Int16Type>(line_number, rows, i)
+                    build_primitive_array::<Int16Type>(line_number, rows, i, 
None)
                 }
                 DataType::Int32 => {
-                    build_primitive_array::<Int32Type>(line_number, rows, i)
+                    build_primitive_array::<Int32Type>(line_number, rows, i, 
None)
                 }
                 DataType::Int64 => {
-                    build_primitive_array::<Int64Type>(line_number, rows, i)
+                    build_primitive_array::<Int64Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt8 => {
-                    build_primitive_array::<UInt8Type>(line_number, rows, i)
+                    build_primitive_array::<UInt8Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt16 => {
-                    build_primitive_array::<UInt16Type>(line_number, rows, i)
+                    build_primitive_array::<UInt16Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt32 => {
-                    build_primitive_array::<UInt32Type>(line_number, rows, i)
+                    build_primitive_array::<UInt32Type>(line_number, rows, i, 
None)
                 }
                 DataType::UInt64 => {
-                    build_primitive_array::<UInt64Type>(line_number, rows, i)
+                    build_primitive_array::<UInt64Type>(line_number, rows, i, 
None)
                 }
                 DataType::Float32 => {
-                    build_primitive_array::<Float32Type>(line_number, rows, i)
+                    build_primitive_array::<Float32Type>(line_number, rows, i, 
None)
                 }
                 DataType::Float64 => {
-                    build_primitive_array::<Float64Type>(line_number, rows, i)
+                    build_primitive_array::<Float64Type>(line_number, rows, i, 
None)
                 }
                 DataType::Date32 => {
-                    build_primitive_array::<Date32Type>(line_number, rows, i)
-                }
-                DataType::Date64 => {
-                    build_primitive_array::<Date64Type>(line_number, rows, i)
+                    build_primitive_array::<Date32Type>(line_number, rows, i, 
None)
                 }
-                DataType::Timestamp(TimeUnit::Microsecond, _) => 
build_primitive_array::<
-                    TimestampMicrosecondType,
-                >(
-                    line_number, rows, i
+                DataType::Date64 => build_primitive_array::<Date64Type>(
+                    line_number,
+                    rows,
+                    i,
+                    datetime_format.clone(),
                 ),
+                DataType::Timestamp(TimeUnit::Microsecond, _) => {
+                    build_primitive_array::<TimestampMicrosecondType>(
+                        line_number,
+                        rows,
+                        i,
+                        None,
+                    )
+                }
                 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-                    
build_primitive_array::<TimestampNanosecondType>(line_number, rows, i)
+                    build_primitive_array::<TimestampNanosecondType>(
+                        line_number,
+                        rows,
+                        i,
+                        None,

Review comment:
       Should `datetime_format` also be passed here?

##########
File path: arrow/src/csv/reader.rs
##########
@@ -1041,6 +1116,20 @@ impl ReaderBuilder {
         self
     }
 
+    /// Set the datetime regex used to parse the string to Date64Type
+    /// this regex is used while infering schema
+    pub fn with_datetime_re(mut self, datetime_re: Regex) -> Self {
+        self.datetime_re = Some(datetime_re);
+        self
+    }
+
+    /// Set the datetime regex used to parse the string to Date64Type

Review comment:
       Can you also add a link here to the docs for the datetime formats that 
are understood by chrono?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to