alamb commented on a change in pull request #1112: URL: https://github.com/apache/arrow-rs/pull/1112#discussion_r777108645
########## File path: arrow/src/csv/reader.rs ########## @@ -520,47 +554,60 @@ fn parse( DataType::Decimal(precision, scale) => { build_decimal_array(line_number, rows, i, *precision, *scale) } - DataType::Int8 => build_primitive_array::<Int8Type>(line_number, rows, i), + DataType::Int8 => { + build_primitive_array::<Int8Type>(line_number, rows, i, None) + } DataType::Int16 => { - build_primitive_array::<Int16Type>(line_number, rows, i) + build_primitive_array::<Int16Type>(line_number, rows, i, None) } DataType::Int32 => { - build_primitive_array::<Int32Type>(line_number, rows, i) + build_primitive_array::<Int32Type>(line_number, rows, i, None) } DataType::Int64 => { - build_primitive_array::<Int64Type>(line_number, rows, i) + build_primitive_array::<Int64Type>(line_number, rows, i, None) } DataType::UInt8 => { - build_primitive_array::<UInt8Type>(line_number, rows, i) + build_primitive_array::<UInt8Type>(line_number, rows, i, None) } DataType::UInt16 => { - build_primitive_array::<UInt16Type>(line_number, rows, i) + build_primitive_array::<UInt16Type>(line_number, rows, i, None) } DataType::UInt32 => { - build_primitive_array::<UInt32Type>(line_number, rows, i) + build_primitive_array::<UInt32Type>(line_number, rows, i, None) } DataType::UInt64 => { - build_primitive_array::<UInt64Type>(line_number, rows, i) + build_primitive_array::<UInt64Type>(line_number, rows, i, None) } DataType::Float32 => { - build_primitive_array::<Float32Type>(line_number, rows, i) + build_primitive_array::<Float32Type>(line_number, rows, i, None) } DataType::Float64 => { - build_primitive_array::<Float64Type>(line_number, rows, i) + build_primitive_array::<Float64Type>(line_number, rows, i, None) } DataType::Date32 => { - build_primitive_array::<Date32Type>(line_number, rows, i) - } - DataType::Date64 => { - build_primitive_array::<Date64Type>(line_number, rows, i) + build_primitive_array::<Date32Type>(line_number, rows, i, None) } - DataType::Timestamp(TimeUnit::Microsecond, _) => 
build_primitive_array::< - TimestampMicrosecondType, - >( - line_number, rows, i + DataType::Date64 => build_primitive_array::<Date64Type>( + line_number, + rows, + i, + datetime_format.clone(), ), + DataType::Timestamp(TimeUnit::Microsecond, _) => { + build_primitive_array::<TimestampMicrosecondType>( + line_number, + rows, + i, + None, Review comment: `datetime_format`? ########## File path: arrow/src/csv/reader.rs ########## @@ -316,6 +323,8 @@ pub struct Reader<R: Read> { batch_size: usize, /// Vector that can hold the `StringRecord`s of the batches batch_records: Vec<StringRecord>, + /// datetime format used to parse datetime values, (format understood by chrono) Review comment: Can you please provide the link to the appropriate chrono documentation? ########## File path: arrow/src/csv/reader.rs ########## @@ -520,47 +554,60 @@ fn parse( DataType::Decimal(precision, scale) => { build_decimal_array(line_number, rows, i, *precision, *scale) } - DataType::Int8 => build_primitive_array::<Int8Type>(line_number, rows, i), + DataType::Int8 => { + build_primitive_array::<Int8Type>(line_number, rows, i, None) + } DataType::Int16 => { - build_primitive_array::<Int16Type>(line_number, rows, i) + build_primitive_array::<Int16Type>(line_number, rows, i, None) } DataType::Int32 => { - build_primitive_array::<Int32Type>(line_number, rows, i) + build_primitive_array::<Int32Type>(line_number, rows, i, None) } DataType::Int64 => { - build_primitive_array::<Int64Type>(line_number, rows, i) + build_primitive_array::<Int64Type>(line_number, rows, i, None) } DataType::UInt8 => { - build_primitive_array::<UInt8Type>(line_number, rows, i) + build_primitive_array::<UInt8Type>(line_number, rows, i, None) } DataType::UInt16 => { - build_primitive_array::<UInt16Type>(line_number, rows, i) + build_primitive_array::<UInt16Type>(line_number, rows, i, None) } DataType::UInt32 => { - build_primitive_array::<UInt32Type>(line_number, rows, i) + build_primitive_array::<UInt32Type>(line_number, 
rows, i, None) } DataType::UInt64 => { - build_primitive_array::<UInt64Type>(line_number, rows, i) + build_primitive_array::<UInt64Type>(line_number, rows, i, None) } DataType::Float32 => { - build_primitive_array::<Float32Type>(line_number, rows, i) + build_primitive_array::<Float32Type>(line_number, rows, i, None) } DataType::Float64 => { - build_primitive_array::<Float64Type>(line_number, rows, i) + build_primitive_array::<Float64Type>(line_number, rows, i, None) } DataType::Date32 => { - build_primitive_array::<Date32Type>(line_number, rows, i) - } - DataType::Date64 => { - build_primitive_array::<Date64Type>(line_number, rows, i) + build_primitive_array::<Date32Type>(line_number, rows, i, None) Review comment: should this also have `datetime_format`? ########## File path: arrow/src/csv/reader.rs ########## @@ -520,47 +554,60 @@ fn parse( DataType::Decimal(precision, scale) => { build_decimal_array(line_number, rows, i, *precision, *scale) } - DataType::Int8 => build_primitive_array::<Int8Type>(line_number, rows, i), + DataType::Int8 => { + build_primitive_array::<Int8Type>(line_number, rows, i, None) + } DataType::Int16 => { - build_primitive_array::<Int16Type>(line_number, rows, i) + build_primitive_array::<Int16Type>(line_number, rows, i, None) } DataType::Int32 => { - build_primitive_array::<Int32Type>(line_number, rows, i) + build_primitive_array::<Int32Type>(line_number, rows, i, None) } DataType::Int64 => { - build_primitive_array::<Int64Type>(line_number, rows, i) + build_primitive_array::<Int64Type>(line_number, rows, i, None) } DataType::UInt8 => { - build_primitive_array::<UInt8Type>(line_number, rows, i) + build_primitive_array::<UInt8Type>(line_number, rows, i, None) } DataType::UInt16 => { - build_primitive_array::<UInt16Type>(line_number, rows, i) + build_primitive_array::<UInt16Type>(line_number, rows, i, None) } DataType::UInt32 => { - build_primitive_array::<UInt32Type>(line_number, rows, i) + build_primitive_array::<UInt32Type>(line_number, 
rows, i, None) } DataType::UInt64 => { - build_primitive_array::<UInt64Type>(line_number, rows, i) + build_primitive_array::<UInt64Type>(line_number, rows, i, None) } DataType::Float32 => { - build_primitive_array::<Float32Type>(line_number, rows, i) + build_primitive_array::<Float32Type>(line_number, rows, i, None) } DataType::Float64 => { - build_primitive_array::<Float64Type>(line_number, rows, i) + build_primitive_array::<Float64Type>(line_number, rows, i, None) } DataType::Date32 => { - build_primitive_array::<Date32Type>(line_number, rows, i) - } - DataType::Date64 => { - build_primitive_array::<Date64Type>(line_number, rows, i) + build_primitive_array::<Date32Type>(line_number, rows, i, None) } - DataType::Timestamp(TimeUnit::Microsecond, _) => build_primitive_array::< - TimestampMicrosecondType, - >( - line_number, rows, i + DataType::Date64 => build_primitive_array::<Date64Type>( + line_number, + rows, + i, + datetime_format.clone(), ), + DataType::Timestamp(TimeUnit::Microsecond, _) => { + build_primitive_array::<TimestampMicrosecondType>( + line_number, + rows, + i, + None, + ) + } DataType::Timestamp(TimeUnit::Nanosecond, _) => { - build_primitive_array::<TimestampNanosecondType>(line_number, rows, i) + build_primitive_array::<TimestampNanosecondType>( + line_number, + rows, + i, + None, Review comment: also here? ########## File path: arrow/src/csv/reader.rs ########## @@ -1041,6 +1116,20 @@ impl ReaderBuilder { self } + /// Set the datetime regex used to parse the string to Date64Type + /// this regex is used while infering schema + pub fn with_datetime_re(mut self, datetime_re: Regex) -> Self { + self.datetime_re = Some(datetime_re); + self + } + + /// Set the datetime regex used to parse the string to Date64Type Review comment: Can you also add a link here to the datetime format docs that are understood -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org