tustvold commented on code in PR #4133:
URL: https://github.com/apache/arrow-rs/pull/4133#discussion_r1177049717
##########
arrow-csv/src/reader/mod.rs:
##########
@@ -194,32 +194,150 @@ impl InferredDataType {
}
/// Updates the [`InferredDataType`] with the given string
- fn update(&mut self, string: &str, datetime_re: Option<&Regex>) {
+ fn update(&mut self, string: &str) {
self.packed |= if string.starts_with('"') {
1 << 8 // Utf8
} else if let Some(m) = REGEX_SET.matches(string).into_iter().next() {
1 << m
} else {
- match datetime_re {
- // Timestamp(Nanosecond)
- Some(d) if d.is_match(string) => 1 << 7,
- _ => 1 << 8, // Utf8
- }
+ 1 << 8 // Utf8
}
}
}
-/// This is a collection of options for csv reader when the builder pattern
cannot be used
-/// and the parameters need to be passed around
-#[derive(Debug, Default, Clone)]
-struct ReaderOptions {
+/// The format specification for the CSV file
+#[derive(Debug, Clone, Default)]
+pub struct Format {
has_header: bool,
delimiter: Option<u8>,
escape: Option<u8>,
quote: Option<u8>,
terminator: Option<u8>,
- max_read_records: Option<usize>,
- datetime_re: Option<Regex>,
+}
+
+impl Format {
+ pub fn with_header(mut self, has_header: bool) -> Self {
+ self.has_header = has_header;
+ self
+ }
+
+ pub fn with_delimiter(mut self, delimiter: u8) -> Self {
+ self.delimiter = Some(delimiter);
+ self
+ }
+
+ pub fn with_escape(mut self, escape: u8) -> Self {
+ self.escape = Some(escape);
+ self
+ }
+
+ pub fn with_quote(mut self, quote: u8) -> Self {
+ self.quote = Some(quote);
+ self
+ }
+
+ pub fn with_terminator(mut self, terminator: u8) -> Self {
+ self.terminator = Some(terminator);
+ self
+ }
+
+ /// Infer schema of CSV records from the provided `reader`
+ ///
+ /// If `max_records` is `None`, all records will be read; otherwise, up to
+ /// `max_records` records are read to infer the schema
+ ///
+ /// Returns inferred schema and number of records read
+ pub fn infer_schema<R: Read>(
+ &self,
+ reader: R,
+ max_records: Option<usize>,
+ ) -> Result<(Schema, usize), ArrowError> {
+ let mut csv_reader = self.build_reader(reader);
Review Comment:
This is the logic from `infer_reader_schema_with_csv_options`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]