This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new ece8a7c Adding ability to parse float from number with leading decimal (#831)
ece8a7c is described below
commit ece8a7c3a92c8a524b33deff3d84143afdb09cf4
Author: Brian Rackle <[email protected]>
AuthorDate: Tue Nov 9 14:59:37 2021 -0800
Adding ability to parse float from number with leading decimal (#831)
* Adding ability to parse float from number with leading decimal
* Fixing deprecated std::usize::MAX constant per
https://doc.rust-lang.org/core/usize/constant.MAX.html and making consistent
with other usages
* Add test case for 2. and issue link
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow/src/csv/reader.rs | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
index b68ac1b..21924f7 100644
--- a/arrow/src/csv/reader.rs
+++ b/arrow/src/csv/reader.rs
@@ -60,7 +60,7 @@ use crate::record_batch::RecordBatch;
use csv_crate::{ByteRecord, StringRecord};
lazy_static! {
- static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
+ static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d*\.\d+)$").unwrap();
static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d+)$").unwrap();
static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$")
.case_insensitive(true)
@@ -271,7 +271,7 @@ pub fn infer_schema_from_files(
has_header: bool,
) -> Result<Schema> {
let mut schemas = vec![];
- let mut records_to_read = max_read_records.unwrap_or(std::usize::MAX);
+ let mut records_to_read = max_read_records.unwrap_or(usize::MAX);
for fname in files.iter() {
let (schema, records_read) = infer_file_schema(
@@ -1342,6 +1342,9 @@ mod tests {
assert_eq!(infer_field_schema("\"123\""), DataType::Utf8);
assert_eq!(infer_field_schema("10"), DataType::Int64);
assert_eq!(infer_field_schema("10.2"), DataType::Float64);
+ assert_eq!(infer_field_schema(".2"), DataType::Float64);
+ // Should be parsed as Float or Int. See https://github.com/apache/arrow-rs/issues/929
+ assert_eq!(infer_field_schema("2."), DataType::Utf8);
assert_eq!(infer_field_schema("true"), DataType::Boolean);
assert_eq!(infer_field_schema("false"), DataType::Boolean);
assert_eq!(infer_field_schema("2020-11-08"), DataType::Date32);