This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new ece8a7c  Adding ability to parse float from number with leading decimal (#831)
ece8a7c is described below

commit ece8a7c3a92c8a524b33deff3d84143afdb09cf4
Author: Brian Rackle <[email protected]>
AuthorDate: Tue Nov 9 14:59:37 2021 -0800

    Adding ability to parse float from number with leading decimal (#831)
    
    * Adding ability to parse float from number with leading decimal
    
    * Fixing deprecated std::usize::MAX constant per https://doc.rust-lang.org/core/usize/constant.MAX.html and making consistent with other usages
    
    * Add test case for 2. and issue link
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow/src/csv/reader.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
index b68ac1b..21924f7 100644
--- a/arrow/src/csv/reader.rs
+++ b/arrow/src/csv/reader.rs
@@ -60,7 +60,7 @@ use crate::record_batch::RecordBatch;
 use csv_crate::{ByteRecord, StringRecord};
 
 lazy_static! {
-    static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
+    static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d*\.\d+)$").unwrap();
     static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d+)$").unwrap();
     static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$")
         .case_insensitive(true)
@@ -271,7 +271,7 @@ pub fn infer_schema_from_files(
     has_header: bool,
 ) -> Result<Schema> {
     let mut schemas = vec![];
-    let mut records_to_read = max_read_records.unwrap_or(std::usize::MAX);
+    let mut records_to_read = max_read_records.unwrap_or(usize::MAX);
 
     for fname in files.iter() {
         let (schema, records_read) = infer_file_schema(
@@ -1342,6 +1342,9 @@ mod tests {
         assert_eq!(infer_field_schema("\"123\""), DataType::Utf8);
         assert_eq!(infer_field_schema("10"), DataType::Int64);
         assert_eq!(infer_field_schema("10.2"), DataType::Float64);
+        assert_eq!(infer_field_schema(".2"), DataType::Float64);
+        // Should be parsed as Float or Int. See https://github.com/apache/arrow-rs/issues/929
+        assert_eq!(infer_field_schema("2."), DataType::Utf8);
         assert_eq!(infer_field_schema("true"), DataType::Boolean);
         assert_eq!(infer_field_schema("false"), DataType::Boolean);
         assert_eq!(infer_field_schema("2020-11-08"), DataType::Date32);

Reply via email to