This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new e72875e  Update DECIMAL_RE to allow scientific notation in auto inferred schemas (#1216)
e72875e is described below

commit e72875e4b465f9d4eae1b852051e977488f97796
Author: Patrick More <[email protected]>
AuthorDate: Sat Jan 22 12:18:10 2022 -0800

    Update DECIMAL_RE to allow scientific notation in auto inferred schemas (#1216)
    
    * Update DECIMAL_RE to allow scientific notation in auto inferred schemas
    
    * Fixed format lint
---
 arrow/src/csv/reader.rs           | 8 +++++---
 arrow/test/data/various_types.csv | 4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
index 269e55f..0ade29c 100644
--- a/arrow/src/csv/reader.rs
+++ b/arrow/src/csv/reader.rs
@@ -64,7 +64,8 @@ use std::ops::Neg;
 lazy_static! {
     static ref PARSE_DECIMAL_RE: Regex =
         Regex::new(r"^-?(\d+\.?\d*|\d*\.?\d+)$").unwrap();
-    static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d*\.\d+|\d+\.\d*)$").unwrap();
+    static ref DECIMAL_RE: Regex =
+        Regex::new(r"^-?((\d*\.\d+|\d+\.\d*)([eE]-?\d+)?|\d+([eE]-?\d+))$").unwrap();
     static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d+)$").unwrap();
     static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$")
         .case_insensitive(true)
@@ -1570,7 +1571,7 @@ mod tests {
         let mut csv = builder.build(file).unwrap();
         let batch = csv.next().unwrap().unwrap();
 
-        assert_eq!(5, batch.num_rows());
+        assert_eq!(7, batch.num_rows());
         assert_eq!(6, batch.num_columns());
 
         let schema = batch.schema();
@@ -1872,6 +1873,7 @@ mod tests {
         writeln!(csv1, "c1,c2,c3")?;
         writeln!(csv1, "1,\"foo\",0.5")?;
         writeln!(csv1, "3,\"bar\",1")?;
+        writeln!(csv1, "3,\"bar\",2e-06")?;
         // reading csv2 will set c2 to optional
         writeln!(csv2, "c1,c2,c3,c4")?;
         writeln!(csv2, "10,,3.14,true")?;
@@ -1887,7 +1889,7 @@ mod tests {
                 csv4.path().to_str().unwrap().to_string(),
             ],
             b',',
-            Some(3), // only csv1 and csv2 should be read
+            Some(4), // only csv1 and csv2 should be read
             true,
         )?;
 
diff --git a/arrow/test/data/various_types.csv b/arrow/test/data/various_types.csv
index 8f4466f..570d07f 100644
--- a/arrow/test/data/various_types.csv
+++ b/arrow/test/data/various_types.csv
@@ -3,4 +3,6 @@ c_int|c_float|c_string|c_bool|c_date|c_datetime
 2|2.2|"2.22"|true|2020-11-08|2020-11-08T01:00:00
 3||"3.33"|true|1969-12-31|1969-11-08T02:00:00
 4|4.4||false||
-5|6.6|""|false|1990-01-01|1990-01-01T03:00:00
\ No newline at end of file
+5|6.6|""|false|1990-01-01|1990-01-01T03:00:00
+4|4e6||false||
+4|4.0e-6||false||
\ No newline at end of file

Reply via email to