This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ac45f32 ARROW-3839: [Rust] Add ability to infer schema in CSV reader
ac45f32 is described below
commit ac45f3210a194049ef35f49847dbc4ff5e70d48f
Author: Neville Dipale <[email protected]>
AuthorDate: Tue Jan 8 16:49:12 2019 -0700
ARROW-3839: [Rust] Add ability to infer schema in CSV reader
Resubmission of #3128
Author: Neville Dipale <[email protected]>
Closes #3349 from nevi-me/rust/infer-csv-schema and squashes the following
commits:
0838199 <Neville Dipale> ARROW-3839: Add ability to infer schema in CSV
reader
---
ci/rust-build-main.bat | 1 +
ci/travis_script_rust.sh | 1 +
rust/arrow/Cargo.toml | 2 +
rust/arrow/examples/read_csv_infer_schema.rs | 66 +++++
rust/arrow/src/csv/mod.rs | 1 +
rust/arrow/src/csv/reader.rs | 373 +++++++++++++++++++++++-
rust/arrow/src/datatypes.rs | 4 +-
rust/arrow/src/error.rs | 37 +++
rust/arrow/test/data/uk_cities_with_headers.csv | 38 +++
rust/arrow/test/data/various_types.csv | 6 +
10 files changed, 524 insertions(+), 5 deletions(-)
diff --git a/ci/rust-build-main.bat b/ci/rust-build-main.bat
index ac5c9e7..b36a97a 100644
--- a/ci/rust-build-main.bat
+++ b/ci/rust-build-main.bat
@@ -40,5 +40,6 @@ cd arrow
cargo run --example builders --target %TARGET% --release || exit /B
cargo run --example dynamic_types --target %TARGET% --release || exit /B
cargo run --example read_csv --target %TARGET% --release || exit /B
+cargo run --example read_csv_infer_schema --target %TARGET% --release || exit
/B
popd
diff --git a/ci/travis_script_rust.sh b/ci/travis_script_rust.sh
index 8e3c8c3..c25d64e 100755
--- a/ci/travis_script_rust.sh
+++ b/ci/travis_script_rust.sh
@@ -39,5 +39,6 @@ cd arrow
cargo run --example builders
cargo run --example dynamic_types
cargo run --example read_csv
+cargo run --example read_csv_infer_schema
popd
diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index 77e8d53..38e7e5e 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -43,6 +43,8 @@ serde_json = "1.0.13"
rand = "0.5"
csv = "1.0.0"
num = "0.2"
+regex = "1.1"
+lazy_static = "1.2"
[dev-dependencies]
criterion = "0.2"
diff --git a/rust/arrow/examples/read_csv_infer_schema.rs
b/rust/arrow/examples/read_csv_infer_schema.rs
new file mode 100644
index 0000000..9dd2d2a
--- /dev/null
+++ b/rust/arrow/examples/read_csv_infer_schema.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate arrow;
+
+use arrow::array::{BinaryArray, Float64Array};
+use arrow::csv;
+use std::fs::File;
+
+fn main() {
+ let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
+ let builder = csv::ReaderBuilder::new()
+ .has_headers(true)
+ .infer_schema(Some(100));
+ let mut csv = builder.build(file).unwrap();
+ let batch = csv.next().unwrap().unwrap();
+
+ println!(
+ "Loaded {} rows containing {} columns",
+ batch.num_rows(),
+ batch.num_columns()
+ );
+
+ println!("Inferred schema: {:?}", batch.schema());
+
+ let city = batch
+ .column(0)
+ .as_any()
+ .downcast_ref::<BinaryArray>()
+ .unwrap();
+ let lat = batch
+ .column(1)
+ .as_any()
+ .downcast_ref::<Float64Array>()
+ .unwrap();
+ let lng = batch
+ .column(2)
+ .as_any()
+ .downcast_ref::<Float64Array>()
+ .unwrap();
+
+ for i in 0..batch.num_rows() {
+ let city_name: String =
String::from_utf8(city.value(i).to_vec()).unwrap();
+
+ println!(
+ "City: {}, Latitude: {}, Longitude: {}",
+ city_name,
+ lat.value(i),
+ lng.value(i)
+ );
+ }
+}
diff --git a/rust/arrow/src/csv/mod.rs b/rust/arrow/src/csv/mod.rs
index 9f2bd1d..6521b19 100644
--- a/rust/arrow/src/csv/mod.rs
+++ b/rust/arrow/src/csv/mod.rs
@@ -18,3 +18,4 @@
pub mod reader;
pub use self::reader::Reader;
+pub use self::reader::ReaderBuilder;
diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs
index 57c7dde..49e0302 100644
--- a/rust/arrow/src/csv/reader.rs
+++ b/rust/arrow/src/csv/reader.rs
@@ -40,8 +40,11 @@
//! let batch = csv.next().unwrap().unwrap();
//! ```
+use lazy_static::lazy_static;
+use regex::{Regex, RegexBuilder};
+use std::collections::HashSet;
use std::fs::File;
-use std::io::BufReader;
+use std::io::{BufReader, Seek, SeekFrom};
use std::sync::Arc;
use csv as csv_crate;
@@ -54,6 +57,130 @@ use crate::record_batch::RecordBatch;
use self::csv_crate::{StringRecord, StringRecordsIntoIter};
+lazy_static! {
+ static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
+ static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d*.)$").unwrap();
+ static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$")
+ .case_insensitive(true)
+ .build()
+ .unwrap();
+}
+
+/// Infer the data type of a record
+fn infer_field_schema(string: &str) -> DataType {
+ // when quoting is enabled in the reader, these quotes aren't escaped, we
default to Utf8 for them
+ if string.starts_with("\"") {
+ return DataType::Utf8;
+ }
+ // match regex in a particular order
+ if BOOLEAN_RE.is_match(string) {
+ return DataType::Boolean;
+ } else if DECIMAL_RE.is_match(string) {
+ return DataType::Float64;
+ } else if INTEGER_RE.is_match(string) {
+ return DataType::Int64;
+ } else {
+ return DataType::Utf8;
+ }
+}
+
+/// Infer the schema of a CSV file by reading through the first n records of
the file,
+/// with `max_read_records` controlling the maximum number of records to read.
+///
+/// If `max_read_records` is not set, the whole file is read to infer its
schema.
+fn infer_file_schema(
+ mut file: File,
+ delimiter: u8,
+ max_read_records: Option<usize>,
+ has_headers: bool,
+) -> Result<Schema> {
+ let mut csv_reader = csv::ReaderBuilder::new()
+ .delimiter(delimiter)
+ .from_reader(BufReader::new(file.try_clone()?));
+
+ // get or create header names
+ // when has_headers is false, creates default column names with column_
prefix
+ let headers: Vec<String> = if has_headers {
+ let headers = &csv_reader.headers()?.clone();
+ headers.iter().map(|s| s.to_string()).collect()
+ } else {
+ let first_record_count = &csv_reader.headers()?.len();
+ (0..*first_record_count)
+ .map(|i| format!("column_{}", i + 1))
+ .into_iter()
+ .collect()
+ };
+
+ // save the csv reader position after reading headers
+ let position = csv_reader.position().clone();
+
+ let header_length = headers.len();
+ // keep track of inferred field types
+ let mut column_types: Vec<HashSet<DataType>> = vec![HashSet::new();
header_length];
+ // keep track of columns with nulls
+ let mut nulls: Vec<bool> = vec![false; header_length];
+
+ // return csv reader position to after headers
+ csv_reader.seek(position)?;
+
+ let mut fields = vec![];
+
+ for result in csv_reader
+ .into_records()
+ .take(max_read_records.unwrap_or(std::usize::MAX))
+ {
+ let record = result?;
+
+ for i in 0..header_length {
+ let string: Option<&str> = record.get(i);
+ match string {
+ Some(s) => {
+ if s == "" {
+ nulls[i] = true;
+ } else {
+ column_types[i].insert(infer_field_schema(s));
+ }
+ }
+ _ => {}
+ }
+ }
+ }
+
+ // build schema from inference results
+ for i in 0..header_length {
+ let possibilities = &column_types[i];
+ let has_nulls = nulls[i];
+ let field_name = &headers[i];
+
+ // determine data type based on possible types
+ // if there are incompatible types, use DataType::Utf8
+ match possibilities.len() {
+ 1 => {
+ for dtype in possibilities.iter() {
+ fields.push(Field::new(&field_name, dtype.clone(),
has_nulls));
+ }
+ }
+ 2 => {
+ if possibilities.contains(&DataType::Int64)
+ && possibilities.contains(&DataType::Float64)
+ {
+ // we have an integer and double, fall down to double
+ fields.push(Field::new(&field_name, DataType::Float64,
has_nulls));
+ } else {
+ // default to Utf8 for conflicting datatypes (e.g bool and
int)
+ fields.push(Field::new(&field_name, DataType::Utf8,
has_nulls));
+ }
+ }
+ _ => fields.push(Field::new(&field_name, DataType::Utf8,
has_nulls)),
+ }
+ }
+
+ // return the file seek back to the start
+ file.seek(SeekFrom::Start(0))?;
+
+ Ok(Schema::new(fields))
+}
+
/// CSV file reader
pub struct Reader {
/// Explicit schema for the CSV file
@@ -68,6 +195,8 @@ pub struct Reader {
impl Reader {
/// Create a new CsvReader
+ ///
+ /// To customise the Reader, such as to enable schema inference, use
`ReaderBuilder`
pub fn new(
file: File,
schema: Arc<Schema>,
@@ -78,10 +207,9 @@ impl Reader {
let csv_reader = csv::ReaderBuilder::new()
.has_headers(has_headers)
.from_reader(BufReader::new(file));
-
let record_iter = csv_reader.into_records();
Reader {
- schema: schema.clone(),
+ schema,
projection,
record_iter,
batch_size,
@@ -194,6 +322,141 @@ impl Reader {
}
}
+/// CSV file reader builder
+pub struct ReaderBuilder {
+ /// Optional schema for the CSV file
+ ///
+ /// If the schema is not supplied, the reader will try to infer the schema
+ /// based on the CSV structure.
+ schema: Option<Arc<Schema>>,
+ /// Whether the file has headers or not
+ ///
+ /// If schema inference is run on a file with no headers, default column
names
+ /// are created.
+ has_headers: bool,
+ /// An optional column delimiter. Defaults to `b','`
+ delimiter: Option<u8>,
+ /// Optional maximum number of records to read during schema inference
+ ///
+ /// If a number is not provided, all the records are read.
+ max_records: Option<usize>,
+ /// Batch size (number of records to load each time)
+ ///
+ /// The default batch size when using the `ReaderBuilder` is 1024 records
+ batch_size: usize,
+ /// Optional projection for which columns to load (zero-based column
indices)
+ projection: Option<Vec<usize>>,
+}
+
+impl Default for ReaderBuilder {
+ fn default() -> ReaderBuilder {
+ ReaderBuilder {
+ schema: None,
+ has_headers: false,
+ delimiter: None,
+ max_records: None,
+ batch_size: 1024,
+ projection: None,
+ }
+ }
+}
+
+impl ReaderBuilder {
+ /// Create a new builder for configuring CSV parsing options.
+ ///
+ /// To convert a builder into a reader, call `ReaderBuilder::build`
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// extern crate arrow;
+ ///
+ /// use arrow::csv;
+ /// use std::fs::File;
+ ///
+ /// fn example() -> csv::Reader {
+ /// let file =
File::open("test/data/uk_cities_with_headers.csv").unwrap();
+ ///
+ /// // create a builder, inferring the schema with the first 100
records
+ /// let builder = csv::ReaderBuilder::new().infer_schema(Some(100));
+ ///
+ /// let reader = builder.build(file).unwrap();
+ ///
+ /// reader
+ /// }
+ /// ```
+ pub fn new() -> ReaderBuilder {
+ ReaderBuilder::default()
+ }
+
+ /// Set the CSV file's schema
+ pub fn with_schema(mut self, schema: Arc<Schema>) -> Self {
+ self.schema = Some(schema);
+ self
+ }
+
+ /// Set whether the CSV file has headers
+ pub fn has_headers(mut self, has_headers: bool) -> Self {
+ self.has_headers = has_headers;
+ self
+ }
+
+ /// Set the CSV file's column delimiter as a byte character
+ pub fn with_delimiter(mut self, delimiter: u8) -> Self {
+ self.delimiter = Some(delimiter);
+ self
+ }
+
+ /// Set the CSV reader to infer the schema of the file
+ pub fn infer_schema(mut self, max_records: Option<usize>) -> Self {
+ // remove any schema that is set
+ self.schema = None;
+ self.max_records = max_records;
+ self
+ }
+
+ /// Set the batch size (number of records to load at one time)
+ pub fn with_batch_size(mut self, batch_size: usize) -> Self {
+ self.batch_size = batch_size;
+ self
+ }
+
+ /// Set the reader's column projection
+ pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
+ self.projection = Some(projection);
+ self
+ }
+
+ /// Create a new `Reader` from the `ReaderBuilder`
+ pub fn build(self, file: File) -> Result<Reader> {
+ // check if schema should be inferred
+ let schema = match self.schema {
+ Some(schema) => schema,
+ None => {
+ let inferred_schema = infer_file_schema(
+ file.try_clone().unwrap(),
+ self.delimiter.unwrap_or(b','),
+ self.max_records,
+ self.has_headers,
+ )?;
+
+ Arc::new(inferred_schema)
+ }
+ };
+ let csv_reader = csv::ReaderBuilder::new()
+ .delimiter(self.delimiter.unwrap_or(b','))
+ .has_headers(self.has_headers)
+ .from_reader(BufReader::new(file));
+ let record_iter = csv_reader.into_records();
+ Ok(Reader {
+ schema,
+ projection: self.projection.clone(),
+ record_iter,
+ batch_size: self.batch_size,
+ })
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -237,6 +500,75 @@ mod tests {
}
#[test]
+ fn test_csv_with_schema_inference() {
+ let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
+
+ let builder =
ReaderBuilder::new().has_headers(true).infer_schema(None);
+
+ let mut csv = builder.build(file).unwrap();
+ let batch = csv.next().unwrap().unwrap();
+ assert_eq!(37, batch.num_rows());
+ assert_eq!(3, batch.num_columns());
+
+ // access data from a primitive array
+ let lat = batch
+ .column(1)
+ .as_any()
+ .downcast_ref::<Float64Array>()
+ .unwrap();
+ assert_eq!(57.653484, lat.value(0));
+
+ // access data from a string array (ListArray<u8>)
+ let city = batch
+ .column(0)
+ .as_any()
+ .downcast_ref::<BinaryArray>()
+ .unwrap();
+
+ let city_name: String =
String::from_utf8(city.value(13).to_vec()).unwrap();
+
+ assert_eq!("Aberdeen, Aberdeen City, UK", city_name);
+ }
+
+ #[test]
+ fn test_csv_with_schema_inference_no_headers() {
+ let file = File::open("test/data/uk_cities.csv").unwrap();
+
+ let builder = ReaderBuilder::new().infer_schema(None);
+
+ let mut csv = builder.build(file).unwrap();
+ let batch = csv.next().unwrap().unwrap();
+
+ // csv field names should be 'column_{number}'
+ let schema = batch.schema();
+ assert_eq!("column_1", schema.field(0).name());
+ assert_eq!("column_2", schema.field(1).name());
+ assert_eq!("column_3", schema.field(2).name());
+
+ assert_eq!(37, batch.num_rows());
+ assert_eq!(3, batch.num_columns());
+
+ // access data from a primitive array
+ let lat = batch
+ .column(1)
+ .as_any()
+ .downcast_ref::<Float64Array>()
+ .unwrap();
+ assert_eq!(57.653484, lat.value(0));
+
+ // access data from a string array (ListArray<u8>)
+ let city = batch
+ .column(0)
+ .as_any()
+ .downcast_ref::<BinaryArray>()
+ .unwrap();
+
+ let city_name: String =
String::from_utf8(city.value(13).to_vec()).unwrap();
+
+ assert_eq!("Aberdeen, Aberdeen City, UK", city_name);
+ }
+
+ #[test]
fn test_csv_with_projection() {
let schema = Schema::new(vec![
Field::new("city", DataType::Utf8, false),
@@ -272,4 +604,39 @@ mod tests {
assert_eq!(false, batch.column(1).is_null(4));
}
+ #[test]
+ fn test_nulls_with_inference() {
+ let file = File::open("test/data/various_types.csv").unwrap();
+
+ let builder = ReaderBuilder::new()
+ .infer_schema(None)
+ .has_headers(true)
+ .with_delimiter(b'|')
+ .with_batch_size(512)
+ .with_projection(vec![0, 1, 2, 3]);
+
+ let mut csv = builder.build(file).unwrap();
+ let batch = csv.next().unwrap().unwrap();
+
+ assert_eq!(5, batch.num_rows());
+ assert_eq!(4, batch.num_columns());
+
+ let schema = batch.schema();
+
+ assert_eq!(&DataType::Int64, schema.field(0).data_type());
+ assert_eq!(&DataType::Float64, schema.field(1).data_type());
+ assert_eq!(&DataType::Float64, schema.field(2).data_type());
+ assert_eq!(&DataType::Boolean, schema.field(3).data_type());
+
+ assert_eq!(false, schema.field(0).is_nullable());
+ assert_eq!(true, schema.field(1).is_nullable());
+ assert_eq!(true, schema.field(2).is_nullable());
+ assert_eq!(false, schema.field(3).is_nullable());
+
+ assert_eq!(false, batch.column(1).is_null(0));
+ assert_eq!(false, batch.column(1).is_null(1));
+ assert_eq!(true, batch.column(1).is_null(2));
+ assert_eq!(false, batch.column(1).is_null(3));
+ assert_eq!(false, batch.column(1).is_null(4));
+ }
}
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 0627b45..05db6ce 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -42,7 +42,7 @@ use crate::error::{ArrowError, Result};
/// Nested types can themselves be nested within other arrays.
/// For more information on these types please see
/// [here](https://arrow.apache.org/docs/memory_layout.html).
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
pub enum DataType {
Boolean,
Int8,
@@ -64,7 +64,7 @@ pub enum DataType {
/// Contains the meta-data for a single relative type.
///
/// The `Schema` object is an ordered collection of `Field` objects.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
pub struct Field {
name: String,
data_type: DataType,
diff --git a/rust/arrow/src/error.rs b/rust/arrow/src/error.rs
index 559b2d7..b75111f 100644
--- a/rust/arrow/src/error.rs
+++ b/rust/arrow/src/error.rs
@@ -15,12 +15,49 @@
// specific language governing permissions and limitations
// under the License.
+use std::error::Error;
+
+use csv as csv_crate;
+
#[derive(Debug, Clone, PartialEq)]
pub enum ArrowError {
MemoryError(String),
ParseError(String),
ComputeError(String),
DivideByZero,
+ CsvError(String),
+ IoError(String),
+}
+
+impl From<::std::io::Error> for ArrowError {
+ fn from(error: ::std::io::Error) -> Self {
+ ArrowError::IoError(error.description().to_string())
+ }
+}
+
+impl From<csv_crate::Error> for ArrowError {
+ fn from(error: csv_crate::Error) -> Self {
+ match error.kind() {
+ csv_crate::ErrorKind::Io(error) => {
+ ArrowError::CsvError(error.description().to_string())
+ }
+ csv_crate::ErrorKind::Utf8 {pos: _, err} => {
+ ArrowError::CsvError(format!("Encountered UTF-8 error while
reading CSV file: {:?}", err.description()))
+ }
+ csv_crate::ErrorKind::UnequalLengths {pos: _, expected_len, len}
=> {
+ ArrowError::CsvError(
+ format!(
+ "Encountered unequal lengths between records on CSV
file. Expected {} records, found {} records",
+ len,
+ expected_len
+ )
+ )
+ }
+ _ => {
+ ArrowError::CsvError("Error reading CSV file".to_string())
+ }
+ }
+ }
}
pub type Result<T> = ::std::result::Result<T, ArrowError>;
diff --git a/rust/arrow/test/data/uk_cities_with_headers.csv
b/rust/arrow/test/data/uk_cities_with_headers.csv
new file mode 100644
index 0000000..92f5a17
--- /dev/null
+++ b/rust/arrow/test/data/uk_cities_with_headers.csv
@@ -0,0 +1,38 @@
+city,lat,lng
+"Elgin, Scotland, the UK",57.653484,-3.335724
+"Stoke-on-Trent, Staffordshire, the UK",53.002666,-2.179404
+"Solihull, Birmingham, UK",52.412811,-1.778197
+"Cardiff, Cardiff county, UK",51.481583,-3.179090
+"Eastbourne, East Sussex, UK",50.768036,0.290472
+"Oxford, Oxfordshire, UK",51.752022,-1.257677
+"London, UK",51.509865,-0.118092
+"Swindon, Swindon, UK",51.568535,-1.772232
+"Gravesend, Kent, UK",51.441883,0.370759
+"Northampton, Northamptonshire, UK",52.240479,-0.902656
+"Rugby, Warwickshire, UK",52.370876,-1.265032
+"Sutton Coldfield, West Midlands, UK",52.570385,-1.824042
+"Harlow, Essex, UK",51.772938,0.102310
+"Aberdeen, Aberdeen City, UK",57.149651,-2.099075
+"Swansea, Swansea, UK",51.621441,-3.943646
+"Chesterfield, Derbyshire, UK",53.235046,-1.421629
+"Londonderry, Derry, UK",55.006763,-7.318268
+"Salisbury, Wiltshire, UK",51.068787,-1.794472
+"Weymouth, Dorset, UK",50.614429,-2.457621
+"Wolverhampton, West Midlands, UK",52.591370,-2.110748
+"Preston, Lancashire, UK",53.765762,-2.692337
+"Bournemouth, UK",50.720806,-1.904755
+"Doncaster, South Yorkshire, UK",53.522820,-1.128462
+"Ayr, South Ayrshire, UK",55.458565,-4.629179
+"Hastings, East Sussex, UK",50.854259,0.573453
+"Bedford, UK",52.136436,-0.460739
+"Basildon, Essex, UK",51.572376,0.470009
+"Chippenham, Wiltshire, UK",51.458057,-2.116074
+"Belfast, UK",54.607868,-5.926437
+"Uckfield, East Sussex, UK",50.967941,0.085831
+"Worthing, West Sussex, UK",50.825024,-0.383835
+"Leeds, West Yorkshire, UK",53.801277,-1.548567
+"Kendal, Cumbria, UK",54.328506,-2.743870
+"Plymouth, UK",50.376289,-4.143841
+"Haverhill, Suffolk, UK",52.080875,0.444517
+"Frankton, Warwickshire, UK",52.328415,-1.377561
+"Inverness, the UK",57.477772,-4.224721
\ No newline at end of file
diff --git a/rust/arrow/test/data/various_types.csv
b/rust/arrow/test/data/various_types.csv
new file mode 100644
index 0000000..322d9c3
--- /dev/null
+++ b/rust/arrow/test/data/various_types.csv
@@ -0,0 +1,6 @@
+c_int|c_float|c_string|c_bool
+1|1.1|"1.11"|true
+2|2.2|"2.22"|true
+3||"3.33"|true
+4|4.4||false
+5|6.6|""|false
\ No newline at end of file