This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 8159294  fix  issue#2058 file_format/json.rs attempt to subtract with overflow (#2066)
8159294 is described below

commit 81592947e8814327ebdbd1fbc3d4a090796e37a3
Author: silence-coding <[email protected]>
AuthorDate: Sat Mar 26 01:01:09 2022 +0800

    fix  issue#2058 file_format/json.rs attempt to subtract with overflow (#2066)
    
    * fix  issue#2058 file_format/json.rs attempt to subtract with overflow
    
    * issue#2058 add infer_schema_with_limit test
    
    Co-authored-by: p00512853 <[email protected]>
---
 datafusion/src/datasource/file_format/json.rs  | 22 ++++++++++++++++++++--
 datafusion/tests/jsons/schema_infer_limit.json |  4 ++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/datafusion/src/datasource/file_format/json.rs b/datafusion/src/datasource/file_format/json.rs
index 0a347fa..77d4898 100644
--- a/datafusion/src/datasource/file_format/json.rs
+++ b/datafusion/src/datasource/file_format/json.rs
@@ -68,13 +68,15 @@ impl FileFormat for JsonFormat {
             let iter = ValueIter::new(&mut reader, None);
             let schema = infer_json_schema_from_iterator(iter.take_while(|_| {
                 let should_take = records_to_read > 0;
-                records_to_read -= 1;
+                if should_take {
+                    records_to_read -= 1;
+                }
                 should_take
             }))?;
+            schemas.push(schema);
             if records_to_read == 0 {
                 break;
             }
-            schemas.push(schema);
         }
 
         let schema = Schema::try_merge(schemas)?;
@@ -228,4 +230,20 @@ mod tests {
             .await?;
         Ok(exec)
     }
+
+    #[tokio::test]
+    async fn infer_schema_with_limit() {
+        let filename = "tests/jsons/schema_infer_limit.json";
+        let format = JsonFormat::default().with_schema_infer_max_rec(Some(3));
+        let file_schema = format
+            .infer_schema(local_object_reader_stream(vec![filename.to_owned()]))
+            .await
+            .expect("Schema inference");
+        let fields = file_schema
+            .fields()
+            .iter()
+            .map(|f| format!("{}: {:?}", f.name(), f.data_type()))
+            .collect::<Vec<_>>();
+        assert_eq!(vec!["a: Int64", "b: Float64", "c: Boolean"], fields);
+    }
 }
diff --git a/datafusion/tests/jsons/schema_infer_limit.json b/datafusion/tests/jsons/schema_infer_limit.json
new file mode 100644
index 0000000..bfacf2f
--- /dev/null
+++ b/datafusion/tests/jsons/schema_infer_limit.json
@@ -0,0 +1,4 @@
+{"a":1}
+{"a":-10, "b":-3.5}
+{"a":2, "b":0.6, "c":false}
+{"a":1, "b":2.0, "c":false, "d":"4"}
\ No newline at end of file

Reply via email to