This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 8159294 fix issue#2058 file_format/json.rs attempt to subtract with overflow (#2066)
8159294 is described below
commit 81592947e8814327ebdbd1fbc3d4a090796e37a3
Author: silence-coding <[email protected]>
AuthorDate: Sat Mar 26 01:01:09 2022 +0800
fix issue#2058 file_format/json.rs attempt to subtract with overflow (#2066)
* fix issue#2058 file_format/json.rs attempt to subtract with overflow
* issue#2058 add infer_schema_with_limit test
Co-authored-by: p00512853 <[email protected]>
---
datafusion/src/datasource/file_format/json.rs | 22 ++++++++++++++++++++--
datafusion/tests/jsons/schema_infer_limit.json | 4 ++++
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/datafusion/src/datasource/file_format/json.rs b/datafusion/src/datasource/file_format/json.rs
index 0a347fa..77d4898 100644
--- a/datafusion/src/datasource/file_format/json.rs
+++ b/datafusion/src/datasource/file_format/json.rs
@@ -68,13 +68,15 @@ impl FileFormat for JsonFormat {
let iter = ValueIter::new(&mut reader, None);
let schema = infer_json_schema_from_iterator(iter.take_while(|_| {
let should_take = records_to_read > 0;
- records_to_read -= 1;
+ if should_take {
+ records_to_read -= 1;
+ }
should_take
}))?;
+ schemas.push(schema);
if records_to_read == 0 {
break;
}
- schemas.push(schema);
}
let schema = Schema::try_merge(schemas)?;
@@ -228,4 +230,20 @@ mod tests {
.await?;
Ok(exec)
}
+
+ #[tokio::test]
+ async fn infer_schema_with_limit() {
+ let filename = "tests/jsons/schema_infer_limit.json";
+ let format = JsonFormat::default().with_schema_infer_max_rec(Some(3));
+ let file_schema = format
+ .infer_schema(local_object_reader_stream(vec![filename.to_owned()]))
+ .await
+ .expect("Schema inference");
+ let fields = file_schema
+ .fields()
+ .iter()
+ .map(|f| format!("{}: {:?}", f.name(), f.data_type()))
+ .collect::<Vec<_>>();
+ assert_eq!(vec!["a: Int64", "b: Float64", "c: Boolean"], fields);
+ }
}
diff --git a/datafusion/tests/jsons/schema_infer_limit.json b/datafusion/tests/jsons/schema_infer_limit.json
new file mode 100644
index 0000000..bfacf2f
--- /dev/null
+++ b/datafusion/tests/jsons/schema_infer_limit.json
@@ -0,0 +1,4 @@
+{"a":1}
+{"a":-10, "b":-3.5}
+{"a":2, "b":0.6, "c":false}
+{"a":1, "b":2.0, "c":false, "d":"4"}
\ No newline at end of file