zhuqi-lucas commented on code in PR #19924:
URL: https://github.com/apache/datafusion/pull/19924#discussion_r2785436542
##########
datafusion/datasource-json/src/file_format.rs:
##########
@@ -166,6 +186,57 @@ impl JsonFormat {
self.options.compression = file_compression_type.into();
self
}
+
+ /// Set whether to read as newline-delimited JSON (NDJSON).
+ ///
+ /// When `true` (default), expects newline-delimited format:
+ /// ```text
+ /// {"a": 1}
+ /// {"a": 2}
+ /// ```
+ ///
+ /// When `false`, expects JSON array format:
+ /// ```text
+ /// [{"a": 1}, {"a": 2}]
+ /// ```
+ pub fn with_newline_delimited(mut self, newline_delimited: bool) -> Self {
+ self.options.newline_delimited = newline_delimited;
+ self
+ }
+
+ /// Returns whether this format expects newline-delimited JSON.
+ pub fn is_newline_delimited(&self) -> bool {
+ self.options.newline_delimited
+ }
+}
+
+/// Infer schema from JSON array format using streaming conversion.
+///
+/// This function converts JSON array format to NDJSON on-the-fly and uses
+/// arrow-json's schema inference. It properly tracks the number of records
+/// processed for correct `records_to_read` management.
+///
+/// # Returns
+/// A tuple of (Schema, records_consumed) where records_consumed is the
+/// number of records that were processed for schema inference.
+fn infer_schema_from_json_array<R: Read>(
+ reader: R,
+ max_records: usize,
+) -> Result<(Schema, usize)> {
+ let ndjson_reader = JsonArrayToNdjsonReader::new(reader);
+
+ let iter = ValueIter::new(ndjson_reader, None);
+ let mut count = 0;
+
+ let schema = infer_json_schema_from_iterator(iter.take_while(|_| {
+ let should_take = count < max_records;
+ if should_take {
+ count += 1;
+ }
+ should_take
+ }))?;
+
Review Comment:
Addressed in the latest commit!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]