alamb commented on code in PR #9799:
URL: https://github.com/apache/arrow-datafusion/pull/9799#discussion_r1540400631
##########
datafusion/core/src/datasource/physical_plan/json.rs:
##########
@@ -884,4 +898,48 @@ mod tests {
Ok(())
}
+ fn compress_file(path: &str, output_path: &str) -> io::Result<()> {
+ let input_file = File::open(path)?;
+ let mut reader = BufReader::new(input_file);
+
+ let output_file = File::create(output_path)?;
+ let writer = std::io::BufWriter::new(output_file);
+
+ let mut encoder = GzEncoder::new(writer, Compression::default());
+ io::copy(&mut reader, &mut encoder)?;
+
+ encoder.finish()?;
+ Ok(())
+ }
+ #[tokio::test]
+ async fn test_disable_parallel_for_json_gz() -> Result<()> {
+ let config = SessionConfig::new()
+ .with_repartition_file_scans(true)
+ .with_repartition_file_min_size(0)
Review Comment:
👍
##########
datafusion/core/src/datasource/physical_plan/json.rs:
##########
@@ -150,12 +151,22 @@ impl ExecutionPlan for NdJsonExec {
target_partitions: usize,
config: &datafusion_common::config::ConfigOptions,
) -> Result<Option<Arc<dyn ExecutionPlan>>> {
- let repartition_file_min_size =
config.optimizer.repartition_file_min_size;
+ let repartition_file_min_size =
+ if self.file_compression_type == FileCompressionType::GZIP {
+ OptimizerOptions::default().repartition_file_min_size
Review Comment:
I wonder if it would be simpler to return `None` here
```suggestion
None
```
That way the rest of the code could be left alone.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]