alamb commented on code in PR #7336:
URL: https://github.com/apache/arrow-datafusion/pull/7336#discussion_r1300583693


##########
datafusion/common/Cargo.toml:
##########
@@ -34,19 +34,34 @@ path = "src/lib.rs"
 
 [features]
 avro = ["apache-avro"]
-default = []
+compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression"]
+default = ["compression"]
 pyarrow = ["pyo3", "arrow/pyarrow"]
 
 [dependencies]
 apache-avro = { version = "0.15", default-features = false, features = ["snappy"], optional = true }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
+async-compression = { version = "0.4.0", features = ["bzip2", "gzip", "xz", "zstd", "futures-io", "tokio"], optional = true }
+bytes = "1.4"
+bzip2 = { version = "0.4.3", optional = true }
 chrono = { version = "0.4", default-features = false }
+flate2 = { version = "1.0.24", optional = true }
+futures = "0.3"
 num_cpus = "1.13.0"
 object_store = { version = "0.6.1", default-features = false, optional = true }
 parquet = { workspace = true, optional = true }
 pyo3 = { version = "0.19.0", optional = true }
 sqlparser = { workspace = true }
+tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }

Review Comment:
   The only thing I am slightly worried about is adding dependencies to 
`datafusion_common`, but given how tokio is basically used all over the place 
in DataFusion, this doesn't seem like it would result in a practical difference 
to me



##########
benchmarks/src/tpch/run.rs:
##########
@@ -19,17 +19,16 @@ use super::get_query_sql;
 use crate::{BenchmarkRun, CommonOpt};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::{self, pretty_format_batches};
-use datafusion::datasource::file_format::csv::{CsvFormat, DEFAULT_CSV_EXTENSION};
-use datafusion::datasource::file_format::parquet::{
-    ParquetFormat, DEFAULT_PARQUET_EXTENSION,
-};
+use datafusion::datasource::file_format::csv::CsvFormat;
+use datafusion::datasource::file_format::parquet::ParquetFormat;
 use datafusion::datasource::file_format::FileFormat;
 use datafusion::datasource::listing::{
     ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
 };
 use datafusion::datasource::{MemTable, TableProvider};
 use datafusion::physical_plan::display::DisplayableExecutionPlan;
 use datafusion::physical_plan::{collect, displayable};
+use datafusion_common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION};

Review Comment:
   ❤️ 
   
   



##########
benchmarks/src/tpch/run.rs:
##########
@@ -19,17 +19,16 @@ use super::get_query_sql;
 use crate::{BenchmarkRun, CommonOpt};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::{self, pretty_format_batches};
-use datafusion::datasource::file_format::csv::{CsvFormat, DEFAULT_CSV_EXTENSION};
-use datafusion::datasource::file_format::parquet::{
-    ParquetFormat, DEFAULT_PARQUET_EXTENSION,
-};
+use datafusion::datasource::file_format::csv::CsvFormat;
+use datafusion::datasource::file_format::parquet::ParquetFormat;
 use datafusion::datasource::file_format::FileFormat;
 use datafusion::datasource::listing::{
     ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
 };
 use datafusion::datasource::{MemTable, TableProvider};
 use datafusion::physical_plan::display::DisplayableExecutionPlan;
 use datafusion::physical_plan::{collect, displayable};
+use datafusion_common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION};

Review Comment:
   ❤️ 
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to