[GitHub] [arrow-datafusion] alamb commented on a diff in pull request #7336: Refactor: Consolidate OutputFileFormat and FileType into datafusion_common

via GitHub Mon, 21 Aug 2023 12:57:41 -0700


alamb commented on code in PR #7336:
URL: https://github.com/apache/arrow-datafusion/pull/7336#discussion_r1300583693



##########
datafusion/common/Cargo.toml:
##########
@@ -34,19 +34,34 @@ path = "src/lib.rs"
 
 [features]
 avro = ["apache-avro"]
-default = []
+compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression"]
+default = ["compression"]
 pyarrow = ["pyo3", "arrow/pyarrow"]
 
 [dependencies]
 apache-avro = { version = "0.15", default-features = false, features = 
["snappy"], optional = true }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
+async-compression = { version = "0.4.0", features = ["bzip2", "gzip", "xz", 
"zstd", "futures-io", "tokio"], optional = true }
+bytes = "1.4"
+bzip2 = { version = "0.4.3", optional = true }
 chrono = { version = "0.4", default-features = false }
+flate2 = { version = "1.0.24", optional = true }
+futures = "0.3"
 num_cpus = "1.13.0"
 object_store = { version = "0.6.1", default-features = false, optional = true }
 parquet = { workspace = true, optional = true }
 pyo3 = { version = "0.19.0", optional = true }
 sqlparser = { workspace = true }
+tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", 
"sync", "fs", "parking_lot"] }

Review Comment:
   The only thing I am slightly worried about is adding dependencies to 
`datafusion_common`, but given how tokio is basically used all over the place 
in DataFusion this doesn't seem like it would result in a practical difference 
to me.



##########
benchmarks/src/tpch/run.rs:
##########
@@ -19,17 +19,16 @@ use super::get_query_sql;
 use crate::{BenchmarkRun, CommonOpt};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::{self, pretty_format_batches};
-use datafusion::datasource::file_format::csv::{CsvFormat, 
DEFAULT_CSV_EXTENSION};
-use datafusion::datasource::file_format::parquet::{
-    ParquetFormat, DEFAULT_PARQUET_EXTENSION,
-};
+use datafusion::datasource::file_format::csv::CsvFormat;
+use datafusion::datasource::file_format::parquet::ParquetFormat;
 use datafusion::datasource::file_format::FileFormat;
 use datafusion::datasource::listing::{
     ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
 };
 use datafusion::datasource::{MemTable, TableProvider};
 use datafusion::physical_plan::display::DisplayableExecutionPlan;
 use datafusion::physical_plan::{collect, displayable};
+use datafusion_common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION};

Review Comment:
   ❤️ 
   
   



##########
benchmarks/src/tpch/run.rs:
##########
@@ -19,17 +19,16 @@ use super::get_query_sql;
 use crate::{BenchmarkRun, CommonOpt};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::{self, pretty_format_batches};
-use datafusion::datasource::file_format::csv::{CsvFormat, 
DEFAULT_CSV_EXTENSION};
-use datafusion::datasource::file_format::parquet::{
-    ParquetFormat, DEFAULT_PARQUET_EXTENSION,
-};
+use datafusion::datasource::file_format::csv::CsvFormat;
+use datafusion::datasource::file_format::parquet::ParquetFormat;
 use datafusion::datasource::file_format::FileFormat;
 use datafusion::datasource::listing::{
     ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
 };
 use datafusion::datasource::{MemTable, TableProvider};
 use datafusion::physical_plan::display::DisplayableExecutionPlan;
 use datafusion::physical_plan::{collect, displayable};
+use datafusion_common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION};

Review Comment:
   ❤️ 
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow-datafusion] alamb commented on a diff in pull request #7336: Refactor: Consolidate OutputFileFormat and FileType into datafusion_common

Reply via email to