This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 9ba214a Re-export Arrow and Parquet crates from DataFusion (#39)
9ba214a is described below
commit 9ba214a52ed78c57d3d6363c61a88893d41fe906
Author: Ruan Pearce-Authers <[email protected]>
AuthorDate: Sat Apr 24 17:32:59 2021 +0100
Re-export Arrow and Parquet crates from DataFusion (#39)
* Re-export Arrow and Parquet crates
* Switch benchmarks crate to use DF-exported Arrow and Parquet deps
* Switch datafusion-examples crate to use DF-exported Arrow dep
---
benchmarks/Cargo.toml | 2 --
benchmarks/src/bin/nyctaxi.rs | 5 +++--
benchmarks/src/bin/tpch.rs | 25 +++++++++++-----------
datafusion-examples/Cargo.toml | 1 -
datafusion-examples/examples/csv_sql.rs | 4 ++--
datafusion-examples/examples/dataframe.rs | 4 ++--
.../examples/dataframe_in_memory.rs | 8 +++----
datafusion-examples/examples/flight_client.rs | 6 +++---
datafusion-examples/examples/flight_server.rs | 6 +++---
datafusion-examples/examples/parquet_sql.rs | 4 ++--
datafusion-examples/examples/simple_udaf.rs | 4 ++--
datafusion-examples/examples/simple_udf.rs | 4 ++--
datafusion/src/lib.rs | 5 ++++-
13 files changed, 40 insertions(+), 38 deletions(-)
diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
index 3562266..25a385e 100644
--- a/benchmarks/Cargo.toml
+++ b/benchmarks/Cargo.toml
@@ -31,8 +31,6 @@ simd = ["datafusion/simd"]
snmalloc = ["snmalloc-rs"]
[dependencies]
-arrow = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" }
-parquet = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" }
datafusion = { path = "../datafusion" }
ballista = { path = "../ballista/rust/client" }
structopt = { version = "0.3", default-features = false }
diff --git a/benchmarks/src/bin/nyctaxi.rs b/benchmarks/src/bin/nyctaxi.rs
index 005efca..b2a62a0 100644
--- a/benchmarks/src/bin/nyctaxi.rs
+++ b/benchmarks/src/bin/nyctaxi.rs
@@ -22,8 +22,9 @@ use std::path::PathBuf;
use std::process;
use std::time::Instant;
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::util::pretty;
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
+use datafusion::arrow::util::pretty;
+
use datafusion::error::Result;
use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs
index fd9f052..543e84f 100644
--- a/benchmarks/src/bin/tpch.rs
+++ b/benchmarks/src/bin/tpch.rs
@@ -28,17 +28,21 @@ use std::{
use futures::StreamExt;
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::util::pretty;
use ballista::context::BallistaContext;
+
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::arrow::util::pretty;
+
use datafusion::datasource::parquet::ParquetTable;
use datafusion::datasource::{CsvFile, MemTable, TableProvider};
use datafusion::error::{DataFusionError, Result};
use datafusion::logical_plan::LogicalPlan;
use datafusion::physical_plan::collect;
use datafusion::prelude::*;
-use parquet::basic::Compression;
-use parquet::file::properties::WriterProperties;
+
+use datafusion::parquet::basic::Compression;
+use datafusion::parquet::file::properties::WriterProperties;
use structopt::StructOpt;
#[cfg(feature = "snmalloc")]
@@ -149,9 +153,7 @@ async fn main() -> Result<()> {
}
}
-async fn benchmark_datafusion(
- opt: BenchmarkOpt,
-) -> Result<Vec<arrow::record_batch::RecordBatch>> {
+async fn benchmark_datafusion(opt: BenchmarkOpt) -> Result<Vec<RecordBatch>> {
println!("Running benchmarks with the following options: {:?}", opt);
let config = ExecutionConfig::new()
.with_concurrency(opt.concurrency)
@@ -186,7 +188,7 @@ async fn benchmark_datafusion(
let mut millis = vec![];
// run benchmark
- let mut result: Vec<arrow::record_batch::RecordBatch> = Vec::with_capacity(1);
+ let mut result: Vec<RecordBatch> = Vec::with_capacity(1);
for i in 0..opt.iterations {
let start = Instant::now();
let plan = create_logical_plan(&mut ctx, opt.query)?;
@@ -299,7 +301,7 @@ async fn execute_query(
ctx: &mut ExecutionContext,
plan: &LogicalPlan,
debug: bool,
-) -> Result<Vec<arrow::record_batch::RecordBatch>> {
+) -> Result<Vec<RecordBatch>> {
if debug {
println!("Logical plan:\n{:?}", plan);
}
@@ -523,9 +525,8 @@ mod tests {
use std::env;
use std::sync::Arc;
- use arrow::array::*;
- use arrow::record_batch::RecordBatch;
- use arrow::util::display::array_value_to_string;
+ use datafusion::arrow::array::*;
+ use datafusion::arrow::util::display::array_value_to_string;
use datafusion::logical_plan::Expr;
use datafusion::logical_plan::Expr::Cast;
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index 929bdf2..0445f38 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -29,7 +29,6 @@ publish = false
[dev-dependencies]
-arrow = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" }
arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" }
datafusion = { path = "../datafusion" }
prost = "0.7"
diff --git a/datafusion-examples/examples/csv_sql.rs b/datafusion-examples/examples/csv_sql.rs
index 63fd36d..76c8796 100644
--- a/datafusion-examples/examples/csv_sql.rs
+++ b/datafusion-examples/examples/csv_sql.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::util::pretty;
+use datafusion::arrow::util::pretty;
use datafusion::error::Result;
use datafusion::prelude::*;
@@ -27,7 +27,7 @@ async fn main() -> Result<()> {
// create local execution context
let mut ctx = ExecutionContext::new();
- let testdata = arrow::util::test_util::arrow_test_data();
+ let testdata = datafusion::arrow::util::test_util::arrow_test_data();
// register csv file with the execution context
ctx.register_csv(
diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs
index cba4d87..dcf6bc3 100644
--- a/datafusion-examples/examples/dataframe.rs
+++ b/datafusion-examples/examples/dataframe.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::util::pretty;
+use datafusion::arrow::util::pretty;
use datafusion::error::Result;
use datafusion::prelude::*;
@@ -27,7 +27,7 @@ async fn main() -> Result<()> {
// create local execution context
let mut ctx = ExecutionContext::new();
- let testdata = arrow::util::test_util::parquet_test_data();
+ let testdata = datafusion::arrow::util::test_util::parquet_test_data();
let filename = &format!("{}/alltypes_plain.parquet", testdata);
diff --git a/datafusion-examples/examples/dataframe_in_memory.rs b/datafusion-examples/examples/dataframe_in_memory.rs
index de8552a..0c65a74 100644
--- a/datafusion-examples/examples/dataframe_in_memory.rs
+++ b/datafusion-examples/examples/dataframe_in_memory.rs
@@ -17,10 +17,10 @@
use std::sync::Arc;
-use arrow::array::{Int32Array, StringArray};
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::record_batch::RecordBatch;
-use arrow::util::pretty;
+use datafusion::arrow::array::{Int32Array, StringArray};
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::arrow::util::pretty;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
diff --git a/datafusion-examples/examples/flight_client.rs b/datafusion-examples/examples/flight_client.rs
index 2c2954d..5334782 100644
--- a/datafusion-examples/examples/flight_client.rs
+++ b/datafusion-examples/examples/flight_client.rs
@@ -18,8 +18,8 @@
use std::convert::TryFrom;
use std::sync::Arc;
-use arrow::datatypes::Schema;
-use arrow::util::pretty;
+use datafusion::arrow::datatypes::Schema;
+use datafusion::arrow::util::pretty;
use arrow_flight::flight_descriptor;
use arrow_flight::flight_service_client::FlightServiceClient;
@@ -31,7 +31,7 @@ use arrow_flight::{FlightDescriptor, Ticket};
/// This example is run along-side the example `flight_server`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
- let testdata = arrow::util::test_util::parquet_test_data();
+ let testdata = datafusion::arrow::util::test_util::parquet_test_data();
// Create Flight client
let mut client =
FlightServiceClient::connect("http://localhost:50051").await?;
diff --git a/datafusion-examples/examples/flight_server.rs b/datafusion-examples/examples/flight_server.rs
index 79660dd..8496bcb 100644
--- a/datafusion-examples/examples/flight_server.rs
+++ b/datafusion-examples/examples/flight_server.rs
@@ -66,7 +66,7 @@ impl FlightService for FlightServiceImpl {
let table = ParquetTable::try_new(&request.path[0],
num_cpus::get()).unwrap();
- let options = arrow::ipc::writer::IpcWriteOptions::default();
+ let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default();
let schema_result =
arrow_flight::utils::flight_schema_from_arrow_schema(
table.schema().as_ref(),
&options,
@@ -87,7 +87,7 @@ impl FlightService for FlightServiceImpl {
// create local execution context
let mut ctx = ExecutionContext::new();
- let testdata = arrow::util::test_util::parquet_test_data();
+ let testdata = datafusion::arrow::util::test_util::parquet_test_data();
// register parquet file with the execution context
ctx.register_parquet(
@@ -106,7 +106,7 @@ impl FlightService for FlightServiceImpl {
}
// add an initial FlightData message that sends schema
- let options = arrow::ipc::writer::IpcWriteOptions::default();
+ let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default();
let schema_flight_data =
arrow_flight::utils::flight_data_from_arrow_schema(
&df.schema().clone().into(),
diff --git a/datafusion-examples/examples/parquet_sql.rs b/datafusion-examples/examples/parquet_sql.rs
index 8043d32..f679b22 100644
--- a/datafusion-examples/examples/parquet_sql.rs
+++ b/datafusion-examples/examples/parquet_sql.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::util::pretty;
+use datafusion::arrow::util::pretty;
use datafusion::error::Result;
use datafusion::prelude::*;
@@ -27,7 +27,7 @@ async fn main() -> Result<()> {
// create local execution context
let mut ctx = ExecutionContext::new();
- let testdata = arrow::util::test_util::parquet_test_data();
+ let testdata = datafusion::arrow::util::test_util::parquet_test_data();
// register parquet file with the execution context
ctx.register_parquet(
diff --git a/datafusion-examples/examples/simple_udaf.rs b/datafusion-examples/examples/simple_udaf.rs
index 8086dfc..49d09ff 100644
--- a/datafusion-examples/examples/simple_udaf.rs
+++ b/datafusion-examples/examples/simple_udaf.rs
@@ -17,7 +17,7 @@
/// In this example we will declare a single-type, single return type UDAF that computes the geometric mean.
/// The geometric mean is described here: https://en.wikipedia.org/wiki/Geometric_mean
-use arrow::{
+use datafusion::arrow::{
array::Float32Array, array::Float64Array, datatypes::DataType,
record_batch::RecordBatch,
};
@@ -28,7 +28,7 @@ use std::sync::Arc;
// create local execution context with an in-memory table
fn create_context() -> Result<ExecutionContext> {
- use arrow::datatypes::{Field, Schema};
+ use datafusion::arrow::datatypes::{Field, Schema};
use datafusion::datasource::MemTable;
// define a schema.
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Float32,
false)]));
diff --git a/datafusion-examples/examples/simple_udf.rs b/datafusion-examples/examples/simple_udf.rs
index bfef108..0ffec44 100644
--- a/datafusion-examples/examples/simple_udf.rs
+++ b/datafusion-examples/examples/simple_udf.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::{
+use datafusion::arrow::{
array::{ArrayRef, Float32Array, Float64Array},
datatypes::DataType,
record_batch::RecordBatch,
@@ -28,7 +28,7 @@ use std::sync::Arc;
// create local execution context with an in-memory table
fn create_context() -> Result<ExecutionContext> {
- use arrow::datatypes::{Field, Schema};
+ use datafusion::arrow::datatypes::{Field, Schema};
use datafusion::datasource::MemTable;
// define a schema.
let schema = Arc::new(Schema::new(vec![
diff --git a/datafusion/src/lib.rs b/datafusion/src/lib.rs
index 44a8a68..252d168 100644
--- a/datafusion/src/lib.rs
+++ b/datafusion/src/lib.rs
@@ -183,7 +183,6 @@
//!
//! you can find examples of each of them in examples section.
-extern crate arrow;
extern crate sqlparser;
pub mod catalog;
@@ -200,6 +199,10 @@ pub mod scalar;
pub mod sql;
pub mod variable;
+// re-export dependencies from arrow-rs to minimise version maintenance for crate users
+pub use arrow;
+pub use parquet;
+
#[cfg(test)]
pub mod test;