This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new a984f08989 Update arrow 49.0.0 and object_store 0.8.0 (#8029)
a984f08989 is described below
commit a984f08989a1d59b04992f6325a2b707629e8873
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Sat Nov 18 10:52:48 2023 +0000
Update arrow 49.0.0 and object_store 0.8.0 (#8029)
* POC: Remove ListingTable Append Support (#7994)
* Prepare object_store 0.8.0
* Fix datafusion-cli test
* Update arrow version
* Update tests
* Update pin
* Unignore fifo test
* Update lockfile
---
Cargo.toml | 17 +++---
datafusion-cli/Cargo.lock | 71 +++++++++++-----------
datafusion-cli/Cargo.toml | 4 +-
datafusion-cli/src/exec.rs | 7 ++-
datafusion/core/src/catalog/listing_schema.rs | 7 +--
.../core/src/datasource/file_format/arrow.rs | 2 +
datafusion/core/src/datasource/file_format/csv.rs | 1 +
datafusion/core/src/datasource/file_format/mod.rs | 16 +++--
.../core/src/datasource/file_format/parquet.rs | 20 ++++--
datafusion/core/src/datasource/listing/helpers.rs | 3 +-
datafusion/core/src/datasource/listing/mod.rs | 2 +
datafusion/core/src/datasource/listing/url.rs | 12 ++--
.../core/src/datasource/physical_plan/mod.rs | 1 +
.../core/src/datasource/physical_plan/parquet.rs | 1 +
.../datasource/physical_plan/parquet/row_groups.rs | 1 +
datafusion/core/src/test/object_store.rs | 1 +
datafusion/core/src/test_util/parquet.rs | 1 +
datafusion/core/tests/parquet/custom_reader.rs | 1 +
datafusion/core/tests/parquet/page_pruning.rs | 1 +
datafusion/core/tests/parquet/schema_coercion.rs | 1 +
datafusion/core/tests/path_partition.rs | 30 +++++----
datafusion/execution/src/cache/cache_unit.rs | 2 +
datafusion/physical-expr/src/expressions/cast.rs | 6 +-
.../physical-expr/src/expressions/try_cast.rs | 6 +-
datafusion/proto/src/physical_plan/from_proto.rs | 1 +
datafusion/sqllogictest/test_files/copy.slt | 18 +++---
datafusion/substrait/src/physical_plan/consumer.rs | 1 +
27 files changed, 132 insertions(+), 102 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index f25c24fd3e..60befdf1cf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -49,12 +49,12 @@ rust-version = "1.70"
version = "33.0.0"
[workspace.dependencies]
-arrow = { version = "~48.0.1", features = ["prettyprint"] }
-arrow-array = { version = "~48.0.1", default-features = false, features =
["chrono-tz"] }
-arrow-buffer = { version = "~48.0.1", default-features = false }
-arrow-flight = { version = "~48.0.1", features = ["flight-sql-experimental"] }
-arrow-ord = { version = "~48.0.1", default-features = false }
-arrow-schema = { version = "~48.0.1", default-features = false }
+arrow = { version = "49.0.0", features = ["prettyprint"] }
+arrow-array = { version = "49.0.0", default-features = false, features =
["chrono-tz"] }
+arrow-buffer = { version = "49.0.0", default-features = false }
+arrow-flight = { version = "49.0.0", features = ["flight-sql-experimental"] }
+arrow-ord = { version = "49.0.0", default-features = false }
+arrow-schema = { version = "49.0.0", default-features = false }
async-trait = "0.1.73"
bigdecimal = "0.4.1"
bytes = "1.4"
@@ -79,9 +79,9 @@ indexmap = "2.0.0"
itertools = "0.12"
log = "^0.4"
num_cpus = "1.13.0"
-object_store = { version = "0.7.0", default-features = false }
+object_store = { version = "0.8.0", default-features = false }
parking_lot = "0.12"
-parquet = { version = "~48.0.1", default-features = false, features =
["arrow", "async", "object_store"] }
+parquet = { version = "49.0.0", default-features = false, features = ["arrow",
"async", "object_store"] }
rand = "0.8"
rstest = "0.18.0"
serde_json = "1"
@@ -108,4 +108,3 @@ opt-level = 3
overflow-checks = false
panic = 'unwind'
rpath = false
-
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 4bc61a48a3..06bc14c5b6 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -130,9 +130,9 @@ checksum =
"96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "arrow"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8919668503a4f2d8b6da96fa7c16e93046bfb3412ffcfa1e5dc7d2e3adcb378"
+checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614"
dependencies = [
"ahash",
"arrow-arith",
@@ -152,9 +152,9 @@ dependencies = [
[[package]]
name = "arrow-arith"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef983914f477d4278b068f13b3224b7d19eb2b807ac9048544d3bfebdf2554c4"
+checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -167,9 +167,9 @@ dependencies = [
[[package]]
name = "arrow-array"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6eaf89041fa5937940ae390294ece29e1db584f46d995608d6e5fe65a2e0e9b"
+checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d"
dependencies = [
"ahash",
"arrow-buffer",
@@ -184,9 +184,9 @@ dependencies = [
[[package]]
name = "arrow-buffer"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55512d988c6fbd76e514fd3ff537ac50b0a675da5a245e4fdad77ecfd654205f"
+checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c"
dependencies = [
"bytes",
"half",
@@ -195,15 +195,16 @@ dependencies = [
[[package]]
name = "arrow-cast"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "655ee51a2156ba5375931ce21c1b2494b1d9260e6dcdc6d4db9060c37dc3325b"
+checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
+ "base64",
"chrono",
"comfy-table",
"half",
@@ -213,9 +214,9 @@ dependencies = [
[[package]]
name = "arrow-csv"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "258bb689997ad5b6660b3ce3638bd6b383d668ec555ed41ad7c6559cbb2e4f91"
+checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -232,9 +233,9 @@ dependencies = [
[[package]]
name = "arrow-data"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dc2b9fec74763427e2e5575b8cc31ce96ba4c9b4eb05ce40e0616d9fad12461"
+checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -244,9 +245,9 @@ dependencies = [
[[package]]
name = "arrow-ipc"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6eaa6ab203cc6d89b7eaa1ac781c1dfeef325454c5d5a0419017f95e6bafc03c"
+checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -258,9 +259,9 @@ dependencies = [
[[package]]
name = "arrow-json"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb64e30d9b73f66fdc5c52d5f4cf69bbf03d62f64ffeafa0715590a5320baed7"
+checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -278,9 +279,9 @@ dependencies = [
[[package]]
name = "arrow-ord"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9a818951c0d11c428dda03e908175969c262629dd20bd0850bd6c7a8c3bfe48"
+checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -293,9 +294,9 @@ dependencies = [
[[package]]
name = "arrow-row"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5d664318bc05f930559fc088888f0f7174d3c5bc888c0f4f9ae8f23aa398ba3"
+checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a"
dependencies = [
"ahash",
"arrow-array",
@@ -308,15 +309,15 @@ dependencies = [
[[package]]
name = "arrow-schema"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aaf4d737bba93da59f16129bec21e087aed0be84ff840e74146d4703879436cb"
+checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167"
[[package]]
name = "arrow-select"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "374c4c3b812ecc2118727b892252a4a4308f87a8aca1dbf09f3ce4bc578e668a"
+checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036"
dependencies = [
"ahash",
"arrow-array",
@@ -328,9 +329,9 @@ dependencies = [
[[package]]
name = "arrow-string"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15aed5624bb23da09142f58502b59c23f5bea607393298bb81dab1ce60fc769"
+checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -2288,9 +2289,9 @@ dependencies = [
[[package]]
name = "object_store"
-version = "0.7.1"
+version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f930c88a43b1c3f6e776dfe495b4afab89882dbc81530c632db2ed65451ebcb4"
+checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050"
dependencies = [
"async-trait",
"base64",
@@ -2305,7 +2306,7 @@ dependencies = [
"quick-xml",
"rand",
"reqwest",
- "ring 0.16.20",
+ "ring 0.17.5",
"rustls-pemfile",
"serde",
"serde_json",
@@ -2374,9 +2375,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "48.0.1"
+version = "49.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6bfe55df96e3f02f11bf197ae37d91bb79801631f82f6195dd196ef521df3597"
+checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4"
dependencies = [
"ahash",
"arrow-array",
@@ -2597,9 +2598,9 @@ checksum =
"658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88"
[[package]]
name = "quick-xml"
-version = "0.30.0"
+version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
+checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
dependencies = [
"memchr",
"serde",
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index 890f84522c..dd7a077988 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -29,7 +29,7 @@ rust-version = "1.70"
readme = "README.md"
[dependencies]
-arrow = "~48.0.1"
+arrow = "49.0.0"
async-trait = "0.1.41"
aws-config = "0.55"
aws-credential-types = "0.55"
@@ -38,7 +38,7 @@ datafusion = { path = "../datafusion/core", version =
"33.0.0", features = ["avr
dirs = "4.0.0"
env_logger = "0.9"
mimalloc = { version = "0.1", default-features = false }
-object_store = { version = "0.7.0", features = ["aws", "gcp"] }
+object_store = { version = "0.8.0", features = ["aws", "gcp"] }
parking_lot = { version = "0.12" }
regex = "1.8"
rustyline = "11.0"
diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs
index b62ad12dbf..14ac22687b 100644
--- a/datafusion-cli/src/exec.rs
+++ b/datafusion-cli/src/exec.rs
@@ -350,7 +350,7 @@ mod tests {
async fn create_object_store_table_gcs() -> Result<()> {
let service_account_path = "fake_service_account_path";
let service_account_key =
- "{\"private_key\":
\"fake_private_key.pem\",\"client_email\":\"fake_client_email\"}";
+ "{\"private_key\":
\"fake_private_key.pem\",\"client_email\":\"fake_client_email\",
\"private_key_id\":\"id\"}";
let application_credentials_path = "fake_application_credentials_path";
let location = "gcs://bucket/path/file.parquet";
@@ -366,8 +366,9 @@ mod tests {
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET
OPTIONS('service_account_key' '{service_account_key}') LOCATION '{location}'");
let err = create_external_table_test(location, &sql)
.await
- .unwrap_err();
- assert!(err.to_string().contains("No RSA key found in pem file"));
+ .unwrap_err()
+ .to_string();
+ assert!(err.contains("No RSA key found in pem file"), "{err}");
// for application_credentials_path
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET
diff --git a/datafusion/core/src/catalog/listing_schema.rs
b/datafusion/core/src/catalog/listing_schema.rs
index 7e527642be..0d5c49f377 100644
--- a/datafusion/core/src/catalog/listing_schema.rs
+++ b/datafusion/core/src/catalog/listing_schema.rs
@@ -92,12 +92,7 @@ impl ListingSchemaProvider {
/// Reload table information from ObjectStore
pub async fn refresh(&self, state: &SessionState) ->
datafusion_common::Result<()> {
- let entries: Vec<_> = self
- .store
- .list(Some(&self.path))
- .await?
- .try_collect()
- .await?;
+ let entries: Vec<_> =
self.store.list(Some(&self.path)).try_collect().await?;
let base = Path::new(self.path.as_ref());
let mut tables = HashSet::new();
for file in entries.iter() {
diff --git a/datafusion/core/src/datasource/file_format/arrow.rs
b/datafusion/core/src/datasource/file_format/arrow.rs
index a9bd7d0e27..07c96bdae1 100644
--- a/datafusion/core/src/datasource/file_format/arrow.rs
+++ b/datafusion/core/src/datasource/file_format/arrow.rs
@@ -214,6 +214,7 @@ mod tests {
last_modified: DateTime::default(),
size: usize::MAX,
e_tag: None,
+ version: None,
};
let arrow_format = ArrowFormat {};
@@ -256,6 +257,7 @@ mod tests {
last_modified: DateTime::default(),
size: usize::MAX,
e_tag: None,
+ version: None,
};
let arrow_format = ArrowFormat {};
diff --git a/datafusion/core/src/datasource/file_format/csv.rs
b/datafusion/core/src/datasource/file_format/csv.rs
index 684f416f77..df6689af6b 100644
--- a/datafusion/core/src/datasource/file_format/csv.rs
+++ b/datafusion/core/src/datasource/file_format/csv.rs
@@ -673,6 +673,7 @@ mod tests {
last_modified: DateTime::default(),
size: usize::MAX,
e_tag: None,
+ version: None,
};
let num_rows_to_read = 100;
diff --git a/datafusion/core/src/datasource/file_format/mod.rs
b/datafusion/core/src/datasource/file_format/mod.rs
index b541e2a1d4..7c2331548e 100644
--- a/datafusion/core/src/datasource/file_format/mod.rs
+++ b/datafusion/core/src/datasource/file_format/mod.rs
@@ -124,7 +124,8 @@ pub(crate) mod test_util {
use object_store::local::LocalFileSystem;
use object_store::path::Path;
use object_store::{
- GetOptions, GetResult, GetResultPayload, ListResult, MultipartId,
+ GetOptions, GetResult, GetResultPayload, ListResult, MultipartId,
PutOptions,
+ PutResult,
};
use tokio::io::AsyncWrite;
@@ -189,7 +190,12 @@ pub(crate) mod test_util {
#[async_trait]
impl ObjectStore for VariableStream {
- async fn put(&self, _location: &Path, _bytes: Bytes) ->
object_store::Result<()> {
+ async fn put_opts(
+ &self,
+ _location: &Path,
+ _bytes: Bytes,
+ _opts: PutOptions,
+ ) -> object_store::Result<PutResult> {
unimplemented!()
}
@@ -228,6 +234,7 @@ pub(crate) mod test_util {
last_modified: Default::default(),
size: range.end,
e_tag: None,
+ version: None,
},
range: Default::default(),
})
@@ -257,11 +264,10 @@ pub(crate) mod test_util {
unimplemented!()
}
- async fn list(
+ fn list(
&self,
_prefix: Option<&Path>,
- ) -> object_store::Result<BoxStream<'_,
object_store::Result<ObjectMeta>>>
- {
+ ) -> BoxStream<'_, object_store::Result<ObjectMeta>> {
unimplemented!()
}
diff --git a/datafusion/core/src/datasource/file_format/parquet.rs
b/datafusion/core/src/datasource/file_format/parquet.rs
index c4d05adfc6..cf6b874081 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -1199,7 +1199,9 @@ mod tests {
use log::error;
use object_store::local::LocalFileSystem;
use object_store::path::Path;
- use object_store::{GetOptions, GetResult, ListResult, MultipartId};
+ use object_store::{
+ GetOptions, GetResult, ListResult, MultipartId, PutOptions, PutResult,
+ };
use parquet::arrow::arrow_reader::ArrowReaderOptions;
use parquet::arrow::ParquetRecordBatchStreamBuilder;
use parquet::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex};
@@ -1283,7 +1285,12 @@ mod tests {
#[async_trait]
impl ObjectStore for RequestCountingObjectStore {
- async fn put(&self, _location: &Path, _bytes: Bytes) ->
object_store::Result<()> {
+ async fn put_opts(
+ &self,
+ _location: &Path,
+ _bytes: Bytes,
+ _opts: PutOptions,
+ ) -> object_store::Result<PutResult> {
Err(object_store::Error::NotImplemented)
}
@@ -1320,12 +1327,13 @@ mod tests {
Err(object_store::Error::NotImplemented)
}
- async fn list(
+ fn list(
&self,
_prefix: Option<&Path>,
- ) -> object_store::Result<BoxStream<'_,
object_store::Result<ObjectMeta>>>
- {
- Err(object_store::Error::NotImplemented)
+ ) -> BoxStream<'_, object_store::Result<ObjectMeta>> {
+ Box::pin(futures::stream::once(async {
+ Err(object_store::Error::NotImplemented)
+ }))
}
async fn list_with_delimiter(
diff --git a/datafusion/core/src/datasource/listing/helpers.rs
b/datafusion/core/src/datasource/listing/helpers.rs
index 3d2a3dc928..322d65d564 100644
--- a/datafusion/core/src/datasource/listing/helpers.rs
+++ b/datafusion/core/src/datasource/listing/helpers.rs
@@ -361,8 +361,7 @@ pub async fn pruned_partition_list<'a>(
Some(files) => files,
None => {
trace!("Recursively listing partition {}", partition.path);
- let s = store.list(Some(&partition.path)).await?;
- s.try_collect().await?
+ store.list(Some(&partition.path)).try_collect().await?
}
};
diff --git a/datafusion/core/src/datasource/listing/mod.rs
b/datafusion/core/src/datasource/listing/mod.rs
index aa2e20164b..87c1663ae7 100644
--- a/datafusion/core/src/datasource/listing/mod.rs
+++ b/datafusion/core/src/datasource/listing/mod.rs
@@ -80,6 +80,7 @@ impl PartitionedFile {
last_modified: chrono::Utc.timestamp_nanos(0),
size: size as usize,
e_tag: None,
+ version: None,
},
partition_values: vec![],
range: None,
@@ -95,6 +96,7 @@ impl PartitionedFile {
last_modified: chrono::Utc.timestamp_nanos(0),
size: size as usize,
e_tag: None,
+ version: None,
},
partition_values: vec![],
range: Some(FileRange { start, end }),
diff --git a/datafusion/core/src/datasource/listing/url.rs
b/datafusion/core/src/datasource/listing/url.rs
index ba3c3fae21..45845916a9 100644
--- a/datafusion/core/src/datasource/listing/url.rs
+++ b/datafusion/core/src/datasource/listing/url.rs
@@ -210,17 +210,15 @@ impl ListingTableUrl {
// If the prefix is a file, use a head request, otherwise list
let list = match self.is_collection() {
true => match
ctx.runtime_env().cache_manager.get_list_files_cache() {
- None => futures::stream::once(store.list(Some(&self.prefix)))
- .try_flatten()
- .boxed(),
+ None => store.list(Some(&self.prefix)),
Some(cache) => {
if let Some(res) = cache.get(&self.prefix) {
debug!("Hit list all files cache");
futures::stream::iter(res.as_ref().clone().into_iter().map(Ok))
.boxed()
} else {
- let list_res = store.list(Some(&self.prefix)).await;
- let vec =
list_res?.try_collect::<Vec<ObjectMeta>>().await?;
+ let list_res = store.list(Some(&self.prefix));
+ let vec =
list_res.try_collect::<Vec<ObjectMeta>>().await?;
cache.put(&self.prefix, Arc::new(vec.clone()));
futures::stream::iter(vec.into_iter().map(Ok)).boxed()
}
@@ -330,8 +328,8 @@ mod tests {
let url = ListingTableUrl::parse("file:///foo/bar?").unwrap();
assert_eq!(url.prefix.as_ref(), "foo/bar");
- let err = ListingTableUrl::parse("file:///foo/😺").unwrap_err();
- assert_eq!(err.to_string(), "Object Store error: Encountered object
with invalid path: Error parsing Path \"/foo/😺\": Encountered illegal character
sequence \"😺\" whilst parsing path segment \"😺\"");
+ let url = ListingTableUrl::parse("file:///foo/😺").unwrap();
+ assert_eq!(url.prefix.as_ref(), "foo/😺");
let url = ListingTableUrl::parse("file:///foo/bar%2Efoo").unwrap();
assert_eq!(url.prefix.as_ref(), "foo/bar.foo");
diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs
b/datafusion/core/src/datasource/physical_plan/mod.rs
index 738e70966b..aca71678d9 100644
--- a/datafusion/core/src/datasource/physical_plan/mod.rs
+++ b/datafusion/core/src/datasource/physical_plan/mod.rs
@@ -784,6 +784,7 @@ mod tests {
last_modified: Utc::now(),
size: 42,
e_tag: None,
+ version: None,
};
PartitionedFile {
diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs
b/datafusion/core/src/datasource/physical_plan/parquet.rs
index 960b2ec733..731672ceb8 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet.rs
@@ -1718,6 +1718,7 @@ mod tests {
last_modified: Utc.timestamp_nanos(0),
size: 1337,
e_tag: None,
+ version: None,
},
partition_values: vec![],
range: None,
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs
b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs
index dc6ef50bc1..0079368f9c 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs
@@ -1243,6 +1243,7 @@ mod tests {
last_modified:
chrono::DateTime::from(std::time::SystemTime::now()),
size: data.len(),
e_tag: None,
+ version: None,
};
let in_memory = object_store::memory::InMemory::new();
in_memory
diff --git a/datafusion/core/src/test/object_store.rs
b/datafusion/core/src/test/object_store.rs
index 08cebb56cc..d6f324a7f1 100644
--- a/datafusion/core/src/test/object_store.rs
+++ b/datafusion/core/src/test/object_store.rs
@@ -61,5 +61,6 @@ pub fn local_unpartitioned_file(path: impl
AsRef<std::path::Path>) -> ObjectMeta
last_modified:
metadata.modified().map(chrono::DateTime::from).unwrap(),
size: metadata.len() as usize,
e_tag: None,
+ version: None,
}
}
diff --git a/datafusion/core/src/test_util/parquet.rs
b/datafusion/core/src/test_util/parquet.rs
index 0d11526703..f3c0d2987a 100644
--- a/datafusion/core/src/test_util/parquet.rs
+++ b/datafusion/core/src/test_util/parquet.rs
@@ -113,6 +113,7 @@ impl TestParquetFile {
last_modified: Default::default(),
size,
e_tag: None,
+ version: None,
};
Ok(Self {
diff --git a/datafusion/core/tests/parquet/custom_reader.rs
b/datafusion/core/tests/parquet/custom_reader.rs
index 37481b936d..3752d42dbf 100644
--- a/datafusion/core/tests/parquet/custom_reader.rs
+++ b/datafusion/core/tests/parquet/custom_reader.rs
@@ -188,6 +188,7 @@ async fn store_parquet_in_memory(
last_modified: chrono::DateTime::from(SystemTime::now()),
size: buf.len(),
e_tag: None,
+ version: None,
};
(meta, Bytes::from(buf))
diff --git a/datafusion/core/tests/parquet/page_pruning.rs
b/datafusion/core/tests/parquet/page_pruning.rs
index b77643c35e..e1e8b8e66e 100644
--- a/datafusion/core/tests/parquet/page_pruning.rs
+++ b/datafusion/core/tests/parquet/page_pruning.rs
@@ -50,6 +50,7 @@ async fn get_parquet_exec(state: &SessionState, filter: Expr)
-> ParquetExec {
last_modified:
metadata.modified().map(chrono::DateTime::from).unwrap(),
size: metadata.len() as usize,
e_tag: None,
+ version: None,
};
let schema = ParquetFormat::default()
diff --git a/datafusion/core/tests/parquet/schema_coercion.rs
b/datafusion/core/tests/parquet/schema_coercion.rs
index b3134d470b..25c62f18f5 100644
--- a/datafusion/core/tests/parquet/schema_coercion.rs
+++ b/datafusion/core/tests/parquet/schema_coercion.rs
@@ -194,5 +194,6 @@ pub fn local_unpartitioned_file(path: impl
AsRef<std::path::Path>) -> ObjectMeta
last_modified:
metadata.modified().map(chrono::DateTime::from).unwrap(),
size: metadata.len() as usize,
e_tag: None,
+ version: None,
}
}
diff --git a/datafusion/core/tests/path_partition.rs
b/datafusion/core/tests/path_partition.rs
index 27d146de79..dd8eb52f67 100644
--- a/datafusion/core/tests/path_partition.rs
+++ b/datafusion/core/tests/path_partition.rs
@@ -46,7 +46,7 @@ use futures::stream;
use futures::stream::BoxStream;
use object_store::{
path::Path, GetOptions, GetResult, GetResultPayload, ListResult,
MultipartId,
- ObjectMeta, ObjectStore,
+ ObjectMeta, ObjectStore, PutOptions, PutResult,
};
use tokio::io::AsyncWrite;
use url::Url;
@@ -620,7 +620,12 @@ impl MirroringObjectStore {
#[async_trait]
impl ObjectStore for MirroringObjectStore {
- async fn put(&self, _location: &Path, _bytes: Bytes) ->
object_store::Result<()> {
+ async fn put_opts(
+ &self,
+ _location: &Path,
+ _bytes: Bytes,
+ _opts: PutOptions,
+ ) -> object_store::Result<PutResult> {
unimplemented!()
}
@@ -653,6 +658,7 @@ impl ObjectStore for MirroringObjectStore {
last_modified:
metadata.modified().map(chrono::DateTime::from).unwrap(),
size: metadata.len() as usize,
e_tag: None,
+ version: None,
};
Ok(GetResult {
@@ -680,26 +686,16 @@ impl ObjectStore for MirroringObjectStore {
Ok(data.into())
}
- async fn head(&self, location: &Path) -> object_store::Result<ObjectMeta> {
- self.files.iter().find(|x| *x == location).unwrap();
- Ok(ObjectMeta {
- location: location.clone(),
- last_modified: Utc.timestamp_nanos(0),
- size: self.file_size as usize,
- e_tag: None,
- })
- }
-
async fn delete(&self, _location: &Path) -> object_store::Result<()> {
unimplemented!()
}
- async fn list(
+ fn list(
&self,
prefix: Option<&Path>,
- ) -> object_store::Result<BoxStream<'_, object_store::Result<ObjectMeta>>>
{
+ ) -> BoxStream<'_, object_store::Result<ObjectMeta>> {
let prefix = prefix.cloned().unwrap_or_default();
- Ok(Box::pin(stream::iter(self.files.iter().filter_map(
+ Box::pin(stream::iter(self.files.iter().filter_map(
move |location| {
// Don't return for exact prefix match
let filter = location
@@ -713,10 +709,11 @@ impl ObjectStore for MirroringObjectStore {
last_modified: Utc.timestamp_nanos(0),
size: self.file_size as usize,
e_tag: None,
+ version: None,
})
})
},
- ))))
+ )))
}
async fn list_with_delimiter(
@@ -750,6 +747,7 @@ impl ObjectStore for MirroringObjectStore {
last_modified: Utc.timestamp_nanos(0),
size: self.file_size as usize,
e_tag: None,
+ version: None,
};
objects.push(object);
}
diff --git a/datafusion/execution/src/cache/cache_unit.rs
b/datafusion/execution/src/cache/cache_unit.rs
index 4a21dc02bd..c54839061c 100644
--- a/datafusion/execution/src/cache/cache_unit.rs
+++ b/datafusion/execution/src/cache/cache_unit.rs
@@ -176,6 +176,7 @@ mod tests {
.into(),
size: 1024,
e_tag: None,
+ version: None,
};
let cache = DefaultFileStatisticsCache::default();
assert!(cache.get_with_extra(&meta.location, &meta).is_none());
@@ -219,6 +220,7 @@ mod tests {
.into(),
size: 1024,
e_tag: None,
+ version: None,
};
let cache = DefaultListFilesCache::default();
diff --git a/datafusion/physical-expr/src/expressions/cast.rs
b/datafusion/physical-expr/src/expressions/cast.rs
index 780e042156..cbc82cc776 100644
--- a/datafusion/physical-expr/src/expressions/cast.rs
+++ b/datafusion/physical-expr/src/expressions/cast.rs
@@ -680,7 +680,11 @@ mod tests {
// Ensure a useful error happens at plan time if invalid casts are used
let schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
- let result = cast(col("a", &schema).unwrap(), &schema,
DataType::LargeBinary);
+ let result = cast(
+ col("a", &schema).unwrap(),
+ &schema,
+ DataType::Interval(IntervalUnit::MonthDayNano),
+ );
result.expect_err("expected Invalid CAST");
}
diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs
b/datafusion/physical-expr/src/expressions/try_cast.rs
index dea7f9f86a..0f7909097a 100644
--- a/datafusion/physical-expr/src/expressions/try_cast.rs
+++ b/datafusion/physical-expr/src/expressions/try_cast.rs
@@ -555,7 +555,11 @@ mod tests {
// Ensure a useful error happens at plan time if invalid casts are used
let schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
- let result = try_cast(col("a", &schema).unwrap(), &schema,
DataType::LargeBinary);
+ let result = try_cast(
+ col("a", &schema).unwrap(),
+ &schema,
+ DataType::Interval(IntervalUnit::MonthDayNano),
+ );
result.expect_err("expected Invalid TRY_CAST");
}
diff --git a/datafusion/proto/src/physical_plan/from_proto.rs
b/datafusion/proto/src/physical_plan/from_proto.rs
index f5771ddb15..dcebfbf2da 100644
--- a/datafusion/proto/src/physical_plan/from_proto.rs
+++ b/datafusion/proto/src/physical_plan/from_proto.rs
@@ -540,6 +540,7 @@ impl TryFrom<&protobuf::PartitionedFile> for
PartitionedFile {
last_modified: Utc.timestamp_nanos(val.last_modified_ns as
i64),
size: val.size as usize,
e_tag: None,
+ version: None,
},
partition_values: val
.partition_values
diff --git a/datafusion/sqllogictest/test_files/copy.slt
b/datafusion/sqllogictest/test_files/copy.slt
index fbf1523477..02ab330833 100644
--- a/datafusion/sqllogictest/test_files/copy.slt
+++ b/datafusion/sqllogictest/test_files/copy.slt
@@ -66,8 +66,8 @@ select * from validate_parquet;
# Copy parquet with all supported statement overrides
query IT
-COPY source_table
-TO 'test_files/scratch/copy/table_with_options'
+COPY source_table
+TO 'test_files/scratch/copy/table_with_options'
(format parquet,
single_file_output false,
compression snappy,
@@ -206,11 +206,11 @@ select * from validate_single_json;
# COPY csv files with all options set
query IT
-COPY source_table
-to 'test_files/scratch/copy/table_csv_with_options'
-(format csv,
-single_file_output false,
-header false,
+COPY source_table
+to 'test_files/scratch/copy/table_csv_with_options'
+(format csv,
+single_file_output false,
+header false,
compression 'uncompressed',
datetime_format '%FT%H:%M:%S.%9f',
delimiter ';',
@@ -220,8 +220,8 @@ null_value 'NULLVAL');
# Validate single csv output
statement ok
-CREATE EXTERNAL TABLE validate_csv_with_options
-STORED AS csv
+CREATE EXTERNAL TABLE validate_csv_with_options
+STORED AS csv
LOCATION 'test_files/scratch/copy/table_csv_with_options';
query T
diff --git a/datafusion/substrait/src/physical_plan/consumer.rs
b/datafusion/substrait/src/physical_plan/consumer.rs
index 1dab1f9d5e..942798173e 100644
--- a/datafusion/substrait/src/physical_plan/consumer.rs
+++ b/datafusion/substrait/src/physical_plan/consumer.rs
@@ -89,6 +89,7 @@ pub async fn from_substrait_rel(
location: path.into(),
size,
e_tag: None,
+ version: None,
},
partition_values: vec![],
range: None,