This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 2352f3e9a Fix `ListingTableUrl` to decode percent (#3750)
2352f3e9a is described below
commit 2352f3e9a7adf0394bfb166bb6e331db3075fbc4
Author: unvalley <[email protected]>
AuthorDate: Tue Oct 11 05:26:41 2022 -0700
Fix `ListingTableUrl` to decode percent (#3750)
* fix: ListingTableUrl prefix decoding
* chore: remove waste change
* fix: use from instead of parse
* test: add test cases for prefix
* chore: cargo fmt
---
datafusion/core/Cargo.toml | 1 +
datafusion/core/src/datasource/listing/url.rs | 14 +++++++++++++-
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 097ee3158..f7a9ce4e9 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -80,6 +80,7 @@ ordered-float = "3.0"
parking_lot = "0.12"
parquet = { version = "24.0.0", features = ["arrow", "async"] }
paste = "^1.0"
+percent-encoding = "2.2.0"
pin-project-lite = "^0.2.7"
pyo3 = { version = "0.17.1", optional = true }
rand = "0.8"
diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs
index 8676f2118..d1a527f23 100644
--- a/datafusion/core/src/datasource/listing/url.rs
+++ b/datafusion/core/src/datasource/listing/url.rs
@@ -23,6 +23,7 @@ use glob::Pattern;
use itertools::Itertools;
use object_store::path::Path;
use object_store::{ObjectMeta, ObjectStore};
+use percent_encoding;
use url::Url;
/// A parsed URL identifying files for a listing table, see [`ListingTableUrl::parse`]
@@ -108,7 +109,9 @@ impl ListingTableUrl {
/// Creates a new [`ListingTableUrl`] from a url and optional glob expression
fn new(url: Url, glob: Option<Pattern>) -> Self {
- let prefix = Path::parse(url.path()).expect("should be URL safe");
+ let decoded_path =
+ percent_encoding::percent_decode_str(url.path()).decode_utf8_lossy();
+ let prefix = Path::from(decoded_path.as_ref());
Self { url, prefix, glob }
}
@@ -246,6 +249,15 @@ mod tests {
let url = ListingTableUrl::parse("file:///foo").unwrap();
let child = Path::parse("/foob/bar").unwrap();
assert!(url.strip_prefix(&child).is_none());
+
+ let url = ListingTableUrl::parse("file:///foo/ bar").unwrap();
+ assert_eq!(url.prefix.as_ref(), "foo/ bar");
+
+ let url = ListingTableUrl::parse("file:///foo/bar?").unwrap();
+ assert_eq!(url.prefix.as_ref(), "foo/bar");
+
+ let url = ListingTableUrl::parse("file:///foo/😺").unwrap();
+ assert_eq!(url.prefix.as_ref(), "foo/%F0%9F%98%BA");
}
#[test]