This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 2352f3e9a Fix `ListingTableUrl` to decode percent (#3750)
2352f3e9a is described below

commit 2352f3e9a7adf0394bfb166bb6e331db3075fbc4
Author: unvalley <[email protected]>
AuthorDate: Tue Oct 11 05:26:41 2022 -0700

    Fix `ListingTableUrl` to decode percent (#3750)
    
    * fix: ListingTableUrl prefix decoding
    
    * chore: remove waste change
    
    * fix: use from instead of parse
    
    * test: add test cases for prefix
    
    * chore: cargo fmt
---
 datafusion/core/Cargo.toml                    |  1 +
 datafusion/core/src/datasource/listing/url.rs | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 097ee3158..f7a9ce4e9 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -80,6 +80,7 @@ ordered-float = "3.0"
 parking_lot = "0.12"
 parquet = { version = "24.0.0", features = ["arrow", "async"] }
 paste = "^1.0"
+percent-encoding = "2.2.0"
 pin-project-lite = "^0.2.7"
 pyo3 = { version = "0.17.1", optional = true }
 rand = "0.8"
diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs
index 8676f2118..d1a527f23 100644
--- a/datafusion/core/src/datasource/listing/url.rs
+++ b/datafusion/core/src/datasource/listing/url.rs
@@ -23,6 +23,7 @@ use glob::Pattern;
 use itertools::Itertools;
 use object_store::path::Path;
 use object_store::{ObjectMeta, ObjectStore};
+use percent_encoding;
 use url::Url;
 
 /// A parsed URL identifying files for a listing table, see [`ListingTableUrl::parse`]
@@ -108,7 +109,9 @@ impl ListingTableUrl {
 
     /// Creates a new [`ListingTableUrl`] from a url and optional glob expression
     fn new(url: Url, glob: Option<Pattern>) -> Self {
-        let prefix = Path::parse(url.path()).expect("should be URL safe");
+        let decoded_path =
+            percent_encoding::percent_decode_str(url.path()).decode_utf8_lossy();
+        let prefix = Path::from(decoded_path.as_ref());
         Self { url, prefix, glob }
     }
 
@@ -246,6 +249,15 @@ mod tests {
         let url = ListingTableUrl::parse("file:///foo").unwrap();
         let child = Path::parse("/foob/bar").unwrap();
         assert!(url.strip_prefix(&child).is_none());
+
+        let url = ListingTableUrl::parse("file:///foo/ bar").unwrap();
+        assert_eq!(url.prefix.as_ref(), "foo/ bar");
+
+        let url = ListingTableUrl::parse("file:///foo/bar?").unwrap();
+        assert_eq!(url.prefix.as_ref(), "foo/bar");
+
+        let url = ListingTableUrl::parse("file:///foo/😺").unwrap();
+        assert_eq!(url.prefix.as_ref(), "foo/%F0%9F%98%BA");
     }
 
     #[test]

Reply via email to