This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new d94be88538 Fix `CREATE EXTERNAL TABLE` doesn't work with non-standard
file ext (#6274)
d94be88538 is described below
commit d94be885380e8d1dc8e18773e588a25aa0ad409f
Author: Armin Primadi <[email protected]>
AuthorDate: Tue May 9 16:56:14 2023 +0700
Fix `CREATE EXTERNAL TABLE` doesn't work with non-standard file ext (#6274)
* Change CsvReadOptions default file_extension to None
* Revert "Change CsvReadOptions default file_extension to None"
This reverts commit 9f7fcc055cd41e4070d60b8204f331c0987f1d9a.
* Fix `ListingTableFactory::create` using invalid file extension
* Adding test and fix get extension function
* Fix cargo fmt
---
.../core/src/datasource/listing_table_factory.rs | 59 +++++++++++++++++++++-
1 file changed, 57 insertions(+), 2 deletions(-)
diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs
index 01d8ea6eac..d61235445a 100644
--- a/datafusion/core/src/datasource/listing_table_factory.rs
+++ b/datafusion/core/src/datasource/listing_table_factory.rs
@@ -17,6 +17,7 @@
//! Factory for creating ListingTables with default options
+use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
@@ -66,8 +67,7 @@ impl TableProviderFactory for ListingTableFactory {
DataFusionError::Execution(format!("Unknown FileType {}", cmd.file_type))
})?;
- let file_extension =
- file_type.get_ext_with_compression(file_compression_type.to_owned())?;
+ let file_extension = get_extension(cmd.location.as_str());
let file_format: Arc<dyn FileFormat> = match file_type {
FileType::CSV => Arc::new(
@@ -164,3 +164,58 @@ impl TableProviderFactory for ListingTableFactory {
Ok(Arc::new(table))
}
}
+
+// Returns the extension of `path` prefixed with a dot (e.g. ".tbl"), or "" when no usable extension is found.
+fn get_extension(path: &str) -> String {
+ let res = Path::new(path).extension().and_then(|ext| ext.to_str()); // None if there is no extension or it is not valid UTF-8
+ match res {
+ Some(ext) => format!(".{}", ext), // `Path::extension` omits the dot, so add it back
+ None => "".to_string(), // no extension: return an empty string
+ }
+}
+
+#[cfg(test)]
+mod tests { // covers the non-standard file extension fix (#6274)
+ use super::*;
+
+ use std::collections::HashMap;
+
+ use crate::execution::context::SessionContext;
+ use datafusion_common::parsers::CompressionTypeVariant;
+ use datafusion_common::{DFSchema, OwnedTableReference};
+
+ #[tokio::test]
+ async fn test_create_using_non_std_file_ext() {
+ let csv_file = tempfile::Builder::new() // temp file whose name ends in ".tbl"
+ .prefix("foo")
+ .suffix(".tbl") // extension differs from the "csv" file type declared below
+ .tempfile()
+ .unwrap();
+
+ let factory = ListingTableFactory::new();
+ let context = SessionContext::new();
+ let state = context.state();
+ let name = OwnedTableReference::bare("foo".to_string());
+ let cmd = CreateExternalTable {
+ name,
+ location: csv_file.path().to_str().unwrap().to_string(), // points at the ".tbl" temp file
+ file_type: "csv".to_string(),
+ has_header: true,
+ delimiter: ',',
+ schema: Arc::new(DFSchema::empty()),
+ table_partition_cols: vec![],
+ if_not_exists: false,
+ file_compression_type: CompressionTypeVariant::UNCOMPRESSED,
+ definition: None,
+ order_exprs: vec![],
+ options: HashMap::new(),
+ };
+ let table_provider = factory.create(&state, &cmd).await.unwrap();
+ let listing_table = table_provider
+ .as_any()
+ .downcast_ref::<ListingTable>() // the test expects the factory to have built a ListingTable
+ .unwrap();
+ let listing_options = listing_table.options();
+ assert_eq!(".tbl", listing_options.file_extension); // extension must match the location's ".tbl" suffix
+ }
+}