This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new d94be88538 Fix `CREATE EXTERNAL TABLE` doesn't work with non-standard file ext (#6274)
d94be88538 is described below

commit d94be885380e8d1dc8e18773e588a25aa0ad409f
Author: Armin Primadi <[email protected]>
AuthorDate: Tue May 9 16:56:14 2023 +0700

    Fix `CREATE EXTERNAL TABLE` doesn't work with non-standard file ext (#6274)
    
    * Change CsvReadOptions default file_extension to None
    
    * Revert "Change CsvReadOptions default file_extension to None"
    
    This reverts commit 9f7fcc055cd41e4070d60b8204f331c0987f1d9a.
    
    * Fix `ListingTableFactory::create` using invalid file extension
    
    * Adding test and fix get extension function
    
    * Fix cargo fmt
---
 .../core/src/datasource/listing_table_factory.rs   | 59 +++++++++++++++++++++-
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs
index 01d8ea6eac..d61235445a 100644
--- a/datafusion/core/src/datasource/listing_table_factory.rs
+++ b/datafusion/core/src/datasource/listing_table_factory.rs
@@ -17,6 +17,7 @@
 
 //! Factory for creating ListingTables with default options
 
+use std::path::Path;
 use std::str::FromStr;
 use std::sync::Arc;
 
@@ -66,8 +67,7 @@ impl TableProviderFactory for ListingTableFactory {
             DataFusionError::Execution(format!("Unknown FileType {}", cmd.file_type))
         })?;
 
-        let file_extension =
-            file_type.get_ext_with_compression(file_compression_type.to_owned())?;
+        let file_extension = get_extension(cmd.location.as_str());
 
         let file_format: Arc<dyn FileFormat> = match file_type {
             FileType::CSV => Arc::new(
@@ -164,3 +164,58 @@ impl TableProviderFactory for ListingTableFactory {
         Ok(Arc::new(table))
     }
 }
+
+// Get file extension from path
+fn get_extension(path: &str) -> String {
+    let res = Path::new(path).extension().and_then(|ext| ext.to_str());
+    match res {
+        Some(ext) => format!(".{}", ext),
+        None => "".to_string(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use std::collections::HashMap;
+
+    use crate::execution::context::SessionContext;
+    use datafusion_common::parsers::CompressionTypeVariant;
+    use datafusion_common::{DFSchema, OwnedTableReference};
+
+    #[tokio::test]
+    async fn test_create_using_non_std_file_ext() {
+        let csv_file = tempfile::Builder::new()
+            .prefix("foo")
+            .suffix(".tbl")
+            .tempfile()
+            .unwrap();
+
+        let factory = ListingTableFactory::new();
+        let context = SessionContext::new();
+        let state = context.state();
+        let name = OwnedTableReference::bare("foo".to_string());
+        let cmd = CreateExternalTable {
+            name,
+            location: csv_file.path().to_str().unwrap().to_string(),
+            file_type: "csv".to_string(),
+            has_header: true,
+            delimiter: ',',
+            schema: Arc::new(DFSchema::empty()),
+            table_partition_cols: vec![],
+            if_not_exists: false,
+            file_compression_type: CompressionTypeVariant::UNCOMPRESSED,
+            definition: None,
+            order_exprs: vec![],
+            options: HashMap::new(),
+        };
+        let table_provider = factory.create(&state, &cmd).await.unwrap();
+        let listing_table = table_provider
+            .as_any()
+            .downcast_ref::<ListingTable>()
+            .unwrap();
+        let listing_options = listing_table.options();
+        assert_eq!(".tbl", listing_options.file_extension);
+    }
+}

Reply via email to