This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 4c3b8474c7 feat:  add SchemaProvider::table_type(table_name: &str) (#16401)
4c3b8474c7 is described below

commit 4c3b8474c7dbcf04e056ebe2a28a942b805bef2a
Author: epgif <e...@influxdata.com>
AuthorDate: Wed Jun 18 16:09:48 2025 -0500

    feat:  add SchemaProvider::table_type(table_name: &str) (#16401)
    
    * feat:  add SchemaProvider::table_type(table_name: &str)
    
    InformationSchemaConfig::make_tables only needs the TableType, not the
    whole TableProvider; constructing the latter may require an expensive
    catalog operation, while obtaining the former may not.
    
    This lets `SELECT * FROM information_schema.tables` avoid performing
    one of those potentially expensive operations per table.
    
    * test:  new InformationSchemaConfig::make_tables behavior
    
    * Move tests to same file to fix CI
    
    ---------
    
    Co-authored-by: Andrew Lamb <and...@nerdnetworks.org>
---
 datafusion/catalog/src/information_schema.rs | 95 +++++++++++++++++++++++++++-
 datafusion/catalog/src/schema.rs             |  9 +++
 2 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs
index 057d1a8198..83b6d64ef4 100644
--- a/datafusion/catalog/src/information_schema.rs
+++ b/datafusion/catalog/src/information_schema.rs
@@ -103,12 +103,14 @@ impl InformationSchemaConfig {
                     // schema name may not exist in the catalog, so we need to check
                     if let Some(schema) = catalog.schema(&schema_name) {
                         for table_name in schema.table_names() {
-                            if let Some(table) = schema.table(&table_name).await? {
+                            if let Some(table_type) =
+                                schema.table_type(&table_name).await?
+                            {
                                 builder.add_table(
                                     &catalog_name,
                                     &schema_name,
                                     &table_name,
-                                    table.table_type(),
+                                    table_type,
                                 );
                             }
                         }
@@ -1359,3 +1361,92 @@ impl PartitionStream for InformationSchemaParameters {
         ))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::CatalogProvider;
+
+    #[tokio::test]
+    async fn make_tables_uses_table_type() {
+        let config = InformationSchemaConfig {
+            catalog_list: Arc::new(Fixture),
+        };
+        let mut builder = InformationSchemaTablesBuilder {
+            catalog_names: StringBuilder::new(),
+            schema_names: StringBuilder::new(),
+            table_names: StringBuilder::new(),
+            table_types: StringBuilder::new(),
+            schema: Arc::new(Schema::empty()),
+        };
+
+        assert!(config.make_tables(&mut builder).await.is_ok());
+
+        assert_eq!("BASE TABLE", builder.table_types.finish().value(0));
+    }
+
+    #[derive(Debug)]
+    struct Fixture;
+
+    #[async_trait]
+    impl SchemaProvider for Fixture {
+        // InformationSchemaConfig::make_tables should use this.
+        async fn table_type(&self, _: &str) -> Result<Option<TableType>> {
+            Ok(Some(TableType::Base))
+        }
+
+        // InformationSchemaConfig::make_tables used this before `table_type`
+        // existed but should not, as it may be expensive.
+        async fn table(&self, _: &str) -> Result<Option<Arc<dyn TableProvider>>> {
+            panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type")
+        }
+
+        fn as_any(&self) -> &dyn Any {
+            unimplemented!("not required for these tests")
+        }
+
+        fn table_names(&self) -> Vec<String> {
+            vec!["atable".to_string()]
+        }
+
+        fn table_exist(&self, _: &str) -> bool {
+            unimplemented!("not required for these tests")
+        }
+    }
+
+    impl CatalogProviderList for Fixture {
+        fn as_any(&self) -> &dyn Any {
+            unimplemented!("not required for these tests")
+        }
+
+        fn register_catalog(
+            &self,
+            _: String,
+            _: Arc<dyn CatalogProvider>,
+        ) -> Option<Arc<dyn CatalogProvider>> {
+            unimplemented!("not required for these tests")
+        }
+
+        fn catalog_names(&self) -> Vec<String> {
+            vec!["acatalog".to_string()]
+        }
+
+        fn catalog(&self, _: &str) -> Option<Arc<dyn CatalogProvider>> {
+            Some(Arc::new(Self))
+        }
+    }
+
+    impl CatalogProvider for Fixture {
+        fn as_any(&self) -> &dyn Any {
+            unimplemented!("not required for these tests")
+        }
+
+        fn schema_names(&self) -> Vec<String> {
+            vec!["aschema".to_string()]
+        }
+
+        fn schema(&self, _: &str) -> Option<Arc<dyn SchemaProvider>> {
+            Some(Arc::new(Self))
+        }
+    }
+}
diff --git a/datafusion/catalog/src/schema.rs b/datafusion/catalog/src/schema.rs
index 5b37348fd7..9ba55256f1 100644
--- a/datafusion/catalog/src/schema.rs
+++ b/datafusion/catalog/src/schema.rs
@@ -26,6 +26,7 @@ use std::sync::Arc;
 
 use crate::table::TableProvider;
 use datafusion_common::Result;
+use datafusion_expr::TableType;
 
 /// Represents a schema, comprising a number of named tables.
 ///
@@ -54,6 +55,14 @@ pub trait SchemaProvider: Debug + Sync + Send {
         name: &str,
     ) -> Result<Option<Arc<dyn TableProvider>>, DataFusionError>;
 
+    /// Retrieves the type of a specific table from the schema by name, if it exists, otherwise
+    /// returns `None`.  Implementations for which this operation is cheap but [Self::table] is
+    /// expensive can override this to improve operations that only need the type, e.g.
+    /// `SELECT * FROM information_schema.tables`.
+    async fn table_type(&self, name: &str) -> Result<Option<TableType>> {
+        self.table(name).await.map(|o| o.map(|t| t.table_type()))
+    }
+
     /// If supported by the implementation, adds a new table named `name` to
     /// this schema.
     ///


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to