This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new 4c3b8474c7 feat: add SchemaProvider::table_type(table_name: &str) (#16401) 4c3b8474c7 is described below commit 4c3b8474c7dbcf04e056ebe2a28a942b805bef2a Author: epgif <e...@influxdata.com> AuthorDate: Wed Jun 18 16:09:48 2025 -0500 feat: add SchemaProvider::table_type(table_name: &str) (#16401) * feat: add SchemaProvider::table_type(table_name: &str) InformationSchemaConfig::make_tables only needs the TableType, not the whole TableProvider, and the latter may require an expensive catalog operation to construct while the former may not. This lets `SELECT * FROM information_schema.tables` avoid performing one of those potentially expensive operations per table. * test: new InformationSchemaConfig::make_tables behavior * Move tests to same file to fix CI --------- Co-authored-by: Andrew Lamb <and...@nerdnetworks.org> --- datafusion/catalog/src/information_schema.rs | 95 +++++++++++++++++++++++++++- datafusion/catalog/src/schema.rs | 9 +++ 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index 057d1a8198..83b6d64ef4 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -103,12 +103,14 @@ impl InformationSchemaConfig { // schema name may not exist in the catalog, so we need to check if let Some(schema) = catalog.schema(&schema_name) { for table_name in schema.table_names() { - if let Some(table) = schema.table(&table_name).await? { + if let Some(table_type) = + schema.table_type(&table_name).await? 
+ { builder.add_table( &catalog_name, &schema_name, &table_name, - table.table_type(), + table_type, ); } } @@ -1359,3 +1361,92 @@ impl PartitionStream for InformationSchemaParameters { )) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::CatalogProvider; + + #[tokio::test] + async fn make_tables_uses_table_type() { + let config = InformationSchemaConfig { + catalog_list: Arc::new(Fixture), + }; + let mut builder = InformationSchemaTablesBuilder { + catalog_names: StringBuilder::new(), + schema_names: StringBuilder::new(), + table_names: StringBuilder::new(), + table_types: StringBuilder::new(), + schema: Arc::new(Schema::empty()), + }; + + assert!(config.make_tables(&mut builder).await.is_ok()); + + assert_eq!("BASE TABLE", builder.table_types.finish().value(0)); + } + + #[derive(Debug)] + struct Fixture; + + #[async_trait] + impl SchemaProvider for Fixture { + // InformationSchemaConfig::make_tables should use this. + async fn table_type(&self, _: &str) -> Result<Option<TableType>> { + Ok(Some(TableType::Base)) + } + + // InformationSchemaConfig::make_tables used this before `table_type` + // existed but should not, as it may be expensive. 
+ async fn table(&self, _: &str) -> Result<Option<Arc<dyn TableProvider>>> { + panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type") + } + + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn table_names(&self) -> Vec<String> { + vec!["atable".to_string()] + } + + fn table_exist(&self, _: &str) -> bool { + unimplemented!("not required for these tests") + } + } + + impl CatalogProviderList for Fixture { + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn register_catalog( + &self, + _: String, + _: Arc<dyn CatalogProvider>, + ) -> Option<Arc<dyn CatalogProvider>> { + unimplemented!("not required for these tests") + } + + fn catalog_names(&self) -> Vec<String> { + vec!["acatalog".to_string()] + } + + fn catalog(&self, _: &str) -> Option<Arc<dyn CatalogProvider>> { + Some(Arc::new(Self)) + } + } + + impl CatalogProvider for Fixture { + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn schema_names(&self) -> Vec<String> { + vec!["aschema".to_string()] + } + + fn schema(&self, _: &str) -> Option<Arc<dyn SchemaProvider>> { + Some(Arc::new(Self)) + } + } +} diff --git a/datafusion/catalog/src/schema.rs b/datafusion/catalog/src/schema.rs index 5b37348fd7..9ba55256f1 100644 --- a/datafusion/catalog/src/schema.rs +++ b/datafusion/catalog/src/schema.rs @@ -26,6 +26,7 @@ use std::sync::Arc; use crate::table::TableProvider; use datafusion_common::Result; +use datafusion_expr::TableType; /// Represents a schema, comprising a number of named tables. /// @@ -54,6 +55,14 @@ pub trait SchemaProvider: Debug + Sync + Send { name: &str, ) -> Result<Option<Arc<dyn TableProvider>>, DataFusionError>; + /// Retrieves the type of a specific table from the schema by name, if it exists, otherwise + /// returns `None`. 
Implementations for which this operation is cheap but [Self::table] is + /// expensive can override this to improve operations that only need the type, e.g. + /// `SELECT * FROM information_schema.tables`. + async fn table_type(&self, name: &str) -> Result<Option<TableType>> { + self.table(name).await.map(|o| o.map(|t| t.table_type())) + } + /// If supported by the implementation, adds a new table named `name` to /// this schema. /// --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org