This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new df6074a  feat(datafusion): auto-register built-in table functions on catalog registration (#324)
df6074a is described below

commit df6074a4267e605e00d89d02cd837b5b54063b65
Author: shyjsarah <[email protected]>
AuthorDate: Tue May 19 00:31:42 2026 -0700

    feat(datafusion): auto-register built-in table functions on catalog registration (#324)
---
 bindings/python/Cargo.toml                        |  2 +-
 bindings/python/tests/test_datafusion.py          | 17 +++++++++++++++++
 crates/integrations/datafusion/src/sql_context.rs | 14 ++++++++++++++
 docs/src/sql.md                                   | 10 +++++++++-
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 6ed2406..0c3b487 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -31,6 +31,6 @@ arrow = { workspace = true, features = ["pyarrow"] }
 datafusion = { workspace = true }
 datafusion-ffi = { workspace = true }
 paimon = { path = "../../crates/paimon", features = ["storage-all"] }
-paimon-datafusion = { path = "../../crates/integrations/datafusion" }
+paimon-datafusion = { path = "../../crates/integrations/datafusion", features = ["fulltext"] }
 pyo3 = { version = "0.28", features = ["abi3-py310"] }
 tokio = { workspace = true }
diff --git a/bindings/python/tests/test_datafusion.py b/bindings/python/tests/test_datafusion.py
index 5e4e5e9..2576b7c 100644
--- a/bindings/python/tests/test_datafusion.py
+++ b/bindings/python/tests/test_datafusion.py
@@ -177,3 +177,20 @@ def test_register_batch_invalid_catalog():
             assert False, "Expected an error for unknown catalog"
         except Exception as e:
             assert "unknown_catalog" in str(e).lower() or "not a paimon" in str(e).lower() or "unknown" in str(e).lower()
+
+
+def test_table_functions_registered_with_catalog():
+    """register_catalog auto-registers vector_search / full_text_search as
+    UDTFs. Calling one with the wrong argument count surfaces the function's
+    own validation error, which proves it is registered — an unregistered
+    name would instead fail with 'table function not found'."""
+    with tempfile.TemporaryDirectory() as warehouse:
+        ctx = SQLContext()
+        ctx.register_catalog("paimon", {"warehouse": warehouse})
+
+        for fn in ("vector_search", "full_text_search"):
+            try:
+                ctx.sql(f"SELECT * FROM {fn}('only_one_arg')")
+                assert False, f"expected {fn} to reject a single argument"
+            except Exception as e:
+                assert "requires 4 arguments" in str(e), str(e)
diff --git a/crates/integrations/datafusion/src/sql_context.rs b/crates/integrations/datafusion/src/sql_context.rs
index 973e263..f77b79e 100644
--- a/crates/integrations/datafusion/src/sql_context.rs
+++ b/crates/integrations/datafusion/src/sql_context.rs
@@ -136,6 +136,7 @@ impl SQLContext {
                 self.dynamic_options.clone(),
             )),
         );
+        register_table_functions(&self.ctx, &catalog, default_db);
         self.catalogs.insert(catalog_name.clone(), catalog);
         if is_first {
             self.set_current_catalog(catalog_name).await?;
@@ -2302,6 +2303,19 @@ fn ok_result(ctx: &SessionContext) -> DFResult<DataFrame> {
     Ok(df)
 }
 
+/// Registers the built-in table-valued functions against `catalog` so they can
+/// be used in SQL without any extra setup call. Called for every catalog
+/// registered on the context; add new built-in table functions here.
+fn register_table_functions(
+    ctx: &SessionContext,
+    catalog: &Arc<dyn Catalog>,
+    default_database: &str,
+) {
+    crate::vector_search::register_vector_search(ctx, Arc::clone(catalog), default_database);
+    #[cfg(feature = "fulltext")]
+    crate::full_text_search::register_full_text_search(ctx, Arc::clone(catalog), default_database);
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/docs/src/sql.md b/docs/src/sql.md
index 94eef71..325d236 100644
--- a/docs/src/sql.md
+++ b/docs/src/sql.md
@@ -53,7 +53,7 @@ async fn example() -> Result<(), Box<dyn std::error::Error>> {
 }
 ```
 
-`SQLContext::new` creates a session context with the Paimon relation planner pre-registered. Use `register_catalog` to add one or more Paimon catalogs. It also manages session-scoped dynamic options internally for `SET`/`RESET` support.
+`SQLContext::new` creates a session context with the Paimon relation planner pre-registered. Use `register_catalog` to add one or more Paimon catalogs; registering a catalog also registers the built-in table-valued functions (`vector_search`, `full_text_search`) against it. It also manages session-scoped dynamic options internally for `SET`/`RESET` support.
 
 ## Data Types
 
@@ -445,6 +445,10 @@ Paimon supports approximate nearest neighbor (ANN) vector search via the Lumina
 
 ### Registration
 
+When you use a `SQLContext`, `vector_search` is registered automatically for every catalog you register — no extra setup is needed.
+
+With a raw DataFusion `SessionContext`, register it explicitly:
+
 ```rust
 use paimon_datafusion::register_vector_search;
 
@@ -510,6 +514,10 @@ paimon-datafusion = { version = "0.1.0", features = ["fulltext"] }
 
 ### Registration
 
+When you use a `SQLContext`, `full_text_search` is registered automatically for every catalog you register (when the `fulltext` feature is enabled) — no extra setup is needed.
+
+With a raw DataFusion `SessionContext`, register it explicitly:
+
 ```rust
 use paimon_datafusion::register_full_text_search;
 

Reply via email to