This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-rust.git
The following commit(s) were added to refs/heads/main by this push:
new df6074a feat(datafusion): auto-register built-in table functions on catalog registration (#324)
df6074a is described below
commit df6074a4267e605e00d89d02cd837b5b54063b65
Author: shyjsarah <[email protected]>
AuthorDate: Tue May 19 00:31:42 2026 -0700
feat(datafusion): auto-register built-in table functions on catalog registration (#324)
---
bindings/python/Cargo.toml | 2 +-
bindings/python/tests/test_datafusion.py | 17 +++++++++++++++++
crates/integrations/datafusion/src/sql_context.rs | 14 ++++++++++++++
docs/src/sql.md | 10 +++++++++-
4 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 6ed2406..0c3b487 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -31,6 +31,6 @@ arrow = { workspace = true, features = ["pyarrow"] }
datafusion = { workspace = true }
datafusion-ffi = { workspace = true }
paimon = { path = "../../crates/paimon", features = ["storage-all"] }
-paimon-datafusion = { path = "../../crates/integrations/datafusion" }
+paimon-datafusion = { path = "../../crates/integrations/datafusion", features = ["fulltext"] }
pyo3 = { version = "0.28", features = ["abi3-py310"] }
tokio = { workspace = true }
diff --git a/bindings/python/tests/test_datafusion.py b/bindings/python/tests/test_datafusion.py
index 5e4e5e9..2576b7c 100644
--- a/bindings/python/tests/test_datafusion.py
+++ b/bindings/python/tests/test_datafusion.py
@@ -177,3 +177,20 @@ def test_register_batch_invalid_catalog():
assert False, "Expected an error for unknown catalog"
except Exception as e:
assert "unknown_catalog" in str(e).lower() or "not a paimon" in str(e).lower() or "unknown" in str(e).lower()
+
+
+def test_table_functions_registered_with_catalog():
+ """register_catalog auto-registers vector_search / full_text_search as
+ UDTFs. Calling one with the wrong argument count surfaces the function's
+ own validation error, which proves it is registered — an unregistered
+ name would instead fail with 'table function not found'."""
+ with tempfile.TemporaryDirectory() as warehouse:
+ ctx = SQLContext()
+ ctx.register_catalog("paimon", {"warehouse": warehouse})
+
+ for fn in ("vector_search", "full_text_search"):
+ try:
+ ctx.sql(f"SELECT * FROM {fn}('only_one_arg')")
+ assert False, f"expected {fn} to reject a single argument"
+ except Exception as e:
+ assert "requires 4 arguments" in str(e), str(e)
diff --git a/crates/integrations/datafusion/src/sql_context.rs b/crates/integrations/datafusion/src/sql_context.rs
index 973e263..f77b79e 100644
--- a/crates/integrations/datafusion/src/sql_context.rs
+++ b/crates/integrations/datafusion/src/sql_context.rs
@@ -136,6 +136,7 @@ impl SQLContext {
self.dynamic_options.clone(),
)),
);
+ register_table_functions(&self.ctx, &catalog, default_db);
self.catalogs.insert(catalog_name.clone(), catalog);
if is_first {
self.set_current_catalog(catalog_name).await?;
@@ -2302,6 +2303,19 @@ fn ok_result(ctx: &SessionContext) -> DFResult<DataFrame> {
Ok(df)
}
+/// Registers the built-in table-valued functions against `catalog` so they can
+/// be used in SQL without any extra setup call. Called for every catalog
+/// registered on the context; add new built-in table functions here.
+fn register_table_functions(
+ ctx: &SessionContext,
+ catalog: &Arc<dyn Catalog>,
+ default_database: &str,
+) {
+ crate::vector_search::register_vector_search(ctx, Arc::clone(catalog), default_database);
+ #[cfg(feature = "fulltext")]
+ crate::full_text_search::register_full_text_search(ctx, Arc::clone(catalog), default_database);
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/docs/src/sql.md b/docs/src/sql.md
index 94eef71..325d236 100644
--- a/docs/src/sql.md
+++ b/docs/src/sql.md
@@ -53,7 +53,7 @@ async fn example() -> Result<(), Box<dyn std::error::Error>> {
}
```
-`SQLContext::new` creates a session context with the Paimon relation planner pre-registered. Use `register_catalog` to add one or more Paimon catalogs. It also manages session-scoped dynamic options internally for `SET`/`RESET` support.
+`SQLContext::new` creates a session context with the Paimon relation planner pre-registered. Use `register_catalog` to add one or more Paimon catalogs; registering a catalog also registers the built-in table-valued functions (`vector_search`, `full_text_search`) against it. It also manages session-scoped dynamic options internally for `SET`/`RESET` support.
## Data Types
@@ -445,6 +445,10 @@ Paimon supports approximate nearest neighbor (ANN) vector search via the Lumina
### Registration
+When you use a `SQLContext`, `vector_search` is registered automatically for every catalog you register — no extra setup is needed.
+
+With a raw DataFusion `SessionContext`, register it explicitly:
+
```rust
use paimon_datafusion::register_vector_search;
@@ -510,6 +514,10 @@ paimon-datafusion = { version = "0.1.0", features = ["fulltext"] }
### Registration
+When you use a `SQLContext`, `full_text_search` is registered automatically for every catalog you register (when the `fulltext` feature is enabled) — no extra setup is needed.
+
+With a raw DataFusion `SessionContext`, register it explicitly:
+
```rust
use paimon_datafusion::register_full_text_search;