This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 48c82b8d chore(ci): Validate the docs coverage on SQL functions (#652)
48c82b8d is described below
commit 48c82b8dee642adc5ae2e9de5f573d20cffd36a4
Author: Hiroaki Yutani <[email protected]>
AuthorDate: Sat Feb 21 13:42:01 2026 +0900
chore(ci): Validate the docs coverage on SQL functions (#652)
Co-authored-by: Dewey Dunnington <[email protected]>
---
.github/workflows/packaging.yml | 4 +
Cargo.lock | 4 +
ci/scripts/check_sql_function_docs.py | 127 +++++++++++++++++++++
.../sql/{st_line_merge.qmd => rs_bandpath.qmd} | 19 +--
.../sql/{st_geogfromwkt.qmd => rs_setcrs.qmd} | 22 ++--
.../sql/{st_geogfromwkt.qmd => rs_setsrid.qmd} | 20 ++--
docs/reference/sql/st_astext.qmd | 4 +
docs/reference/sql/st_geogfromwkt.qmd | 4 +
docs/reference/sql/st_geomfromwkt.qmd | 1 +
.../{st_geomfromwkt.qmd => st_interiorringn.qmd} | 38 +++---
.../sql/{st_line_merge.qmd => st_linemerge.qmd} | 0
rust/sedona-functions/src/distance.rs | 20 ----
rust/sedona-functions/src/register.rs | 4 -
sedona-cli/Cargo.toml | 4 +
sedona-cli/src/functions.rs | 57 ++++++++-
sedona-cli/src/main.rs | 31 ++++-
16 files changed, 283 insertions(+), 76 deletions(-)
diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml
index 2316a6ff..7634995e 100644
--- a/.github/workflows/packaging.yml
+++ b/.github/workflows/packaging.yml
@@ -163,6 +163,10 @@ jobs:
run: |
pip install "python/sedonadb/[geopandas]" -v
+ - name: Validate SQL function docs coverage
+ run: |
+ cargo run -p sedona-cli -- list-functions | ci/scripts/check_sql_function_docs.py -
+
- name: Build documentation
run: |
ci/scripts/build-docs.sh
diff --git a/Cargo.lock b/Cargo.lock
index da4563d2..67e3f4cd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5139,7 +5139,11 @@ dependencies = [
"rustyline",
"sedona",
"sedona-common",
+ "sedona-functions",
+ "sedona-raster-functions",
"sedona-tg",
+ "serde",
+ "serde_json",
"tokio",
]
diff --git a/ci/scripts/check_sql_function_docs.py b/ci/scripts/check_sql_function_docs.py
new file mode 100755
index 00000000..069b1dca
--- /dev/null
+++ b/ci/scripts/check_sql_function_docs.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+IGNORED_FUNCTIONS = {
+ # Internal/unsupported for public docs
+ "st_geomfromwkbunchecked",
+}
+
+
+def load_functions_from_stream(text):
+ data = json.loads(text)
+ if not isinstance(data, list):
+ raise ValueError("Expected top-level JSON array from list-functions")
+ return data
+
+
+def validate_docs(functions, docs_dir):
+ missing_docs = []
+ missing_alias_mentions = []
+
+ for item in functions:
+ name = item.get("name")
+ aliases = item.get("aliases", [])
+ if not isinstance(name, str):
+ raise ValueError(f"Invalid function item (name): {item!r}")
+ if not isinstance(aliases, list) or not all(
+ isinstance(a, str) for a in aliases
+ ):
+ raise ValueError(f"Invalid function item (aliases): {item!r}")
+ if name in IGNORED_FUNCTIONS:
+ continue
+
+ qmd = docs_dir / f"{name}.qmd"
+ if not qmd.exists():
+ missing_docs.append(name)
+ continue
+
+ if aliases:
+ text = qmd.read_text(encoding="utf-8").lower()
+ for alias in aliases:
+ if alias.lower() not in text:
+ missing_alias_mentions.append(f"{name}:{alias}")
+
+ return missing_docs, missing_alias_mentions
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description=(
+ "Validate that docs/reference/sql has a <function>.qmd for every "
+ "function entry and that aliases are mentioned. Input JSON is "
+ "expected to be emitted by `sedona-cli list-functions`."
+ )
+ )
+ parser.add_argument(
+ "--docs-dir",
+ default="docs/reference/sql",
+ help="Path to SQL docs directory (default: docs/reference/sql)",
+ )
+ parser.add_argument(
+ "functions_json",
+ help=(
+ "Path to JSON file emitted by `sedona-cli list-functions`; "
+ "use '-' to read from stdin"
+ ),
+ )
+ args = parser.parse_args()
+
+ docs_dir = Path(args.docs_dir)
+ if not docs_dir.is_dir():
+ raise ValueError(f"ERROR: docs directory not found: {docs_dir}")
+
+ try:
+ if args.functions_json == "-":
+ raw_json = sys.stdin.read()
+ else:
+ raw_json = Path(args.functions_json).read_text(encoding="utf-8")
+ functions = load_functions_from_stream(raw_json)
+ missing_docs, missing_alias_mentions = validate_docs(functions, docs_dir)
+ except Exception as e: # noqa: BLE001
+ raise ValueError(f"Validation failed: {e}") from e
+
+ source = "stdin" if args.functions_json == "-" else args.functions_json
+ print(f"Checked {len(functions)} functions from: {source}")
+ print(f"Docs directory: {docs_dir}")
+ print(f"Missing docs: {len(missing_docs)}")
+ print(f"Missing alias mentions: {len(missing_alias_mentions)}")
+
+ if missing_docs:
+ print("\nMissing .qmd files:")
+ for name in missing_docs:
+ print(f"- {name}")
+
+ if missing_alias_mentions:
+ print("\nMissing alias mentions (<function>:<alias>):")
+ for item in missing_alias_mentions:
+ print(f"- {item}")
+
+ if missing_docs or missing_alias_mentions:
+ return 1
+ return 0
+
+
+if __name__ == "__main__":
+ import sys
+
+ sys.exit(main())
diff --git a/docs/reference/sql/st_line_merge.qmd b/docs/reference/sql/rs_bandpath.qmd
similarity index 68%
copy from docs/reference/sql/st_line_merge.qmd
copy to docs/reference/sql/rs_bandpath.qmd
index 9d2ead33..3eed74f9 100644
--- a/docs/reference/sql/st_line_merge.qmd
+++ b/docs/reference/sql/rs_bandpath.qmd
@@ -16,17 +16,22 @@
# specific language governing permissions and limitations
# under the License.
-title: ST_LineMerge
-description: Merges a collection of potentially connected line segments into the fewest possible LineStrings.
+title: RS_BandPath
+description: >
+ Retrieves the file path of an out-of-database (out-db) raster band, returning
+ the external raster file location referenced by the raster.
kernels:
- - returns: geometry
- args: [geometry]
+ - returns: utf8
+ args: [raster]
---
+## Description
+
+Primarily used with out-db rasters, where only raster path and geo-referencing
+metadata are stored in the database.
+
## Examples
```sql
-SELECT ST_LineMerge(
- ST_GeomFromWKT('MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))')
-);
+SELECT RS_BandPath(RS_Example());
```
diff --git a/docs/reference/sql/st_geogfromwkt.qmd b/docs/reference/sql/rs_setcrs.qmd
similarity index 73%
copy from docs/reference/sql/st_geogfromwkt.qmd
copy to docs/reference/sql/rs_setcrs.qmd
index ab2340e3..344ce589 100644
--- a/docs/reference/sql/st_geogfromwkt.qmd
+++ b/docs/reference/sql/rs_setcrs.qmd
@@ -16,23 +16,23 @@
# specific language governing permissions and limitations
# under the License.
-title: ST_GeogFromWKT
-description: Constructs a Geography from WKT.
+title: RS_SetCRS
+description: Sets the Coordinate Reference System (CRS) for a raster.
kernels:
- - returns: geography
+ - returns: raster
args:
- - name: wkt
+ - raster
+ - name: target_crs
type: string
- - returns: geography
- args:
- - name: wkt
- type: string
- - name: srid
- type: integer
---
+## Description
+
+Sets a CRS on a raster. This is metadata-only: raster cell values and
+geotransform are not transformed.
+
## Examples
```sql
-SELECT ST_GeogFromWKT('LINESTRING (1 2, 3 4, 5 6)');
+SELECT RS_SetCRS(RS_Example(), 'EPSG:4326');
```
diff --git a/docs/reference/sql/st_geogfromwkt.qmd b/docs/reference/sql/rs_setsrid.qmd
similarity index 75%
copy from docs/reference/sql/st_geogfromwkt.qmd
copy to docs/reference/sql/rs_setsrid.qmd
index ab2340e3..f7d61758 100644
--- a/docs/reference/sql/st_geogfromwkt.qmd
+++ b/docs/reference/sql/rs_setsrid.qmd
@@ -16,23 +16,23 @@
# specific language governing permissions and limitations
# under the License.
-title: ST_GeogFromWKT
-description: Constructs a Geography from WKT.
+title: RS_SetSRID
+description: Sets the SRID (spatial reference identifier) for a raster.
kernels:
- - returns: geography
+ - returns: raster
args:
- - name: wkt
- type: string
- - returns: geography
- args:
- - name: wkt
- type: string
+ - raster
- name: srid
type: integer
---
+## Description
+
+Sets the SRID of a raster. This is metadata-only: raster cell values and
+geotransform are not transformed.
+
## Examples
```sql
-SELECT ST_GeogFromWKT('LINESTRING (1 2, 3 4, 5 6)');
+SELECT RS_SetSRID(RS_Example(), 4326);
```
diff --git a/docs/reference/sql/st_astext.qmd b/docs/reference/sql/st_astext.qmd
index 3d05f6dc..185c62fc 100644
--- a/docs/reference/sql/st_astext.qmd
+++ b/docs/reference/sql/st_astext.qmd
@@ -23,6 +23,10 @@ kernels:
args: [geometry]
---
+## Description
+
+Alias: `ST_AsWKT`.
+
## Examples
```sql
diff --git a/docs/reference/sql/st_geogfromwkt.qmd b/docs/reference/sql/st_geogfromwkt.qmd
index ab2340e3..b4ed2cc5 100644
--- a/docs/reference/sql/st_geogfromwkt.qmd
+++ b/docs/reference/sql/st_geogfromwkt.qmd
@@ -31,6 +31,10 @@ kernels:
type: integer
---
+## Description
+
+Alias: `ST_GeogFromText`.
+
## Examples
```sql
diff --git a/docs/reference/sql/st_geomfromwkt.qmd b/docs/reference/sql/st_geomfromwkt.qmd
index ddf713a3..4d98ad74 100644
--- a/docs/reference/sql/st_geomfromwkt.qmd
+++ b/docs/reference/sql/st_geomfromwkt.qmd
@@ -34,6 +34,7 @@ kernels:
## Description
An optional SRID or CRS can be provided as a second argument to set the spatial reference.
+Aliases: `ST_GeomFromText`, `ST_GeometryFromText`.
## Examples
diff --git a/docs/reference/sql/st_geomfromwkt.qmd b/docs/reference/sql/st_interiorringn.qmd
similarity index 63%
copy from docs/reference/sql/st_geomfromwkt.qmd
copy to docs/reference/sql/st_interiorringn.qmd
index ddf713a3..51648c99 100644
--- a/docs/reference/sql/st_geomfromwkt.qmd
+++ b/docs/reference/sql/st_interiorringn.qmd
@@ -16,37 +16,31 @@
# specific language governing permissions and limitations
# under the License.
-title: ST_GeomFromWKT
-description: Constructs a Geometry from Well-Known Text (WKT).
+title: ST_InteriorRingN
+description: Returns the Nth interior ring of a polygon.
kernels:
- returns: geometry
args:
- - name: wkt
- type: string
- - returns: geometry
- args:
- - name: wkt
- type: string
- - name: srid
- type: crs
+ - geometry
+ - name: n
+ type: integer
---
## Description
-An optional SRID or CRS can be provided as a second argument to set the spatial reference.
-
-## Examples
-
-```sql
-SELECT ST_AsText(ST_GeomFromWKT('POINT (30 10)'));
-```
+Returns the Nth interior `LINESTRING` ring of a `POLYGON`. Returns `NULL` if
+the geometry is not a polygon or `n` is out of range.
-With an SRID:
+`n` is 1-based in SedonaDB.
-```sql
-SELECT ST_GeomFromWKT('POINT (30 10)', 4326);
-```
+## Examples
```sql
-SELECT ST_GeomFromWKT('POINT (30 10)', 'OGC:CRS27');
+SELECT
+ ST_InteriorRingN(
+ ST_GeomFromText(
+ 'POLYGON((0 0, 0 5, 5 5, 5 0, 0 0), (1 1, 2 1, 2 2, 1 2, 1 1), (1 3, 2 3, 2 4, 1 4, 1 3), (3 3, 4 3, 4 4, 3 4, 3 3))'
+ ),
+ 1
+ );
```
diff --git a/docs/reference/sql/st_line_merge.qmd b/docs/reference/sql/st_linemerge.qmd
similarity index 100%
rename from docs/reference/sql/st_line_merge.qmd
rename to docs/reference/sql/st_linemerge.qmd
diff --git a/rust/sedona-functions/src/distance.rs b/rust/sedona-functions/src/distance.rs
index 726fb587..5924b235 100644
--- a/rust/sedona-functions/src/distance.rs
+++ b/rust/sedona-functions/src/distance.rs
@@ -24,31 +24,11 @@ pub fn st_distance_udf() -> SedonaScalarUDF {
distance_stub_udf("ST_Distance")
}
-/// ST_DistanceSphere() scalar UDF stub
-pub fn st_distance_sphere_udf() -> SedonaScalarUDF {
- distance_stub_udf("ST_DistanceSphere")
-}
-
-/// ST_DistanceSpheroid() scalar UDF stub
-pub fn st_distance_spheroid_udf() -> SedonaScalarUDF {
- distance_stub_udf("ST_DistanceSpheroid")
-}
-
/// ST_MaxDistance() scalar UDF stub
pub fn st_max_distance_udf() -> SedonaScalarUDF {
distance_stub_udf("ST_MaxDistance")
}
-/// ST_HausdorffDistance() scalar UDF stub
-pub fn st_hausdorff_distance_udf() -> SedonaScalarUDF {
- distance_stub_udf("ST_HausdorffDistance")
-}
-
-/// ST_FrechetDistance() scalar UDF stub
-pub fn st_frechet_distance_udf() -> SedonaScalarUDF {
- distance_stub_udf("ST_FrechetDistance")
-}
-
pub fn distance_stub_udf(name: &str) -> SedonaScalarUDF {
SedonaScalarUDF::new_stub(
&name.to_lowercase(),
diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs
index e53b5c07..b50079e7 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -38,11 +38,7 @@ pub fn default_function_set() -> FunctionSet {
register_scalar_udfs!(
function_set,
- crate::distance::st_distance_sphere_udf,
- crate::distance::st_distance_spheroid_udf,
crate::distance::st_distance_udf,
- crate::distance::st_frechet_distance_udf,
- crate::distance::st_hausdorff_distance_udf,
crate::distance::st_max_distance_udf,
crate::overlay::st_difference_udf,
crate::overlay::st_intersection_udf,
diff --git a/sedona-cli/Cargo.toml b/sedona-cli/Cargo.toml
index c856d481..671c0d5e 100644
--- a/sedona-cli/Cargo.toml
+++ b/sedona-cli/Cargo.toml
@@ -63,5 +63,9 @@ regex = { workspace = true }
rustyline = "15.0"
sedona = { workspace = true, features = ["aws", "gcp", "http", "proj"] }
sedona-common = { workspace = true }
+sedona-functions = { workspace = true }
+sedona-raster-functions = { workspace = true }
sedona-tg = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] }
diff --git a/sedona-cli/src/functions.rs b/sedona-cli/src/functions.rs
index c660c645..62fbcceb 100644
--- a/sedona-cli/src/functions.rs
+++ b/sedona-cli/src/functions.rs
@@ -20,12 +20,15 @@
use std::fmt;
use std::str::FromStr;
use std::sync::Arc;
+use std::{collections::BTreeMap, collections::BTreeSet};
use arrow::array::StringArray;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use arrow::util::pretty::pretty_format_batches;
-use datafusion::error::Result;
+use datafusion::error::{DataFusionError, Result};
+use datafusion::logical_expr::{AggregateUDFImpl, ScalarUDFImpl};
+use serde::Serialize;
#[derive(Debug)]
pub enum Function {
@@ -198,3 +201,55 @@ pub fn display_all_functions() -> Result<()> {
println!("{}", pretty_format_batches(&[batch]).unwrap());
Ok(())
}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct FunctionInfo {
+ pub name: String,
+ pub aliases: Vec<String>,
+}
+
+pub fn list_all_functions() -> Vec<FunctionInfo> {
+ let mut function_set = sedona_functions::register::default_function_set();
+ function_set.merge(sedona_raster_functions::register::default_function_set());
+
+ let mut functions = BTreeMap::<String, BTreeSet<String>>::new();
+
+ for function in function_set.scalar_udfs() {
+ if !is_sedona_sql_function(function.name()) {
+ continue;
+ }
+ let entry = functions.entry(function.name().to_string()).or_default();
+ for alias in function.aliases() {
+ if is_sedona_sql_function(alias) {
+ entry.insert(alias.to_string());
+ }
+ }
+ }
+
+ for function in function_set.aggregate_udfs() {
+ if !is_sedona_sql_function(function.name()) {
+ continue;
+ }
+ functions.entry(function.name().to_string()).or_default();
+ }
+
+ functions
+ .into_iter()
+ .map(|(name, aliases)| FunctionInfo {
+ name,
+ aliases: aliases.into_iter().collect(),
+ })
+ .collect()
+}
+
+fn is_sedona_sql_function(name: &str) -> bool {
+ let lower = name.to_ascii_lowercase();
+ lower.starts_with("st_") || lower.starts_with("rs_")
+}
+
+pub fn print_all_functions_json() -> Result<()> {
+ let output = serde_json::to_string_pretty(&list_all_functions())
+ .map_err(|e| DataFusionError::External(Box::new(e)))?;
+ println!("{output}");
+ Ok(())
+}
diff --git a/sedona-cli/src/main.rs b/sedona-cli/src/main.rs
index c0964d84..6dd315ee 100644
--- a/sedona-cli/src/main.rs
+++ b/sedona-cli/src/main.rs
@@ -25,12 +25,13 @@ use sedona::memory_pool::DEFAULT_UNSPILLABLE_RESERVE_RATIO;
use sedona::pool_type::PoolType;
use sedona_cli::{
exec,
+ functions::print_all_functions_json,
print_format::PrintFormat,
print_options::{MaxRows, PrintOptions},
DATAFUSION_CLI_VERSION,
};
-use clap::Parser;
+use clap::{Parser, Subcommand, ValueEnum};
#[cfg(feature = "mimalloc")]
use mimalloc::MiMalloc;
@@ -42,6 +43,9 @@ static GLOBAL: MiMalloc = MiMalloc;
#[derive(Debug, Parser, PartialEq)]
#[clap(author, version, about, long_about= None)]
struct Args {
+ #[command(subcommand)]
+ subcommand: Option<CliSubcommand>,
+
#[clap(
short = 'p',
long,
@@ -122,6 +126,20 @@ struct Args {
color: bool,
}
+#[derive(Debug, Subcommand, PartialEq)]
+enum CliSubcommand {
+ /// List all built-in functions.
+ ListFunctions {
+ #[clap(long, value_enum, default_value_t = FunctionListFormat::Json)]
+ format: FunctionListFormat,
+ },
+}
+
+#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
+enum FunctionListFormat {
+ Json,
+}
+
#[tokio::main]
/// Calls [`main_inner`], then handles printing errors and returning the correct exit code
pub async fn main() -> ExitCode {
@@ -149,6 +167,17 @@ async fn main_inner() -> Result<()> {
let args = Args::parse();
+ if let Some(subcommand) = args.subcommand.as_ref() {
+ match subcommand {
+ CliSubcommand::ListFunctions {
+ format: FunctionListFormat::Json,
+ } => {
+ print_all_functions_json()?;
+ return Ok(());
+ }
+ }
+ }
+
if !args.quiet {
println!("Sedona CLI v{DATAFUSION_CLI_VERSION}");
}