This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 48c82b8d chore(ci): Validate the docs coverage on SQL functions (#652)
48c82b8d is described below

commit 48c82b8dee642adc5ae2e9de5f573d20cffd36a4
Author: Hiroaki Yutani <[email protected]>
AuthorDate: Sat Feb 21 13:42:01 2026 +0900

    chore(ci): Validate the docs coverage on SQL functions (#652)
    
    Co-authored-by: Dewey Dunnington <[email protected]>
---
 .github/workflows/packaging.yml                    |   4 +
 Cargo.lock                                         |   4 +
 ci/scripts/check_sql_function_docs.py              | 127 +++++++++++++++++++++
 .../sql/{st_line_merge.qmd => rs_bandpath.qmd}     |  19 +--
 .../sql/{st_geogfromwkt.qmd => rs_setcrs.qmd}      |  22 ++--
 .../sql/{st_geogfromwkt.qmd => rs_setsrid.qmd}     |  20 ++--
 docs/reference/sql/st_astext.qmd                   |   4 +
 docs/reference/sql/st_geogfromwkt.qmd              |   4 +
 docs/reference/sql/st_geomfromwkt.qmd              |   1 +
 .../{st_geomfromwkt.qmd => st_interiorringn.qmd}   |  38 +++---
 .../sql/{st_line_merge.qmd => st_linemerge.qmd}    |   0
 rust/sedona-functions/src/distance.rs              |  20 ----
 rust/sedona-functions/src/register.rs              |   4 -
 sedona-cli/Cargo.toml                              |   4 +
 sedona-cli/src/functions.rs                        |  57 ++++++++-
 sedona-cli/src/main.rs                             |  31 ++++-
 16 files changed, 283 insertions(+), 76 deletions(-)

diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml
index 2316a6ff..7634995e 100644
--- a/.github/workflows/packaging.yml
+++ b/.github/workflows/packaging.yml
@@ -163,6 +163,10 @@ jobs:
         run: |
             pip install "python/sedonadb/[geopandas]" -v
 
+      - name: Validate SQL function docs coverage
+        run: |
+          cargo run -p sedona-cli -- list-functions | ci/scripts/check_sql_function_docs.py -
+
       - name: Build documentation
         run: |
           ci/scripts/build-docs.sh
diff --git a/Cargo.lock b/Cargo.lock
index da4563d2..67e3f4cd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5139,7 +5139,11 @@ dependencies = [
  "rustyline",
  "sedona",
  "sedona-common",
+ "sedona-functions",
+ "sedona-raster-functions",
  "sedona-tg",
+ "serde",
+ "serde_json",
  "tokio",
 ]
 
diff --git a/ci/scripts/check_sql_function_docs.py b/ci/scripts/check_sql_function_docs.py
new file mode 100755
index 00000000..069b1dca
--- /dev/null
+++ b/ci/scripts/check_sql_function_docs.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+IGNORED_FUNCTIONS = {
+    # Internal/unsupported for public docs
+    "st_geomfromwkbunchecked",
+}
+
+
+def load_functions_from_stream(text):
+    data = json.loads(text)
+    if not isinstance(data, list):
+        raise ValueError("Expected top-level JSON array from list-functions")
+    return data
+
+
+def validate_docs(functions, docs_dir):
+    missing_docs = []
+    missing_alias_mentions = []
+
+    for item in functions:
+        name = item.get("name")
+        aliases = item.get("aliases", [])
+        if not isinstance(name, str):
+            raise ValueError(f"Invalid function item (name): {item!r}")
+        if not isinstance(aliases, list) or not all(
+            isinstance(a, str) for a in aliases
+        ):
+            raise ValueError(f"Invalid function item (aliases): {item!r}")
+        if name in IGNORED_FUNCTIONS:
+            continue
+
+        qmd = docs_dir / f"{name}.qmd"
+        if not qmd.exists():
+            missing_docs.append(name)
+            continue
+
+        if aliases:
+            text = qmd.read_text(encoding="utf-8").lower()
+            for alias in aliases:
+                if alias.lower() not in text:
+                    missing_alias_mentions.append(f"{name}:{alias}")
+
+    return missing_docs, missing_alias_mentions
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=(
+            "Validate that docs/reference/sql has a <function>.qmd for every "
+            "function entry and that aliases are mentioned. Input JSON is "
+            "expected to be emitted by `sedona-cli list-functions`."
+        )
+    )
+    parser.add_argument(
+        "--docs-dir",
+        default="docs/reference/sql",
+        help="Path to SQL docs directory (default: docs/reference/sql)",
+    )
+    parser.add_argument(
+        "functions_json",
+        help=(
+            "Path to JSON file emitted by `sedona-cli list-functions`; "
+            "use '-' to read from stdin"
+        ),
+    )
+    args = parser.parse_args()
+
+    docs_dir = Path(args.docs_dir)
+    if not docs_dir.is_dir():
+        raise ValueError(f"ERROR: docs directory not found: {docs_dir}")
+
+    try:
+        if args.functions_json == "-":
+            raw_json = sys.stdin.read()
+        else:
+            raw_json = Path(args.functions_json).read_text(encoding="utf-8")
+        functions = load_functions_from_stream(raw_json)
+        missing_docs, missing_alias_mentions = validate_docs(functions, docs_dir)
+    except Exception as e:  # noqa: BLE001
+        raise ValueError(f"Validation failed: {e}") from e
+
+    source = "stdin" if args.functions_json == "-" else args.functions_json
+    print(f"Checked {len(functions)} functions from: {source}")
+    print(f"Docs directory: {docs_dir}")
+    print(f"Missing docs: {len(missing_docs)}")
+    print(f"Missing alias mentions: {len(missing_alias_mentions)}")
+
+    if missing_docs:
+        print("\nMissing .qmd files:")
+        for name in missing_docs:
+            print(f"- {name}")
+
+    if missing_alias_mentions:
+        print("\nMissing alias mentions (<function>:<alias>):")
+        for item in missing_alias_mentions:
+            print(f"- {item}")
+
+    if missing_docs or missing_alias_mentions:
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    import sys
+
+    sys.exit(main())
diff --git a/docs/reference/sql/st_line_merge.qmd b/docs/reference/sql/rs_bandpath.qmd
similarity index 68%
copy from docs/reference/sql/st_line_merge.qmd
copy to docs/reference/sql/rs_bandpath.qmd
index 9d2ead33..3eed74f9 100644
--- a/docs/reference/sql/st_line_merge.qmd
+++ b/docs/reference/sql/rs_bandpath.qmd
@@ -16,17 +16,22 @@
 # specific language governing permissions and limitations
 # under the License.
 
-title: ST_LineMerge
-description: Merges a collection of potentially connected line segments into the fewest possible LineStrings.
+title: RS_BandPath
+description: >
+  Retrieves the file path of an out-of-database (out-db) raster band, returning
+  the external raster file location referenced by the raster.
 kernels:
-  - returns: geometry
-    args: [geometry]
+  - returns: utf8
+    args: [raster]
 ---
 
+## Description
+
+Primarily used with out-db rasters, where only raster path and geo-referencing
+metadata are stored in the database.
+
 ## Examples
 
 ```sql
-SELECT ST_LineMerge(
-    ST_GeomFromWKT('MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))')
-);
+SELECT RS_BandPath(RS_Example());
 ```
diff --git a/docs/reference/sql/st_geogfromwkt.qmd b/docs/reference/sql/rs_setcrs.qmd
similarity index 73%
copy from docs/reference/sql/st_geogfromwkt.qmd
copy to docs/reference/sql/rs_setcrs.qmd
index ab2340e3..344ce589 100644
--- a/docs/reference/sql/st_geogfromwkt.qmd
+++ b/docs/reference/sql/rs_setcrs.qmd
@@ -16,23 +16,23 @@
 # specific language governing permissions and limitations
 # under the License.
 
-title: ST_GeogFromWKT
-description: Constructs a Geography from WKT.
+title: RS_SetCRS
+description: Sets the Coordinate Reference System (CRS) for a raster.
 kernels:
-  - returns: geography
+  - returns: raster
     args:
-    - name: wkt
+    - raster
+    - name: target_crs
       type: string
-  - returns: geography
-    args:
-    - name: wkt
-      type: string
-    - name: srid
-      type: integer
 ---
 
+## Description
+
+Sets a CRS on a raster. This is metadata-only: raster cell values and
+geotransform are not transformed.
+
 ## Examples
 
 ```sql
-SELECT ST_GeogFromWKT('LINESTRING (1 2, 3 4, 5 6)');
+SELECT RS_SetCRS(RS_Example(), 'EPSG:4326');
 ```
diff --git a/docs/reference/sql/st_geogfromwkt.qmd b/docs/reference/sql/rs_setsrid.qmd
similarity index 75%
copy from docs/reference/sql/st_geogfromwkt.qmd
copy to docs/reference/sql/rs_setsrid.qmd
index ab2340e3..f7d61758 100644
--- a/docs/reference/sql/st_geogfromwkt.qmd
+++ b/docs/reference/sql/rs_setsrid.qmd
@@ -16,23 +16,23 @@
 # specific language governing permissions and limitations
 # under the License.
 
-title: ST_GeogFromWKT
-description: Constructs a Geography from WKT.
+title: RS_SetSRID
+description: Sets the SRID (spatial reference identifier) for a raster.
 kernels:
-  - returns: geography
+  - returns: raster
     args:
-    - name: wkt
-      type: string
-  - returns: geography
-    args:
-    - name: wkt
-      type: string
+    - raster
     - name: srid
       type: integer
 ---
 
+## Description
+
+Sets the SRID of a raster. This is metadata-only: raster cell values and
+geotransform are not transformed.
+
 ## Examples
 
 ```sql
-SELECT ST_GeogFromWKT('LINESTRING (1 2, 3 4, 5 6)');
+SELECT RS_SetSRID(RS_Example(), 4326);
 ```
diff --git a/docs/reference/sql/st_astext.qmd b/docs/reference/sql/st_astext.qmd
index 3d05f6dc..185c62fc 100644
--- a/docs/reference/sql/st_astext.qmd
+++ b/docs/reference/sql/st_astext.qmd
@@ -23,6 +23,10 @@ kernels:
     args: [geometry]
 ---
 
+## Description
+
+Alias: `ST_AsWKT`.
+
 ## Examples
 
 ```sql
diff --git a/docs/reference/sql/st_geogfromwkt.qmd b/docs/reference/sql/st_geogfromwkt.qmd
index ab2340e3..b4ed2cc5 100644
--- a/docs/reference/sql/st_geogfromwkt.qmd
+++ b/docs/reference/sql/st_geogfromwkt.qmd
@@ -31,6 +31,10 @@ kernels:
       type: integer
 ---
 
+## Description
+
+Alias: `ST_GeogFromText`.
+
 ## Examples
 
 ```sql
diff --git a/docs/reference/sql/st_geomfromwkt.qmd b/docs/reference/sql/st_geomfromwkt.qmd
index ddf713a3..4d98ad74 100644
--- a/docs/reference/sql/st_geomfromwkt.qmd
+++ b/docs/reference/sql/st_geomfromwkt.qmd
@@ -34,6 +34,7 @@ kernels:
 ## Description
 
 An optional SRID or CRS can be provided as a second argument to set the spatial reference.
+Aliases: `ST_GeomFromText`, `ST_GeometryFromText`.
 
 ## Examples
 
diff --git a/docs/reference/sql/st_geomfromwkt.qmd b/docs/reference/sql/st_interiorringn.qmd
similarity index 63%
copy from docs/reference/sql/st_geomfromwkt.qmd
copy to docs/reference/sql/st_interiorringn.qmd
index ddf713a3..51648c99 100644
--- a/docs/reference/sql/st_geomfromwkt.qmd
+++ b/docs/reference/sql/st_interiorringn.qmd
@@ -16,37 +16,31 @@
 # specific language governing permissions and limitations
 # under the License.
 
-title: ST_GeomFromWKT
-description: Constructs a Geometry from Well-Known Text (WKT).
+title: ST_InteriorRingN
+description: Returns the Nth interior ring of a polygon.
 kernels:
   - returns: geometry
     args:
-    - name: wkt
-      type: string
-  - returns: geometry
-    args:
-    - name: wkt
-      type: string
-    - name: srid
-      type: crs
+    - geometry
+    - name: n
+      type: integer
 ---
 
 ## Description
 
-An optional SRID or CRS can be provided as a second argument to set the spatial reference.
-
-## Examples
-
-```sql
-SELECT ST_AsText(ST_GeomFromWKT('POINT (30 10)'));
-```
+Returns the Nth interior `LINESTRING` ring of a `POLYGON`. Returns `NULL` if
+the geometry is not a polygon or `n` is out of range.
 
-With an SRID:
+`n` is 1-based in SedonaDB.
 
-```sql
-SELECT ST_GeomFromWKT('POINT (30 10)', 4326);
-```
+## Examples
 
 ```sql
-SELECT ST_GeomFromWKT('POINT (30 10)', 'OGC:CRS27');
+SELECT
+    ST_InteriorRingN(
+        ST_GeomFromText(
+            'POLYGON((0 0, 0 5, 5 5, 5 0, 0 0), (1 1, 2 1, 2 2, 1 2, 1 1), (1 3, 2 3, 2 4, 1 4, 1 3), (3 3, 4 3, 4 4, 3 4, 3 3))'
+        ),
+        1
+    );
 ```
diff --git a/docs/reference/sql/st_line_merge.qmd b/docs/reference/sql/st_linemerge.qmd
similarity index 100%
rename from docs/reference/sql/st_line_merge.qmd
rename to docs/reference/sql/st_linemerge.qmd
diff --git a/rust/sedona-functions/src/distance.rs b/rust/sedona-functions/src/distance.rs
index 726fb587..5924b235 100644
--- a/rust/sedona-functions/src/distance.rs
+++ b/rust/sedona-functions/src/distance.rs
@@ -24,31 +24,11 @@ pub fn st_distance_udf() -> SedonaScalarUDF {
     distance_stub_udf("ST_Distance")
 }
 
-/// ST_DistanceSphere() scalar UDF stub
-pub fn st_distance_sphere_udf() -> SedonaScalarUDF {
-    distance_stub_udf("ST_DistanceSphere")
-}
-
-/// ST_DistanceSpheroid() scalar UDF stub
-pub fn st_distance_spheroid_udf() -> SedonaScalarUDF {
-    distance_stub_udf("ST_DistanceSpheroid")
-}
-
 /// ST_MaxDistance() scalar UDF stub
 pub fn st_max_distance_udf() -> SedonaScalarUDF {
     distance_stub_udf("ST_MaxDistance")
 }
 
-/// ST_HausdorffDistance() scalar UDF stub
-pub fn st_hausdorff_distance_udf() -> SedonaScalarUDF {
-    distance_stub_udf("ST_HausdorffDistance")
-}
-
-/// ST_FrechetDistance() scalar UDF stub
-pub fn st_frechet_distance_udf() -> SedonaScalarUDF {
-    distance_stub_udf("ST_FrechetDistance")
-}
-
 pub fn distance_stub_udf(name: &str) -> SedonaScalarUDF {
     SedonaScalarUDF::new_stub(
         &name.to_lowercase(),
diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs
index e53b5c07..b50079e7 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -38,11 +38,7 @@ pub fn default_function_set() -> FunctionSet {
 
     register_scalar_udfs!(
         function_set,
-        crate::distance::st_distance_sphere_udf,
-        crate::distance::st_distance_spheroid_udf,
         crate::distance::st_distance_udf,
-        crate::distance::st_frechet_distance_udf,
-        crate::distance::st_hausdorff_distance_udf,
         crate::distance::st_max_distance_udf,
         crate::overlay::st_difference_udf,
         crate::overlay::st_intersection_udf,
diff --git a/sedona-cli/Cargo.toml b/sedona-cli/Cargo.toml
index c856d481..671c0d5e 100644
--- a/sedona-cli/Cargo.toml
+++ b/sedona-cli/Cargo.toml
@@ -63,5 +63,9 @@ regex = { workspace = true }
 rustyline = "15.0"
 sedona = { workspace = true, features = ["aws", "gcp", "http", "proj"] }
 sedona-common = { workspace = true }
+sedona-functions = { workspace = true }
+sedona-raster-functions = { workspace = true }
 sedona-tg = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
 tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] }
diff --git a/sedona-cli/src/functions.rs b/sedona-cli/src/functions.rs
index c660c645..62fbcceb 100644
--- a/sedona-cli/src/functions.rs
+++ b/sedona-cli/src/functions.rs
@@ -20,12 +20,15 @@
 use std::fmt;
 use std::str::FromStr;
 use std::sync::Arc;
+use std::{collections::BTreeMap, collections::BTreeSet};
 
 use arrow::array::StringArray;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::pretty_format_batches;
-use datafusion::error::Result;
+use datafusion::error::{DataFusionError, Result};
+use datafusion::logical_expr::{AggregateUDFImpl, ScalarUDFImpl};
+use serde::Serialize;
 
 #[derive(Debug)]
 pub enum Function {
@@ -198,3 +201,55 @@ pub fn display_all_functions() -> Result<()> {
     println!("{}", pretty_format_batches(&[batch]).unwrap());
     Ok(())
 }
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct FunctionInfo {
+    pub name: String,
+    pub aliases: Vec<String>,
+}
+
+pub fn list_all_functions() -> Vec<FunctionInfo> {
+    let mut function_set = sedona_functions::register::default_function_set();
+    function_set.merge(sedona_raster_functions::register::default_function_set());
+
+    let mut functions = BTreeMap::<String, BTreeSet<String>>::new();
+
+    for function in function_set.scalar_udfs() {
+        if !is_sedona_sql_function(function.name()) {
+            continue;
+        }
+        let entry = functions.entry(function.name().to_string()).or_default();
+        for alias in function.aliases() {
+            if is_sedona_sql_function(alias) {
+                entry.insert(alias.to_string());
+            }
+        }
+    }
+
+    for function in function_set.aggregate_udfs() {
+        if !is_sedona_sql_function(function.name()) {
+            continue;
+        }
+        functions.entry(function.name().to_string()).or_default();
+    }
+
+    functions
+        .into_iter()
+        .map(|(name, aliases)| FunctionInfo {
+            name,
+            aliases: aliases.into_iter().collect(),
+        })
+        .collect()
+}
+
+fn is_sedona_sql_function(name: &str) -> bool {
+    let lower = name.to_ascii_lowercase();
+    lower.starts_with("st_") || lower.starts_with("rs_")
+}
+
+pub fn print_all_functions_json() -> Result<()> {
+    let output = serde_json::to_string_pretty(&list_all_functions())
+        .map_err(|e| DataFusionError::External(Box::new(e)))?;
+    println!("{output}");
+    Ok(())
+}
diff --git a/sedona-cli/src/main.rs b/sedona-cli/src/main.rs
index c0964d84..6dd315ee 100644
--- a/sedona-cli/src/main.rs
+++ b/sedona-cli/src/main.rs
@@ -25,12 +25,13 @@ use sedona::memory_pool::DEFAULT_UNSPILLABLE_RESERVE_RATIO;
 use sedona::pool_type::PoolType;
 use sedona_cli::{
     exec,
+    functions::print_all_functions_json,
     print_format::PrintFormat,
     print_options::{MaxRows, PrintOptions},
     DATAFUSION_CLI_VERSION,
 };
 
-use clap::Parser;
+use clap::{Parser, Subcommand, ValueEnum};
 
 #[cfg(feature = "mimalloc")]
 use mimalloc::MiMalloc;
@@ -42,6 +43,9 @@ static GLOBAL: MiMalloc = MiMalloc;
 #[derive(Debug, Parser, PartialEq)]
 #[clap(author, version, about, long_about= None)]
 struct Args {
+    #[command(subcommand)]
+    subcommand: Option<CliSubcommand>,
+
     #[clap(
         short = 'p',
         long,
@@ -122,6 +126,20 @@ struct Args {
     color: bool,
 }
 
+#[derive(Debug, Subcommand, PartialEq)]
+enum CliSubcommand {
+    /// List all built-in functions.
+    ListFunctions {
+        #[clap(long, value_enum, default_value_t = FunctionListFormat::Json)]
+        format: FunctionListFormat,
+    },
+}
+
+#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
+enum FunctionListFormat {
+    Json,
+}
+
 #[tokio::main]
 /// Calls [`main_inner`], then handles printing errors and returning the correct exit code
 pub async fn main() -> ExitCode {
@@ -149,6 +167,17 @@ async fn main_inner() -> Result<()> {
 
     let args = Args::parse();
 
+    if let Some(subcommand) = args.subcommand.as_ref() {
+        match subcommand {
+            CliSubcommand::ListFunctions {
+                format: FunctionListFormat::Json,
+            } => {
+                print_all_functions_json()?;
+                return Ok(());
+            }
+        }
+    }
+
     if !args.quiet {
         println!("Sedona CLI v{DATAFUSION_CLI_VERSION}");
     }

Reply via email to