Copilot commented on code in PR #816:
URL: https://github.com/apache/sedona-db/pull/816#discussion_r3198944365


##########
c/sedona-s2geography/src/kernels.rs:
##########
@@ -46,7 +53,202 @@ pub fn s2_scalar_kernels() -> Result<Vec<(String, 
ScalarKernelRef)>> {
                 Arc::new(imported_kernel) as ScalarKernelRef,
             ))
         })
-        .collect()
+        .collect::<Result<Vec<_>>>()?;
+
+    // A few functions need explicit NULL type matching. This is mostly a 
DataFusion
+    // thing (NULL can happen when binding parameters) so we handle it here 
and not
+    // in s2geography.
+
+    // Binary (geography, geography) -> Boolean
+    let binary_bool_fns = [
+        "st_contains",
+        "st_disjoint",
+        "st_equals",
+        "st_intersects",
+        "st_within",
+    ];
+    for fn_name in binary_bool_fns {
+        kernels.push((
+            fn_name.to_string(),
+            Arc::new(NullKernelHelper::new(ArgMatcher::new(
+                vec![ArgMatcher::is_geography(), ArgMatcher::is_null()],
+                SedonaType::Arrow(DataType::Boolean),
+            ))),
+        ));
+        kernels.push((
+            fn_name.to_string(),
+            Arc::new(NullKernelHelper::new(ArgMatcher::new(
+                vec![ArgMatcher::is_null(), ArgMatcher::is_geography()],
+                SedonaType::Arrow(DataType::Boolean),
+            ))),
+        ));
+    }
+
+    // Binary (geography, geography) -> Float64
+    let binary_float_fns = ["st_distance", "st_linelocatepoint", 
"st_maxdistance"];
+    for fn_name in binary_float_fns {
+        kernels.push((
+            fn_name.to_string(),
+            Arc::new(NullKernelHelper::new(ArgMatcher::new(
+                vec![ArgMatcher::is_geography(), ArgMatcher::is_null()],
+                SedonaType::Arrow(DataType::Float64),
+            ))),
+        ));
+        kernels.push((
+            fn_name.to_string(),
+            Arc::new(NullKernelHelper::new(ArgMatcher::new(
+                vec![ArgMatcher::is_null(), ArgMatcher::is_geography()],
+                SedonaType::Arrow(DataType::Float64),
+            ))),
+        ));
+    }
+
+    // Binary (geography, geography) -> geography
+    let binary_geog_fns = [
+        "st_closestpoint",
+        "st_difference",
+        "st_intersection",
+        "st_longestline",
+        "st_shortestline",
+        "st_symdifference",
+        "st_union",
+    ];
+    for fn_name in binary_geog_fns {
+        kernels.push((
+            fn_name.to_string(),
+            Arc::new(NullKernelHelper::new(ArgMatcher::new(
+                vec![ArgMatcher::is_geography(), ArgMatcher::is_null()],
+                WKB_GEOGRAPHY,
+            ))),
+        ));
+        kernels.push((
+            fn_name.to_string(),
+            Arc::new(NullKernelHelper::new(ArgMatcher::new(
+                vec![ArgMatcher::is_null(), ArgMatcher::is_geography()],
+                WKB_GEOGRAPHY,
+            ))),
+        ));
+    }
+
+    // Ternary (geography, geography, float64) -> Boolean
+    let ternary_bool_fns = ["st_dwithin"];
+    for fn_name in ternary_bool_fns {
+        kernels.push((
+            fn_name.to_string(),
+            Arc::new(NullKernelHelper::new(ArgMatcher::new(
+                vec![
+                    ArgMatcher::is_geography(),
+                    ArgMatcher::is_null(),
+                    ArgMatcher::is_numeric(),
+                ],
+                SedonaType::Arrow(DataType::Boolean),
+            ))),
+        ));
+        kernels.push((
+            fn_name.to_string(),
+            Arc::new(NullKernelHelper::new(ArgMatcher::new(
+                vec![
+                    ArgMatcher::is_null(),
+                    ArgMatcher::is_geography(),
+                    ArgMatcher::is_numeric(),
+                ],
+                SedonaType::Arrow(DataType::Boolean),
+            ))),
+        ));

Review Comment:
   The NULL-type helper signatures for `st_dwithin` only cover NULLs in the 
first/second (geography) arguments. If the numeric distance argument is a 
literal/bound NULL, it will still route to the imported kernel with an 
`Arrow(Null)` third arg type, which defeats the purpose of adding NULL-type 
matching here. Consider adding a `NullKernelHelper` signature for `(geography, 
geography, NULL)` (and potentially similar NULL-numeric positions for other 
arities) so these calls reliably return NULL instead of failing type 
resolution/FFI init.
   



##########
python/sedonadb/tests/geography/test_geog_transformations.py:
##########
@@ -37,3 +37,840 @@ def test_st_centroid(eng, geom, expected):
     eng.assert_query_result(
         f"SELECT ST_Centroid({geog_or_null(geom)})", expected, wkt_precision=4
     )
+
+
[email protected]("eng", [SedonaDB, BigQuery])
[email protected](
+    ("geog", "expected"),
+    [
+        # Nulls
+        pytest.param(None, None, id="null_centroid"),
+        # Empties
+        pytest.param("POINT EMPTY", "GEOMETRYCOLLECTION EMPTY", 
id="point_empty"),
+        pytest.param(
+            "LINESTRING EMPTY", "GEOMETRYCOLLECTION EMPTY", 
id="linestring_empty"
+        ),
+        pytest.param("POLYGON EMPTY", "GEOMETRYCOLLECTION EMPTY", 
id="polygon_empty"),
+        # Points
+        pytest.param("POINT (0 1)", "POINT (0 1)", id="point"),
+        pytest.param("MULTIPOINT ((0 0), (0 1))", "POINT (0 0.5)", 
id="multipoint"),
+        # Linestrings
+        pytest.param("LINESTRING (0 0, 0 1)", "POINT (0 0.5)", 
id="linestring"),
+        pytest.param(
+            "LINESTRING (0 0, 0 1, 0 5)", "POINT (0 2.5)", 
id="linestring_two_segments"
+        ),
+        # Polygons
+        pytest.param(
+            "POLYGON ((0 0, 0 1, 1 0, 0 0))",
+            "POINT (0.3333498812 0.3333442395)",
+            id="triangle",
+        ),
+    ],
+)
+def test_st_centroid_extended(eng, geog, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_Centroid({geog_or_null(geog)})", expected, wkt_precision=10
+    )
+
+
+# Neither PostGIS nor BigQuery support ZM in their centroid calculation
[email protected]("eng", [SedonaDB])
[email protected](
+    ("geom", "expected"),
+    [
+        # Points with Z
+        pytest.param("POINT Z (0 1 10)", "POINT Z (0 1 10)", id="point_z"),
+        pytest.param(
+            "MULTIPOINT Z ((0 0 10), (0 1 11))",
+            "POINT Z (0 0.5 10.5)",
+            id="multipoint_z",
+        ),
+        # Points with M
+        pytest.param("POINT M (0 1 10)", "POINT M (0 1 10)", id="point_m"),
+        pytest.param(
+            "MULTIPOINT M ((0 0 10), (0 1 11))",
+            "POINT M (0 0.5 10.5)",
+            id="multipoint_m",
+        ),
+        # Points with ZM
+        pytest.param("POINT ZM (0 1 10 20)", "POINT ZM (0 1 10 20)", 
id="point_zm"),
+        pytest.param(
+            "MULTIPOINT ZM ((0 0 10 20), (0 1 11 21))",
+            "POINT ZM (0 0.5 10.5 20.5)",
+            id="multipoint_zm",
+        ),
+        # Linestrings with Z
+        pytest.param(
+            "LINESTRING Z (0 0 10, 0 1 11)",
+            "POINT Z (0 0.5 10.5)",
+            id="linestring_z",
+        ),
+        pytest.param(
+            "LINESTRING Z (0 0 10, 0 1 11, 0 5 15)",
+            "POINT Z (0 2.5 12.5)",
+            id="linestring_two_segments_z",
+        ),
+        # Linestrings with M
+        pytest.param(
+            "LINESTRING M (0 0 10, 0 1 11)",
+            "POINT M (0 0.5 10.5)",
+            id="linestring_m",
+        ),
+        pytest.param(
+            "LINESTRING M (0 0 10, 0 1 11, 0 5 15)",
+            "POINT M (0 2.5 12.5)",
+            id="linestring_two_segments_m",
+        ),
+        # Linestrings with ZM
+        pytest.param(
+            "LINESTRING ZM (0 0 10 20, 0 1 11 21)",
+            "POINT ZM (0 0.5 10.5 20.5)",
+            id="linestring_zm",
+        ),
+        pytest.param(
+            "LINESTRING ZM (0 0 10 20, 0 1 11 21, 0 5 15 25)",
+            "POINT ZM (0 2.5 12.5 22.5)",
+            id="linestring_two_segments_zm",
+        ),
+        # Polygons with Z
+        pytest.param(
+            "POLYGON Z ((0 0 10, 0 1 10, 1 0 10, 0 0 10))",
+            "POINT Z (0.3333 0.3333 10)",
+            id="triangle_z",
+        ),
+        pytest.param(
+            "POLYGON Z ((0 0 10, 0 2 10, 2 0 10, 0 0 10), "
+            "(0.1 0.1 11, 0.1 0.5 11, 0.5 0.1 11, 0.1 0.1 11))",
+            "POINT Z (0.6849 0.6848 10.0385)",
+            id="polygon_with_hole_z",
+        ),
+        # Polygons with M
+        pytest.param(
+            "POLYGON M ((0 0 10, 0 1 10, 1 0 10, 0 0 10))",
+            "POINT M (0.3333 0.3333 10)",
+            id="triangle_m",
+        ),
+        pytest.param(
+            "POLYGON M ((0 0 10, 0 2 10, 2 0 10, 0 0 10), "
+            "(0.1 0.1 11, 0.1 0.5 11, 0.5 0.1 11, 0.1 0.1 11))",
+            "POINT M (0.6849 0.6848 10.0385)",
+            id="polygon_with_hole_m",
+        ),
+        # Polygons with ZM
+        pytest.param(
+            "POLYGON ZM ((0 0 10 20, 0 1 10 20, 1 0 10 20, 0 0 10 20))",
+            "POINT ZM (0.3333 0.3333 10 20)",
+            id="triangle_zm",
+        ),
+        pytest.param(
+            "POLYGON ZM ((0 0 10 20, 0 2 10 20, 2 0 10 20, 0 0 10 20), "
+            "(0.1 0.1 11 21, 0.1 0.5 11 21, 0.5 0.1 11 21, 0.1 0.1 11 21))",
+            "POINT ZM (0.6849 0.6848 10.0385 20.0385)",
+            id="polygon_with_hole_zm",
+        ),
+    ],
+)
+def test_st_centroid_zm(eng, geom, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_Centroid({geog_or_null(geom)})", expected, wkt_precision=4
+    )
+
+
[email protected]("eng", [SedonaDB, BigQuery])
[email protected](
+    ("geog", "expected"),
+    [
+        # Nulls
+        pytest.param(None, None, id="null_convex_hull"),
+        # Points
+        pytest.param("POINT (0 1)", "POINT (0 1)", id="point"),
+        pytest.param(
+            "MULTIPOINT ((0 0), (0 1), (1 0))",
+            "POLYGON ((0 0, 1 0, 0 1, 0 0))",
+            id="multipoint_three",
+        ),
+        # Linestrings
+        pytest.param(
+            "LINESTRING (0 0, 0 1, 1 0)",
+            "POLYGON ((0 0, 1 0, 0 1, 0 0))",
+            id="linestring_non_colinear",
+        ),
+        # Polygons
+        pytest.param(
+            "POLYGON ((0 0, 0 1, 1 0, 0 0))",
+            "POLYGON ((0 0, 1 0, 0 1, 0 0))",
+            id="triangle",
+        ),
+        pytest.param(
+            "POLYGON ((0 0, 0 2, 2 0, 0 0), (0.1 0.1, 0.1 0.5, 0.5 0.1, 0.1 
0.1))",
+            "POLYGON ((0 0, 2 0, 0 2, 0 0))",
+            id="polygon_with_hole",
+        ),
+        # GeometryCollection (convex hull of all vertices)
+        pytest.param(
+            "GEOMETRYCOLLECTION (POINT (5 5), LINESTRING (0 0, 0 1), POLYGON 
((0 0, 0 1, 1 0, 0 0)))",
+            "POLYGON ((0 0, 1 0, 5 5, 0 1, 0 0))",
+            id="geometrycollection",
+        ),
+    ],
+)
+def test_st_convexhull(eng, geog, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_ConvexHull({geog_or_null(geog)})", expected, 
wkt_precision=10
+    )
+
+
[email protected]("eng", [SedonaDB])
[email protected](
+    ("geog", "expected"),
+    [
+        # Empties
+        pytest.param("POINT EMPTY", "POINT (nan nan)", id="point_empty"),
+        pytest.param("LINESTRING EMPTY", "LINESTRING EMPTY", 
id="linestring_empty"),
+        pytest.param("POLYGON EMPTY", "POLYGON EMPTY", id="polygon_empty"),
+        pytest.param(
+            "MULTIPOINT ((0 0), (0 1))", "LINESTRING (0 0, 0 1)", 
id="multipoint_two"
+        ),
+        # Linestrings
+        pytest.param("LINESTRING (0 0, 0 1)", "LINESTRING (0 0, 0 1)", 
id="linestring"),
+        pytest.param(
+            "LINESTRING (0 0, 0 1, 0 2)",
+            "LINESTRING (0 0, 0 2)",
+            id="linestring_colinear",
+        ),
+    ],
+)
+def test_st_convexhull_degenerate(eng, geog, expected):
+    # Empty/degenerate behaviour does not match BigQuery but instead matches
+    # what PostGIS would give for a geometry implementation.
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_ConvexHull({geog_or_null(geog)})",
+        expected,
+    )
+
+
[email protected]("eng", [SedonaDB])
[email protected](
+    ("geog", "expected"),
+    [
+        # Empties
+        pytest.param("POINT EMPTY", "POINT (nan nan)", id="point_empty"),
+        pytest.param("LINESTRING EMPTY", "POINT (nan nan)", 
id="linestring_empty"),
+        pytest.param("POLYGON EMPTY", "POINT (nan nan)", id="polygon_empty"),
+        # Points
+        pytest.param("POINT (0 1)", "POINT (0 1)", id="point"),
+        pytest.param("MULTIPOINT ((0 0), (0 1))", "POINT (0 1)", 
id="multipoint"),
+        # Points with Z/M/ZM
+        pytest.param("POINT Z (0 1 10)", "POINT Z (0 1 10)", id="point_z"),
+        pytest.param("POINT M (0 1 10)", "POINT M (0 1 10)", id="point_m"),
+        pytest.param("POINT ZM (0 1 10 20)", "POINT ZM (0 1 10 20)", 
id="point_zm"),
+        # Linestrings
+        pytest.param("LINESTRING (0 0, 0 1)", "POINT (0 1)", id="linestring"),
+        pytest.param(
+            "LINESTRING (0 0, 0 1, 0 5)", "POINT (0 1)", 
id="linestring_three_vertices"
+        ),
+        # Linestrings with Z/M/ZM
+        pytest.param(
+            "LINESTRING Z (0 0 10, 0 1 11)", "POINT Z (0 1 11)", 
id="linestring_z"
+        ),
+        pytest.param(
+            "LINESTRING M (0 0 10, 0 1 11)", "POINT M (0 1 11)", 
id="linestring_m"
+        ),
+        pytest.param(
+            "LINESTRING ZM (0 0 10 20, 0 1 11 21)",
+            "POINT ZM (0 1 11 21)",
+            id="linestring_zm",
+        ),
+        # Polygons
+        pytest.param(
+            "POLYGON ((0 0, 0 1, 1 0, 0 0))",
+            "POINT (0.224466 0.224464)",
+            id="triangle",
+        ),
+        pytest.param(
+            "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+            "POINT (0.450237 0.450223)",
+            id="square",
+        ),
+        # Polygons with Z/M/ZM are not yet supported and return a 2D result
+    ],
+)
+def test_st_pointonsurface(eng, geog, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_PointOnSurface({geog_or_null(geog)})", expected, 
wkt_precision=6
+    )
+
+
[email protected]("eng", [SedonaDB, BigQuery, PostGIS])
[email protected](
+    ("line", "fraction", "expected"),
+    [
+        # Nulls
+        pytest.param(None, 0.5, None, id="null_line"),
+        # Endpoints and midpoints
+        pytest.param("LINESTRING (0 0, 0 2)", 0.0, "POINT (0 0)", id="start"),
+        pytest.param("LINESTRING (0 0, 0 2)", 1.0, "POINT (0 2)", id="end"),
+        pytest.param("LINESTRING (0 0, 0 2)", 0.5, "POINT (0 1)", 
id="midpoint"),
+        # Multi-segment line
+        pytest.param(
+            "LINESTRING (0 0, 0 1, 0 2)",
+            0.25,
+            "POINT (0 0.5)",
+            id="multi_seg_quarter",
+        ),
+        pytest.param(
+            "LINESTRING (0 0, 0 1, 0 2)",
+            0.75,
+            "POINT (0 1.5)",
+            id="multi_seg_three_quarter",
+        ),
+        # Boundary fractions
+        pytest.param("LINESTRING (0 0, 0 2)", 0.0, "POINT (0 0)", 
id="fraction_zero"),
+        pytest.param("LINESTRING (0 0, 0 2)", 1.0, "POINT (0 2)", 
id="fraction_one"),
+    ],
+)
+def test_st_line_interpolate_point(eng, line, fraction, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_LineInterpolatePoint({geog_or_null(line)}, 
{val_or_null(fraction)})",
+        expected,
+        wkt_precision=4,
+    )
+
+
+# Degenerate behaviour matches PostGIS and not BigQuery
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+    ("line", "fraction", "expected"),
+    [
+        # Empties
+        pytest.param("LINESTRING EMPTY", 0.0, None, id="empty_line"),
+        # Degenerate
+        pytest.param(
+            "LINESTRING (1 1, 1 1)", 0.5, "POINT (1 1)", id="zero_length_line"
+        ),
+    ],
+)
+def test_st_line_interpolate_point_degenerate(eng, line, fraction, expected):
+    eng = eng.create_or_skip()
+    eng = eng.create_or_skip()

Review Comment:
   `eng = eng.create_or_skip()` is called twice back-to-back, which is 
redundant and makes the test harder to read. Please remove the duplicate call 
(keep a single `create_or_skip()` before assertions).
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to