Copilot commented on code in PR #2647:
URL: https://github.com/apache/sedona/pull/2647#discussion_r2797826719
##########
common/src/main/java/org/apache/sedona/common/Functions.java:
##########
@@ -270,9 +270,7 @@ public static Geometry expand(Geometry geometry, double
deltaX, double deltaY, d
newCoords[4] = newCoords[0];
return geometry.getFactory().createPolygon(newCoords);
}
- Geometry result = Constructors.polygonFromEnvelope(minX, minY, maxX, maxY);
- result.setSRID(geometry.getSRID());
- return result;
+ return Constructors.polygonFromEnvelope(minX, minY, maxX, maxY,
geometry.getFactory());
Review Comment:
`expand()` used to explicitly preserve the input geometry's SRID via
`result.setSRID(geometry.getSRID())`. Creating the polygon with
`geometry.getFactory()` does not reliably preserve the SRID, because JTS stores
the SRID on the Geometry instance and the GeometryFactory's SRID is often 0
even when `geometry.getSRID()` is non-zero. Consider restoring SRID
preservation (e.g., set the SRID of the returned polygon to
`geometry.getSRID()`), or create a GeometryFactory that matches the input
factory but uses the input geometry's SRID when building the envelope polygon.
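A minimal sketch of the first option, reusing the variables already in scope in
`expand()` and the factory-aware `polygonFromEnvelope` overload added by this PR:

```java
// Build the envelope polygon with the input factory (as the PR already does),
// then copy the input geometry's SRID onto the result, restoring the behavior
// of the removed setSRID call.
Geometry result = Constructors.polygonFromEnvelope(minX, minY, maxX, maxY, geometry.getFactory());
result.setSRID(geometry.getSRID());
return result;
```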
##########
common/src/test/java/org/apache/sedona/common/FunctionsProj4PerformanceTest.java:
##########
@@ -0,0 +1,349 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common;
+
+import static org.junit.Assert.*;
+import static org.junit.Assume.assumeTrue;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import org.datasyslab.proj4sedona.Proj4;
+import org.datasyslab.proj4sedona.defs.Defs;
+import org.datasyslab.proj4sedona.grid.NadgridRegistry;
+import org.junit.Test;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.GeometryFactory;
+import org.locationtech.jts.geom.Point;
+
+/**
+ * Performance tests for Proj4sedona CRS transformation.
+ *
+ * <p>These tests measure:
+ *
+ * <ol>
+ * <li>Proj4sedona vs GeoTools performance comparison
+ * <li>Cache effects in Proj4sedona:
+ * <ul>
+ * <li>2.1 Built-in EPSG codes
+ * <li>2.2 EPSG codes with remote fetching from spatialreference.org
+ * <li>2.3 PROJ and WKT strings
+ * <li>2.4 Grid files (local and remote)
+ * </ul>
+ * </ol>
+ *
+ * <p>Each test uses the pattern: 1 cold call (cache miss) + N warm calls
(cache hits)
+ */
+public class FunctionsProj4PerformanceTest extends TestBase {
+
+ private static final GeometryFactory GEOMETRY_FACTORY = new
GeometryFactory();
+ private static final int WARM_ITERATIONS = 10;
+
+ // Test coordinates
+ private static final double SF_LON = -122.4194;
+ private static final double SF_LAT = 37.7749;
+
+ // Remote grid file URL (OSTN15 from GitHub)
+ private static final String REMOTE_GRID_URL =
+
"https://raw.githubusercontent.com/jiayuasu/grid-files/main/us_os/OSTN15-NTv2/OSTN15_NTv2_ETRStoOSGB.gsb";
+
+ // ==================== Helper Methods ====================
+
+ private Point createTestPoint(double lon, double lat) {
+ return GEOMETRY_FACTORY.createPoint(new Coordinate(lon, lat));
+ }
+
+ private void printHeader(String title) {
+ System.out.println();
+ System.out.println("=".repeat(70));
+ System.out.println(title);
+ System.out.println("=".repeat(70));
+ }
+
+ private void printResult(String label, double coldMs, double warmAvgUs, int
cacheEntries) {
+ double speedup = (coldMs * 1000) / warmAvgUs;
+ System.out.printf("Cold (1 call): %10.2f ms%n", coldMs);
+ System.out.printf("Warm (%d calls): %10.2f μs avg%n", WARM_ITERATIONS,
warmAvgUs);
+ System.out.printf("Cache speedup: %10.0fx%n", speedup);
+ if (cacheEntries >= 0) {
+ System.out.printf("Proj cache entries: %10d%n", cacheEntries);
+ }
+ }
+
+ // ==================== 1. Proj4sedona vs GeoTools ====================
+
+ @Test
+ public void testProj4VsGeoToolsEpsgPerformance() throws Exception {
+ printHeader("1. Proj4sedona vs GeoTools (EPSG:4326 -> EPSG:3857)");
+
+ Point point = createTestPoint(SF_LON, SF_LAT);
+
+ // ===== Proj4sedona =====
+ System.out.println("\nProj4sedona:");
+ Proj4.clearCache();
+
+ // Cold call
+ long coldStart = System.nanoTime();
+ Geometry proj4ColdResult = FunctionsProj4.transform(point, "EPSG:4326",
"EPSG:3857");
+ double proj4ColdMs = (System.nanoTime() - coldStart) / 1e6;
+
+ // Warm calls
+ long warmStart = System.nanoTime();
+ for (int i = 0; i < WARM_ITERATIONS; i++) {
+ FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857");
+ }
+ double proj4WarmTotalMs = (System.nanoTime() - warmStart) / 1e6;
+ double proj4WarmAvgUs = (proj4WarmTotalMs * 1000) / WARM_ITERATIONS;
+
+ printResult("Proj4sedona", proj4ColdMs, proj4WarmAvgUs,
Proj4.getCacheSize());
+ assertNotNull(proj4ColdResult);
+ assertEquals(3857, proj4ColdResult.getSRID());
+
+ // ===== GeoTools =====
+ System.out.println("\nGeoTools:");
+
+ // Cold call
+ coldStart = System.nanoTime();
+ Geometry gtColdResult = FunctionsGeoTools.transform(point, "EPSG:4326",
"EPSG:3857");
+ double gtColdMs = (System.nanoTime() - coldStart) / 1e6;
+
+ // Warm calls
+ warmStart = System.nanoTime();
+ for (int i = 0; i < WARM_ITERATIONS; i++) {
+ FunctionsGeoTools.transform(point, "EPSG:4326", "EPSG:3857");
+ }
+ double gtWarmTotalMs = (System.nanoTime() - warmStart) / 1e6;
+ double gtWarmAvgUs = (gtWarmTotalMs * 1000) / WARM_ITERATIONS;
+
+ printResult("GeoTools", gtColdMs, gtWarmAvgUs, -1);
+ assertNotNull(gtColdResult);
+
+ // ===== Comparison =====
+ double warmSpeedup = gtWarmAvgUs / proj4WarmAvgUs;
+ System.out.printf(
+ "%nComparison: Proj4sedona is %.1fx faster than GeoTools (warm)%n",
warmSpeedup);
+
+ // Verify both produce similar results
+ assertEquals(
+ proj4ColdResult.getCoordinate().x,
+ gtColdResult.getCoordinate().x,
+ 1.0); // 1 meter tolerance
+ assertEquals(proj4ColdResult.getCoordinate().y,
gtColdResult.getCoordinate().y, 1.0);
+ }
+
+ // ==================== 2.1 Cache Effect: Built-in EPSG ====================
+
+ @Test
+ public void testCacheEffectBuiltInEpsgCode() {
+ printHeader("2.1 Cache Effect: Built-in EPSG (EPSG:4326 -> EPSG:3857)");
+
+ Point point = createTestPoint(SF_LON, SF_LAT);
+ Proj4.clearCache();
+
+ // Cold call
+ long coldStart = System.nanoTime();
+ Geometry coldResult = FunctionsProj4.transform(point, "EPSG:4326",
"EPSG:3857");
+ double coldMs = (System.nanoTime() - coldStart) / 1e6;
+
+ // Warm calls
+ long warmStart = System.nanoTime();
+ for (int i = 0; i < WARM_ITERATIONS; i++) {
+ FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857");
+ }
+ double warmTotalMs = (System.nanoTime() - warmStart) / 1e6;
+ double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS;
+
+ printResult("Built-in EPSG", coldMs, warmAvgUs, Proj4.getCacheSize());
+ assertNotNull(coldResult);
+ assertEquals(3857, coldResult.getSRID());
+ }
+
+ // ==================== 2.2 Cache Effect: Remote Fetch EPSG
====================
+
+ @Test
+ public void testCacheEffectRemoteFetchEpsgCode() {
+ printHeader("2.2 Cache Effect: Remote Fetch EPSG (EPSG:2154 - French
Lambert)");
+
+ // EPSG:2154 (RGF93 / Lambert-93) is NOT in the built-in list
+ // It requires fetching from spatialreference.org
Review Comment:
These “performance” tests run under the normal unit test framework, depend on
the network (remote CRS fetches), and produce extensive console output, which
can make CI runs slow or flaky and the logs noisy. Consider moving them into a
dedicated benchmark suite (e.g., JMH), or marking them ignored by default and
running them only in a separate profiling workflow.
##########
docs/api/sql/Function.md:
##########
@@ -4648,62 +4648,15 @@ MULTIPOLYGON (((-2 -3, -3 -3, -3 3, -2 3, -2 -3)), ((3
-3, 3 3, 4 3, 4 -3, 3 -3)
Introduction:
-Transform the Spatial Reference System / Coordinate Reference System of A,
from SourceCRS to TargetCRS. For SourceCRS and TargetCRS, WKT format is also
available since `v1.3.1`. Since `v1.5.1`, if the `SourceCRS` is not specified,
CRS will be fetched from the geometry using [ST_SRID](#st_srid).
+Transform the Spatial Reference System / Coordinate Reference System of A,
from SourceCRS to TargetCRS. If the `SourceCRS` is not specified, CRS will be
fetched from the geometry using [ST_SRID](#st_srid).
-**Lon/Lat Order in the input geometry**
+Since `v1.9.0`, Sedona supports multiple CRS formats including EPSG codes,
WKT1, WKT2, PROJ strings, and PROJJSON. Grid files for high-accuracy datum
transformations are also supported.
-If the input geometry is in lat/lon order, it might throw an error such as
`too close to pole`, `latitude or longitude exceeded limits`, or give
unexpected results.
-You need to make sure that the input geometry is in lon/lat order. If the
input geometry is in lat/lon order, you can use ==ST_FlipCoordinates== to swap
X and Y.
-
-**Lon/Lat Order in the source and target CRS**
-
-Sedona will make sure the source and target CRS to be in lon/lat order. If the
source CRS or target CRS is in lat/lon order, these CRS will be swapped to
lon/lat order.
-
-**CRS code**
-
-The CRS code is the code of the CRS in the official EPSG database
(https://epsg.org/) in the format of `EPSG:XXXX`. A community tool
[EPSG.io](https://epsg.io/) can help you quick identify a CRS code. For
example, the code of WGS84 is `EPSG:4326`.
-
-**WKT format**
-
-You can also use OGC WKT v1 format to specify the source CRS and target CRS.
An example OGC WKT v1 CRS of `EPGS:3857` is as follows:
-
-```
-PROJCS["WGS 84 / Pseudo-Mercator",
- GEOGCS["WGS 84",
- DATUM["WGS_1984",
- SPHEROID["WGS 84",6378137,298.257223563,
- AUTHORITY["EPSG","7030"]],
- AUTHORITY["EPSG","6326"]],
- PRIMEM["Greenwich",0,
- AUTHORITY["EPSG","8901"]],
- UNIT["degree",0.0174532925199433,
- AUTHORITY["EPSG","9122"]],
- AUTHORITY["EPSG","4326"]],
- PROJECTION["Mercator_1SP"],
- PARAMETER["central_meridian",0],
- PARAMETER["scale_factor",1],
- PARAMETER["false_easting",0],
- PARAMETER["false_northing",0],
- UNIT["metre",1,
- AUTHORITY["EPSG","9001"]],
- AXIS["Easting",EAST],
- AXIS["Northing",NORTH],
- EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0
+x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"],
- AUTHORITY["EPSG","3857"]]
-```
-
-!!!note
- By default, this function uses lon/lat order since `v1.5.0`. Before, it
used lat/lon order.
-
-!!!note
- By default, ==ST_Transform== follows the `lenient` mode which tries to fix
issues by itself. You can append a boolean value at the end to enable the
`strict` mode. In `strict` mode, ==ST_Transform== will throw an error if it
finds any issue.
+!!!tip
+ For comprehensive details on supported CRS formats, grid file usage, and
more examples, see [CRS Transformation](CRS-Transformation.md).
Format:
Review Comment:
The docs no longer list the 4-argument `ST_Transform(..., lenientMode)`
overload, but the PR description and the Spark expression implementation
indicate that this overload is still supported for API compatibility (even if
ignored by proj4sedona). Please document the 4-arg signature again and clarify
backend behavior (GeoTools honors `lenient`, proj4sedona ignores it).
##########
spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala:
##########
@@ -0,0 +1,858 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql
+
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.sedona_sql.expressions.st_functions._
+import org.junit.Assert.{assertEquals, assertNotNull, assertTrue}
+import org.locationtech.jts.geom.Geometry
+
+/**
+ * Tests for ST_Transform using proj4sedona backend.
+ *
+ * These tests verify CRS transformations with various input formats:
+ * - EPSG codes
+ * - PROJ strings
+ * - WKT1 and WKT2
+ * - PROJJSON
+ * - NAD grid files
+ *
+ * Tests also verify config switching between proj4sedona and GeoTools.
+ */
+class CRSTransformProj4Test extends TestBaseScala {
+
+ private val COORD_TOLERANCE = 1.0 // 1 meter tolerance for projected
coordinates
+
+ describe("ST_Transform with proj4sedona (default mode)") {
+
+ it("should transform EPSG:4326 to EPSG:3857 using SQL API") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ // Validated against cs2cs: -13627665.27, 4547675.35
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using geometry SRID as source (2-arg version)") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 4326), 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform to UTM Zone 10N") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
'EPSG:4326', 'EPSG:32610')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(32610, result.getSRID)
+ // Validated against cs2cs: 551130.77, 4180998.88
+ assertEquals(551130.77, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4180998.88, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJ string") {
+ val projString =
+ "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0
+k=1 +units=m +no_defs"
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326),
+ 'EPSG:4326',
+ '$projString')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // Web Mercator coordinates
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJ string for UTM") {
+ val projString = "+proj=utm +zone=10 +datum=WGS84 +units=m +no_defs"
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
+ '+proj=longlat +datum=WGS84 +no_defs',
+ '$projString')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // UTM Zone 10N coordinates
+ assertTrue(result.getCoordinate.x > 540000 && result.getCoordinate.x <
560000)
+ assertTrue(result.getCoordinate.y > 4170000 && result.getCoordinate.y <
4190000)
+ }
+
+ it("should transform using WKT1") {
+ val sourceWkt =
+ """GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS
84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]"""
+ val targetWkt =
+ """PROJCS["WGS 84 / UTM zone 51N",GEOGCS["WGS
84",DATUM["WGS_1984",SPHEROID["WGS
84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",123],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1]]"""
+
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (120 60)'),
+ '$sourceWkt',
+ '$targetWkt')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // Validated against cs2cs: 332705.18, 6655205.48
+ assertEquals(332705.18, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(6655205.48, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using WKT2") {
+ val wkt2 =
+ """GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",ELLIPSOID["WGS
84",6378137,298.257223563]],CS[ellipsoidal,2],AXIS["latitude",north],AXIS["longitude",east],UNIT["degree",0.0174532925199433]]"""
+
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
+ '$wkt2',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJJSON") {
+ val projJson =
+ """{"type":"GeographicCRS","name":"WGS
84","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System
1984","ellipsoid":{"name":"WGS
84","semi_major_axis":6378137,"inverse_flattening":298.257223563}}}"""
+
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
+ '$projJson',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+
+ it("should transform with NAD grid file") {
+ val gridFile =
getClass.getClassLoader.getResource("grids/us_noaa_conus.tif")
+ if (gridFile != null) {
+ val gridPath = gridFile.getPath
+ val projString = s"+proj=longlat +ellps=GRS80 +nadgrids=$gridPath
+no_defs"
+
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_SetSRID(ST_GeomFromWKT('POINT (-96.0 40.0)'), 4326),
+ 'EPSG:4326',
+ '$projString')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertTrue(result.isValid)
+ }
+ }
+
+ it("should handle round-trip transformation") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_Transform(
+ ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
+ 'EPSG:4326',
+ 'EPSG:3857'),
+ 'EPSG:3857',
+ 'EPSG:4326')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(4326, result.getSRID)
+ // Should return close to original coordinates
+ assertEquals(-122.4194, result.getCoordinate.x, 1e-6)
+ assertEquals(37.7749, result.getCoordinate.y, 1e-6)
+ }
+
+ it("should preserve UserData") {
+ // Create a geometry with UserData
+ val df = sparkSession
+ .sql("SELECT ST_GeomFromWKT('POINT (-122.4194 37.7749)') as geom")
+ .selectExpr("ST_Transform(geom, 'EPSG:4326', 'EPSG:3857') as
transformed")
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ }
+ }
+
+ describe("ST_Transform with DataFrame API") {
+
+ it("should transform using 2-arg DataFrame API") {
+ import sparkSession.implicits._
+ val df = sparkSession
+ .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom")
+ .select(ST_SetSRID($"geom", lit(4326)).as("geom"))
+ .select(ST_Transform($"geom", lit("EPSG:3857")).as("geom"))
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+
+ it("should transform using 3-arg DataFrame API") {
+ import sparkSession.implicits._
+ val df = sparkSession
+ .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom")
+ .select(ST_Transform($"geom", lit("EPSG:4326"),
lit("EPSG:3857")).as("geom"))
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJ string with DataFrame API") {
+ import sparkSession.implicits._
+ val projString =
+ "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0
+k=1 +units=m +no_defs"
+ val df = sparkSession
+ .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom")
+ .select(ST_SetSRID($"geom", lit(4326)).as("geom"))
+ .select(ST_Transform($"geom", lit(projString)).as("geom"))
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJJSON with DataFrame API") {
+ import sparkSession.implicits._
+ val projJson =
+ """{"type":"GeographicCRS","name":"WGS
84","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System
1984","ellipsoid":{"name":"WGS
84","semi_major_axis":6378137,"inverse_flattening":298.257223563}}}"""
+
+ // Use createDataFrame to avoid constant folding issues with all-literal
expressions
+ val pointDf = Seq(("POINT (-122.4194 37.7749)"))
+ .toDF("wkt")
+ .selectExpr("ST_GeomFromWKT(wkt) as geom")
+
+ val df = pointDf.select(ST_Transform($"geom", lit(projJson),
lit("EPSG:3857")).as("geom"))
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ describe("ST_Transform with different geometry types") {
+
+ it("should transform LineString") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_GeomFromWKT('LINESTRING(-122.4 37.7, -122.5 37.8, -122.6
37.9)'),
+ 'EPSG:4326',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals("LineString", result.getGeometryType)
+ assertEquals(3, result.getNumPoints)
+ assertEquals(3857, result.getSRID)
+ }
+
+ it("should transform Polygon") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_GeomFromWKT('POLYGON((-122.5 37.7, -122.3 37.7, -122.3 37.9,
-122.5 37.9, -122.5 37.7))'),
+ 'EPSG:4326',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals("Polygon", result.getGeometryType)
+ assertEquals(3857, result.getSRID)
+ }
+
+ it("should transform MultiPoint") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_GeomFromWKT('MULTIPOINT((-122.4 37.7), (-122.5 37.8), (-122.6
37.9))'),
+ 'EPSG:4326',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals("MultiPoint", result.getGeometryType)
+ assertEquals(3, result.getNumGeometries)
+ assertEquals(3857, result.getSRID)
+ }
+
+ it("should transform GeometryCollection") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_GeomFromWKT('GEOMETRYCOLLECTION(POINT(-122.4 37.7),
LINESTRING(-122.5 37.8, -122.6 37.9))'),
+ 'EPSG:4326',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals("GeometryCollection", result.getGeometryType)
+ assertEquals(2, result.getNumGeometries)
+ assertEquals(3857, result.getSRID)
+ }
+ }
+
+ describe("ST_Transform config switching") {
+
+ it("should use proj4sedona when config is 'none'") {
+ withConf(Map("spark.sedona.crs.geotools" -> "none")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ it("should use proj4sedona when config is 'raster' (default)") {
+ withConf(Map("spark.sedona.crs.geotools" -> "raster")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ it("should use GeoTools when config is 'all'") {
+ withConf(Map("spark.sedona.crs.geotools" -> "all")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // GeoTools should produce similar results (within tolerance)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ it("should use proj4sedona for 4-arg version by default (lenient
ignored)") {
+ // 4-arg version should use proj4sedona by default, ignoring lenient
parameter
+ val resultLenientTrue = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
'EPSG:4326', 'EPSG:3857', true)")
+ .first()
+ .getAs[Geometry](0)
+
+ val resultLenientFalse = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
'EPSG:4326', 'EPSG:3857', false)")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(resultLenientTrue)
+ assertNotNull(resultLenientFalse)
+ // Both should produce same results (lenient is ignored)
+ assertEquals(resultLenientTrue.getCoordinate.x,
resultLenientFalse.getCoordinate.x, 1e-9)
+ assertEquals(resultLenientTrue.getCoordinate.y,
resultLenientFalse.getCoordinate.y, 1e-9)
+ // Should produce correct Web Mercator coordinates
+ assertEquals(-13627665.27, resultLenientTrue.getCoordinate.x,
COORD_TOLERANCE)
+ assertEquals(4547675.35, resultLenientTrue.getCoordinate.y,
COORD_TOLERANCE)
+ }
+
+ it("should use proj4sedona for 4-arg version when config is 'none'") {
+ withConf(Map("spark.sedona.crs.geotools" -> "none")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857', false)")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ it("should use GeoTools for 4-arg version when config is 'all'") {
+ withConf(Map("spark.sedona.crs.geotools" -> "all")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857', false)")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // GeoTools should produce similar results
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+ }
+
+ describe("ST_Transform edge cases") {
+
+ it("should handle same CRS transformation") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
'EPSG:4326', 'EPSG:4326')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(4326, result.getSRID)
+ assertEquals(-122.4194, result.getCoordinate.x, 1e-6)
+ assertEquals(37.7749, result.getCoordinate.y, 1e-6)
+ }
+
+ it("should handle empty geometry") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT EMPTY'), 'EPSG:4326',
'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertTrue(result.isEmpty)
+ }
+
+ it("should handle null geometry") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(null, 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .get(0)
+
+ assertTrue(result == null)
+ }
+ }
+
+ // ==================== OSTN15 British National Grid Tests
====================
+
+ /**
+ * Official OSTN15 test data from Ordnance Survey. 40 test points covering
all of Great Britain
+ * from Cornwall to Shetland.
+ *
+ * Data source: OSTN15_TestInput_ETRStoOSGB.txt and
OSTN15_TestOutput_ETRStoOSGB.txt from
+ * https://www.ordnancesurvey.co.uk/documents/resources/OSTN15-NTv2.zip
+ *
+ * Each point has:
+ * - pointId: Official test point identifier (TP01-TP40)
+ * - etrsLat: ETRS89 latitude in degrees
+ * - etrsLon: ETRS89 longitude in degrees
+ */
+ case class OSTN15TestPoint(pointId: String, etrsLat: Double, etrsLon: Double)
+
+ // All 40 official OSTN15 test points from OSTN15_TestInput_ETRStoOSGB.txt
+ private val ostn15TestPoints: Seq[OSTN15TestPoint] = Seq(
+ OSTN15TestPoint("TP01", 49.92226393730, -6.29977752014),
+ OSTN15TestPoint("TP02", 49.96006137820, -5.20304609998),
+ OSTN15TestPoint("TP03", 50.43885825610, -4.10864563561),
+ OSTN15TestPoint("TP04", 50.57563665000, -1.29782277240),
+ OSTN15TestPoint("TP05", 50.93127937910, -1.45051433700),
+ OSTN15TestPoint("TP06", 51.40078220140, -3.55128349240),
+ OSTN15TestPoint("TP07", 51.37447025550, 1.44454730409),
+ OSTN15TestPoint("TP08", 51.42754743020, -2.54407618349),
+ OSTN15TestPoint("TP09", 51.48936564950, -0.11992557180),
+ OSTN15TestPoint("TP10", 51.85890896400, -4.30852476960),
+ OSTN15TestPoint("TP11", 51.89436637350, 0.89724327012),
+ OSTN15TestPoint("TP12", 52.25529381630, -2.15458614387),
+ OSTN15TestPoint("TP13", 52.25160951230, -0.91248956970),
+ OSTN15TestPoint("TP14", 52.75136687170, 0.40153547065),
+ OSTN15TestPoint("TP15", 52.96219109410, -1.19747655922),
+ OSTN15TestPoint("TP16", 53.34480280190, -2.64049320810),
+ OSTN15TestPoint("TP17", 53.41628516040, -4.28918069756),
+ OSTN15TestPoint("TP18", 53.41630925420, -4.28917792869),
+ OSTN15TestPoint("TP19", 53.77911025760, -3.04045490691),
+ OSTN15TestPoint("TP20", 53.80021519630, -1.66379168242),
+ OSTN15TestPoint("TP21", 54.08666318080, -4.63452168212),
+ OSTN15TestPoint("TP22", 54.11685144290, -0.07773133187),
+ OSTN15TestPoint("TP23", 54.32919541010, -4.38849118133),
+ OSTN15TestPoint("TP24", 54.89542340420, -2.93827741149),
+ OSTN15TestPoint("TP25", 54.97912273660, -1.61657685184),
+ OSTN15TestPoint("TP26", 55.85399952950, -4.29649016251),
+ OSTN15TestPoint("TP27", 55.92478265510, -3.29479219337),
+ OSTN15TestPoint("TP28", 57.00606696050, -5.82836691850),
+ OSTN15TestPoint("TP29", 57.13902518960, -2.04856030746),
+ OSTN15TestPoint("TP30", 57.48625000720, -4.21926398555),
+ OSTN15TestPoint("TP31", 57.81351838410, -8.57854456076),
+ OSTN15TestPoint("TP32", 58.21262247180, -7.59255560556),
+ OSTN15TestPoint("TP33", 58.51560361300, -6.26091455533),
+ OSTN15TestPoint("TP34", 58.58120461280, -3.72631022121),
+ OSTN15TestPoint("TP35", 59.03743871190, -3.21454001115),
+ OSTN15TestPoint("TP36", 59.09335035320, -4.41757674598),
+ OSTN15TestPoint("TP37", 59.09671617400, -5.82799339844),
+ OSTN15TestPoint("TP38", 59.53470794490, -1.62516966058),
+ OSTN15TestPoint("TP39", 59.85409913890, -1.27486910356),
+ OSTN15TestPoint("TP40", 60.13308091660, -2.07382822798))
+
+ // Grid shift should produce small but non-zero changes (< 0.01 degrees)
+ private val GRID_SHIFT_TOLERANCE = 0.01
+ // Round-trip tolerance (should return to original within ~1e-6 degrees)
+ private val ROUND_TRIP_TOLERANCE = 1e-6
+
+ // Remote URL for grid file (hosted on GitHub)
+ private val OSTN15_ETRS_TO_OSGB_URL =
+
"https://raw.githubusercontent.com/jiayuasu/grid-files/main/us_os/OSTN15-NTv2/OSTN15_NTv2_ETRStoOSGB.gsb"
+
+ describe("ST_Transform with OSTN15 grid files - 40 official test points") {
+ import org.datasyslab.proj4sedona.Proj4
+ import org.datasyslab.proj4sedona.grid.NadgridRegistry
+
+ it("should transform all 40 points ETRS89 to OSGB36 with timing and cache
metrics") {
+ val gridFile =
getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb")
+ assume(gridFile != null, "OSTN15 ETRStoOSGB grid file not found")
+
+ val gridPath = gridFile.getPath
Review Comment:
`URL.getPath` can return URL-encoded paths (spaces as `%20`, etc.) and can
behave inconsistently across platforms (notably Windows). To avoid intermittent
“file not found” issues, prefer converting via `toURI` and using
`Paths.get(gridFile.toURI)` (or an equivalent safe decoding) before passing the
path into `+nadgrids=...`.
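A sketch of the suggested fix inside the test (only the path handling changes):

```scala
import java.nio.file.Paths

// Decode the resource URL into a real filesystem path; Paths.get(URI) handles
// %20-encoded characters and platform-specific forms that URL.getPath can mangle.
val gridPath = Paths.get(gridFile.toURI).toString
val etrs89WithGrid = s"+proj=longlat +ellps=GRS80 +nadgrids=$gridPath"
```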
##########
spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala:
##########
@@ -0,0 +1,858 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql
+
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.sedona_sql.expressions.st_functions._
+import org.junit.Assert.{assertEquals, assertNotNull, assertTrue}
+import org.locationtech.jts.geom.Geometry
+
+/**
+ * Tests for ST_Transform using proj4sedona backend.
+ *
+ * These tests verify CRS transformations with various input formats:
+ * - EPSG codes
+ * - PROJ strings
+ * - WKT1 and WKT2
+ * - PROJJSON
+ * - NAD grid files
+ *
+ * Tests also verify config switching between proj4sedona and GeoTools.
+ */
+class CRSTransformProj4Test extends TestBaseScala {
+
+ private val COORD_TOLERANCE = 1.0 // 1 meter tolerance for projected
coordinates
+
+ describe("ST_Transform with proj4sedona (default mode)") {
+
+ it("should transform EPSG:4326 to EPSG:3857 using SQL API") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ // Validated against cs2cs: -13627665.27, 4547675.35
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using geometry SRID as source (2-arg version)") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 4326), 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform to UTM Zone 10N") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
'EPSG:4326', 'EPSG:32610')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(32610, result.getSRID)
+ // Validated against cs2cs: 551130.77, 4180998.88
+ assertEquals(551130.77, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4180998.88, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJ string") {
+ val projString =
+ "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0
+k=1 +units=m +no_defs"
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326),
+ 'EPSG:4326',
+ '$projString')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // Web Mercator coordinates
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJ string for UTM") {
+ val projString = "+proj=utm +zone=10 +datum=WGS84 +units=m +no_defs"
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
+ '+proj=longlat +datum=WGS84 +no_defs',
+ '$projString')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // UTM Zone 10N coordinates
+ assertTrue(result.getCoordinate.x > 540000 && result.getCoordinate.x <
560000)
+ assertTrue(result.getCoordinate.y > 4170000 && result.getCoordinate.y <
4190000)
+ }
+
+ it("should transform using WKT1") {
+ val sourceWkt =
+ """GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS
84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]"""
+ val targetWkt =
+ """PROJCS["WGS 84 / UTM zone 51N",GEOGCS["WGS
84",DATUM["WGS_1984",SPHEROID["WGS
84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",123],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1]]"""
+
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (120 60)'),
+ '$sourceWkt',
+ '$targetWkt')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // Validated against cs2cs: 332705.18, 6655205.48
+ assertEquals(332705.18, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(6655205.48, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using WKT2") {
+ val wkt2 =
+ """GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",ELLIPSOID["WGS
84",6378137,298.257223563]],CS[ellipsoidal,2],AXIS["latitude",north],AXIS["longitude",east],UNIT["degree",0.0174532925199433]]"""
+
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
+ '$wkt2',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJJSON") {
+ val projJson =
+ """{"type":"GeographicCRS","name":"WGS
84","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System
1984","ellipsoid":{"name":"WGS
84","semi_major_axis":6378137,"inverse_flattening":298.257223563}}}"""
+
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
+ '$projJson',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+
+ it("should transform with NAD grid file") {
+ val gridFile =
getClass.getClassLoader.getResource("grids/us_noaa_conus.tif")
+ if (gridFile != null) {
+ val gridPath = gridFile.getPath
+ val projString = s"+proj=longlat +ellps=GRS80 +nadgrids=$gridPath
+no_defs"
+
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_SetSRID(ST_GeomFromWKT('POINT (-96.0 40.0)'), 4326),
+ 'EPSG:4326',
+ '$projString')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertTrue(result.isValid)
+ }
+ }
+
+ it("should handle round-trip transformation") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_Transform(
+ ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
+ 'EPSG:4326',
+ 'EPSG:3857'),
+ 'EPSG:3857',
+ 'EPSG:4326')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(4326, result.getSRID)
+ // Should return close to original coordinates
+ assertEquals(-122.4194, result.getCoordinate.x, 1e-6)
+ assertEquals(37.7749, result.getCoordinate.y, 1e-6)
+ }
+
+ it("should preserve UserData") {
+ // Create a geometry with UserData
+ val df = sparkSession
+ .sql("SELECT ST_GeomFromWKT('POINT (-122.4194 37.7749)') as geom")
+ .selectExpr("ST_Transform(geom, 'EPSG:4326', 'EPSG:3857') as
transformed")
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ }
+ }
+
+ describe("ST_Transform with DataFrame API") {
+
+ it("should transform using 2-arg DataFrame API") {
+ import sparkSession.implicits._
+ val df = sparkSession
+ .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom")
+ .select(ST_SetSRID($"geom", lit(4326)).as("geom"))
+ .select(ST_Transform($"geom", lit("EPSG:3857")).as("geom"))
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+
+ it("should transform using 3-arg DataFrame API") {
+ import sparkSession.implicits._
+ val df = sparkSession
+ .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom")
+ .select(ST_Transform($"geom", lit("EPSG:4326"),
lit("EPSG:3857")).as("geom"))
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJ string with DataFrame API") {
+ import sparkSession.implicits._
+ val projString =
+ "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0
+k=1 +units=m +no_defs"
+ val df = sparkSession
+ .sql("SELECT ST_Point(-122.4194, 37.7749) AS geom")
+ .select(ST_SetSRID($"geom", lit(4326)).as("geom"))
+ .select(ST_Transform($"geom", lit(projString)).as("geom"))
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ }
+
+ it("should transform using PROJJSON with DataFrame API") {
+ import sparkSession.implicits._
+ val projJson =
+ """{"type":"GeographicCRS","name":"WGS
84","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System
1984","ellipsoid":{"name":"WGS
84","semi_major_axis":6378137,"inverse_flattening":298.257223563}}}"""
+
+ // Use createDataFrame to avoid constant folding issues with all-literal
expressions
+ val pointDf = Seq(("POINT (-122.4194 37.7749)"))
+ .toDF("wkt")
+ .selectExpr("ST_GeomFromWKT(wkt) as geom")
+
+ val df = pointDf.select(ST_Transform($"geom", lit(projJson),
lit("EPSG:3857")).as("geom"))
+
+ val result = df.first().getAs[Geometry](0)
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ describe("ST_Transform with different geometry types") {
+
+ it("should transform LineString") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_GeomFromWKT('LINESTRING(-122.4 37.7, -122.5 37.8, -122.6
37.9)'),
+ 'EPSG:4326',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals("LineString", result.getGeometryType)
+ assertEquals(3, result.getNumPoints)
+ assertEquals(3857, result.getSRID)
+ }
+
+ it("should transform Polygon") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_GeomFromWKT('POLYGON((-122.5 37.7, -122.3 37.7, -122.3 37.9,
-122.5 37.9, -122.5 37.7))'),
+ 'EPSG:4326',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals("Polygon", result.getGeometryType)
+ assertEquals(3857, result.getSRID)
+ }
+
+ it("should transform MultiPoint") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_GeomFromWKT('MULTIPOINT((-122.4 37.7), (-122.5 37.8), (-122.6
37.9))'),
+ 'EPSG:4326',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals("MultiPoint", result.getGeometryType)
+ assertEquals(3, result.getNumGeometries)
+ assertEquals(3857, result.getSRID)
+ }
+
+ it("should transform GeometryCollection") {
+ val result = sparkSession
+ .sql("""SELECT ST_Transform(
+ ST_GeomFromWKT('GEOMETRYCOLLECTION(POINT(-122.4 37.7),
LINESTRING(-122.5 37.8, -122.6 37.9))'),
+ 'EPSG:4326',
+ 'EPSG:3857')""")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals("GeometryCollection", result.getGeometryType)
+ assertEquals(2, result.getNumGeometries)
+ assertEquals(3857, result.getSRID)
+ }
+ }
+
+ describe("ST_Transform config switching") {
+
+ it("should use proj4sedona when config is 'none'") {
+ withConf(Map("spark.sedona.crs.geotools" -> "none")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ it("should use proj4sedona when config is 'raster' (default)") {
+ withConf(Map("spark.sedona.crs.geotools" -> "raster")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ it("should use GeoTools when config is 'all'") {
+ withConf(Map("spark.sedona.crs.geotools" -> "all")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // GeoTools should produce similar results (within tolerance)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ it("should use proj4sedona for 4-arg version by default (lenient
ignored)") {
+ // 4-arg version should use proj4sedona by default, ignoring lenient
parameter
+ val resultLenientTrue = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
'EPSG:4326', 'EPSG:3857', true)")
+ .first()
+ .getAs[Geometry](0)
+
+ val resultLenientFalse = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
'EPSG:4326', 'EPSG:3857', false)")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(resultLenientTrue)
+ assertNotNull(resultLenientFalse)
+ // Both should produce same results (lenient is ignored)
+ assertEquals(resultLenientTrue.getCoordinate.x,
resultLenientFalse.getCoordinate.x, 1e-9)
+ assertEquals(resultLenientTrue.getCoordinate.y,
resultLenientFalse.getCoordinate.y, 1e-9)
+ // Should produce correct Web Mercator coordinates
+ assertEquals(-13627665.27, resultLenientTrue.getCoordinate.x,
COORD_TOLERANCE)
+ assertEquals(4547675.35, resultLenientTrue.getCoordinate.y,
COORD_TOLERANCE)
+ }
+
+ it("should use proj4sedona for 4-arg version when config is 'none'") {
+ withConf(Map("spark.sedona.crs.geotools" -> "none")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857', false)")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+
+ it("should use GeoTools for 4-arg version when config is 'all'") {
+ withConf(Map("spark.sedona.crs.geotools" -> "all")) {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 'EPSG:4326', 'EPSG:3857', false)")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ // GeoTools should produce similar results
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ }
+ }
+ }
+
+ describe("ST_Transform edge cases") {
+
+ it("should handle same CRS transformation") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT (-122.4194 37.7749)'),
'EPSG:4326', 'EPSG:4326')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(4326, result.getSRID)
+ assertEquals(-122.4194, result.getCoordinate.x, 1e-6)
+ assertEquals(37.7749, result.getCoordinate.y, 1e-6)
+ }
+
+ it("should handle empty geometry") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_GeomFromWKT('POINT EMPTY'), 'EPSG:4326',
'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertTrue(result.isEmpty)
+ }
+
+ it("should handle null geometry") {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(null, 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .get(0)
+
+ assertTrue(result == null)
+ }
+ }
+
+ // ==================== OSTN15 British National Grid Tests
====================
+
+ /**
+ * Official OSTN15 test data from Ordnance Survey. 40 test points covering
all of Great Britain
+ * from Cornwall to Shetland.
+ *
+ * Data source: OSTN15_TestInput_ETRStoOSGB.txt and
OSTN15_TestOutput_ETRStoOSGB.txt from
+ * https://www.ordnancesurvey.co.uk/documents/resources/OSTN15-NTv2.zip
+ *
+ * Each point has:
+ * - pointId: Official test point identifier (TP01-TP40)
+ * - etrsLat: ETRS89 latitude in degrees
+ * - etrsLon: ETRS89 longitude in degrees
+ */
+ case class OSTN15TestPoint(pointId: String, etrsLat: Double, etrsLon: Double)
+
+ // All 40 official OSTN15 test points from OSTN15_TestInput_ETRStoOSGB.txt
+ private val ostn15TestPoints: Seq[OSTN15TestPoint] = Seq(
+ OSTN15TestPoint("TP01", 49.92226393730, -6.29977752014),
+ OSTN15TestPoint("TP02", 49.96006137820, -5.20304609998),
+ OSTN15TestPoint("TP03", 50.43885825610, -4.10864563561),
+ OSTN15TestPoint("TP04", 50.57563665000, -1.29782277240),
+ OSTN15TestPoint("TP05", 50.93127937910, -1.45051433700),
+ OSTN15TestPoint("TP06", 51.40078220140, -3.55128349240),
+ OSTN15TestPoint("TP07", 51.37447025550, 1.44454730409),
+ OSTN15TestPoint("TP08", 51.42754743020, -2.54407618349),
+ OSTN15TestPoint("TP09", 51.48936564950, -0.11992557180),
+ OSTN15TestPoint("TP10", 51.85890896400, -4.30852476960),
+ OSTN15TestPoint("TP11", 51.89436637350, 0.89724327012),
+ OSTN15TestPoint("TP12", 52.25529381630, -2.15458614387),
+ OSTN15TestPoint("TP13", 52.25160951230, -0.91248956970),
+ OSTN15TestPoint("TP14", 52.75136687170, 0.40153547065),
+ OSTN15TestPoint("TP15", 52.96219109410, -1.19747655922),
+ OSTN15TestPoint("TP16", 53.34480280190, -2.64049320810),
+ OSTN15TestPoint("TP17", 53.41628516040, -4.28918069756),
+ OSTN15TestPoint("TP18", 53.41630925420, -4.28917792869),
+ OSTN15TestPoint("TP19", 53.77911025760, -3.04045490691),
+ OSTN15TestPoint("TP20", 53.80021519630, -1.66379168242),
+ OSTN15TestPoint("TP21", 54.08666318080, -4.63452168212),
+ OSTN15TestPoint("TP22", 54.11685144290, -0.07773133187),
+ OSTN15TestPoint("TP23", 54.32919541010, -4.38849118133),
+ OSTN15TestPoint("TP24", 54.89542340420, -2.93827741149),
+ OSTN15TestPoint("TP25", 54.97912273660, -1.61657685184),
+ OSTN15TestPoint("TP26", 55.85399952950, -4.29649016251),
+ OSTN15TestPoint("TP27", 55.92478265510, -3.29479219337),
+ OSTN15TestPoint("TP28", 57.00606696050, -5.82836691850),
+ OSTN15TestPoint("TP29", 57.13902518960, -2.04856030746),
+ OSTN15TestPoint("TP30", 57.48625000720, -4.21926398555),
+ OSTN15TestPoint("TP31", 57.81351838410, -8.57854456076),
+ OSTN15TestPoint("TP32", 58.21262247180, -7.59255560556),
+ OSTN15TestPoint("TP33", 58.51560361300, -6.26091455533),
+ OSTN15TestPoint("TP34", 58.58120461280, -3.72631022121),
+ OSTN15TestPoint("TP35", 59.03743871190, -3.21454001115),
+ OSTN15TestPoint("TP36", 59.09335035320, -4.41757674598),
+ OSTN15TestPoint("TP37", 59.09671617400, -5.82799339844),
+ OSTN15TestPoint("TP38", 59.53470794490, -1.62516966058),
+ OSTN15TestPoint("TP39", 59.85409913890, -1.27486910356),
+ OSTN15TestPoint("TP40", 60.13308091660, -2.07382822798))
+
+ // Grid shift should produce small but non-zero changes (< 0.01 degrees)
+ private val GRID_SHIFT_TOLERANCE = 0.01
+ // Round-trip tolerance (should return to original within ~1e-6 degrees)
+ private val ROUND_TRIP_TOLERANCE = 1e-6
+
+ // Remote URL for grid file (hosted on GitHub)
+ private val OSTN15_ETRS_TO_OSGB_URL =
+
"https://raw.githubusercontent.com/jiayuasu/grid-files/main/us_os/OSTN15-NTv2/OSTN15_NTv2_ETRStoOSGB.gsb"
+
+ describe("ST_Transform with OSTN15 grid files - 40 official test points") {
+ import org.datasyslab.proj4sedona.Proj4
+ import org.datasyslab.proj4sedona.grid.NadgridRegistry
+
+ it("should transform all 40 points ETRS89 to OSGB36 with timing and cache
metrics") {
+ val gridFile =
getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb")
+ assume(gridFile != null, "OSTN15 ETRStoOSGB grid file not found")
+
+ val gridPath = gridFile.getPath
+ val etrs89WithGrid = s"+proj=longlat +ellps=GRS80 +nadgrids=$gridPath"
+ val osgb36 = "+proj=longlat +ellps=airy"
+
+ // Clear caches to measure cold start
+ Proj4.clearCache()
+ NadgridRegistry.clear()
+
+ println("\n" + "=" * 70)
+ println("OSTN15 ETRS89 -> OSGB36: 40 Official Test Points")
+ println("=" * 70)
+
+ // First transformation (cold - loads grid file)
+ val startCold = System.nanoTime()
+ val firstResult = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (${ostn15TestPoints.head.etrsLon}
${ostn15TestPoints.head.etrsLat})'),
+ '$etrs89WithGrid',
+ '$osgb36')""")
+ .first()
+ .getAs[Geometry](0)
+ val coldTime = (System.nanoTime() - startCold) / 1e6
+
+ println(f"First transform (cold, loads ~15MB grid): $coldTime%.2f ms")
+ println(f" Proj cache size: ${Proj4.getCacheSize}")
+ println(f" Grid cache size: ${NadgridRegistry.size()}")
+
+ // Transform remaining 39 points (warm - cached)
+ val startWarm = System.nanoTime()
+ var successCount = 1 // Already did first one
+ var failCount = 0
+
+ ostn15TestPoints.tail.foreach { tp =>
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (${tp.etrsLon} ${tp.etrsLat})'),
+ '$etrs89WithGrid',
+ '$osgb36')""")
+ .first()
+ .getAs[Geometry](0)
+
+ if (result != null) {
+ val xShift = Math.abs(result.getCoordinate.x - tp.etrsLon)
+ val yShift = Math.abs(result.getCoordinate.y - tp.etrsLat)
+
+ if (xShift < GRID_SHIFT_TOLERANCE && yShift < GRID_SHIFT_TOLERANCE &&
+ xShift > 1e-9 && yShift > 1e-9) {
+ successCount += 1
+ } else {
+ failCount += 1
+ println(s" ${tp.pointId}: Unexpected shift - X: $xShift, Y:
$yShift")
+ }
+ } else {
+ failCount += 1
+ println(s" ${tp.pointId}: Result is null")
+ }
+ }
+ val warmTime = (System.nanoTime() - startWarm) / 1e6
+ val avgWarmTime = warmTime / 39
+
+ println(f"\nRemaining 39 transforms (warm, cached): $warmTime%.2f ms
total")
+ println(f" Average per transform: $avgWarmTime%.2f ms")
+ println(f" Cache speedup: ${coldTime / avgWarmTime}%.1fx")
+ println(f"\nResults: $successCount/40 passed, $failCount failed")
+
+ // Verify first result
+ assertNotNull("First result should not be null", firstResult)
+ val xShift = Math.abs(firstResult.getCoordinate.x -
ostn15TestPoints.head.etrsLon)
+ val yShift = Math.abs(firstResult.getCoordinate.y -
ostn15TestPoints.head.etrsLat)
+ assertTrue(
+ s"X shift ($xShift) should be < $GRID_SHIFT_TOLERANCE",
+ xShift < GRID_SHIFT_TOLERANCE)
+ assertTrue(
+ s"Y shift ($yShift) should be < $GRID_SHIFT_TOLERANCE",
+ yShift < GRID_SHIFT_TOLERANCE)
+ assertTrue(s"X shift ($xShift) should be non-zero", xShift > 1e-9)
+ assertTrue(s"Y shift ($yShift) should be non-zero", yShift > 1e-9)
+
+ assertEquals("All 40 points should transform successfully", 40,
successCount)
+ }
+
+ it("should transform all 40 points OSGB36 to ETRS89 with timing and cache
metrics") {
+ val gridFile =
getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_OSGBtoETRS.gsb")
+ assume(gridFile != null, "OSTN15 OSGBtoETRS grid file not found")
+
+ val gridPath = gridFile.getPath
+ val osgb36WithGrid = s"+proj=longlat +ellps=airy +nadgrids=$gridPath"
+ val etrs89 = "+proj=longlat +ellps=GRS80"
+
+ // First, get OSGB36 coordinates by transforming ETRS89 -> OSGB36
+ val etrsToOsgbFile =
+
getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb")
+ assume(etrsToOsgbFile != null, "OSTN15 ETRStoOSGB grid file not found")
+ val etrs89WithGrid = s"+proj=longlat +ellps=GRS80
+nadgrids=${etrsToOsgbFile.getPath}"
+ val osgb36 = "+proj=longlat +ellps=airy"
+
+ // Clear caches
+ Proj4.clearCache()
+ NadgridRegistry.clear()
+
+ println("\n" + "=" * 70)
+ println("OSTN15 OSGB36 -> ETRS89: 40 Official Test Points")
+ println("=" * 70)
+
+ // Pre-compute OSGB36 coordinates (this also warms up ETRS->OSGB cache)
+ val osgb36Coords = ostn15TestPoints.map { tp =>
+ val osgbResult = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT (${tp.etrsLon} ${tp.etrsLat})'),
+ '$etrs89WithGrid',
+ '$osgb36')""")
+ .first()
+ .getAs[Geometry](0)
+ (
+ tp.pointId,
+ osgbResult.getCoordinate.x,
+ osgbResult.getCoordinate.y,
+ tp.etrsLon,
+ tp.etrsLat)
+ }
+
+ // Clear only the reverse direction cache to measure cold start for
OSGB->ETRS
+ // (Grid files are still cached in NadgridRegistry)
+ println(s"Grid cache size before OSGB->ETRS: ${NadgridRegistry.size()}")
+
+ // First transformation (semi-cold - grid file cached but new CRS pair)
+ val startFirst = System.nanoTime()
+ val (_, firstOsgbLon, firstOsgbLat, _, _) = osgb36Coords.head
+ val firstResult = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT ($firstOsgbLon $firstOsgbLat)'),
+ '$osgb36WithGrid',
+ '$etrs89')""")
+ .first()
+ .getAs[Geometry](0)
+ val firstTime = (System.nanoTime() - startFirst) / 1e6
+
+ println(f"First transform (grid cached, new CRS): $firstTime%.2f ms")
+
+ // Transform remaining 39 points
+ val startRest = System.nanoTime()
+ var successCount = 1
+ var failCount = 0
+
+ osgb36Coords.tail.foreach { case (pointId, osgbLon, osgbLat,
origEtrsLon, origEtrsLat) =>
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_GeomFromWKT('POINT ($osgbLon $osgbLat)'),
+ '$osgb36WithGrid',
+ '$etrs89')""")
+ .first()
+ .getAs[Geometry](0)
+
+ if (result != null) {
+ val xShift = Math.abs(result.getCoordinate.x - osgbLon)
+ val yShift = Math.abs(result.getCoordinate.y - osgbLat)
+
+ if (xShift < GRID_SHIFT_TOLERANCE && yShift < GRID_SHIFT_TOLERANCE &&
+ xShift > 1e-9 && yShift > 1e-9) {
+ successCount += 1
+ } else {
+ failCount += 1
+ println(s" $pointId: Unexpected shift - X: $xShift, Y: $yShift")
+ }
+ } else {
+ failCount += 1
+ println(s" $pointId: Result is null")
+ }
+ }
+ val restTime = (System.nanoTime() - startRest) / 1e6
+ val avgTime = restTime / 39
+
+ println(f"\nRemaining 39 transforms (fully cached): $restTime%.2f ms
total")
+ println(f" Average per transform: $avgTime%.2f ms")
+ println(f" Speedup vs first: ${firstTime / avgTime}%.1fx")
+ println(f"\nResults: $successCount/40 passed, $failCount failed")
+
+ assertNotNull("First result should not be null", firstResult)
+ assertEquals("All 40 points should transform successfully", 40,
successCount)
+ }
+
+ it("should round-trip all 40 points ETRS89 -> OSGB36 -> ETRS89") {
+ val etrsToOsgbFile =
+
getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_ETRStoOSGB.gsb")
+ val osgbToEtrsFile =
+
getClass.getClassLoader.getResource("grids/uk_os_OSTN15_NTv2_OSGBtoETRS.gsb")
+ assume(etrsToOsgbFile != null && osgbToEtrsFile != null, "OSTN15 grid
files not found")
+
+ val etrs89WithGrid = s"+proj=longlat +ellps=GRS80
+nadgrids=${etrsToOsgbFile.getPath}"
+ val osgb36 = "+proj=longlat +ellps=airy"
+ val osgb36WithGrid = s"+proj=longlat +ellps=airy
+nadgrids=${osgbToEtrsFile.getPath}"
+ val etrs89 = "+proj=longlat +ellps=GRS80"
+
+ println("\n" + "=" * 70)
+ println("OSTN15 Round-trip: ETRS89 -> OSGB36 -> ETRS89 (40 points)")
+ println("=" * 70)
+
+ val startTotal = System.nanoTime()
+ var successCount = 0
+ var maxError = 0.0
+ var worstPoint = ""
+
+ ostn15TestPoints.foreach { tp =>
+ val result = sparkSession
+ .sql(s"""SELECT ST_Transform(
+ ST_Transform(
+ ST_GeomFromWKT('POINT (${tp.etrsLon} ${tp.etrsLat})'),
+ '$etrs89WithGrid',
+ '$osgb36'),
+ '$osgb36WithGrid',
+ '$etrs89')""")
+ .first()
+ .getAs[Geometry](0)
+
+ if (result != null) {
+ val xError = Math.abs(result.getCoordinate.x - tp.etrsLon)
+ val yError = Math.abs(result.getCoordinate.y - tp.etrsLat)
+ val totalError = Math.sqrt(xError * xError + yError * yError)
+
+ if (xError < ROUND_TRIP_TOLERANCE && yError < ROUND_TRIP_TOLERANCE) {
+ successCount += 1
+ } else {
+ println(f" ${tp.pointId}: Round-trip error - X: $xError%.9f, Y:
$yError%.9f")
+ }
+
+ if (totalError > maxError) {
+ maxError = totalError
+ worstPoint = tp.pointId
+ }
+ }
+ }
+ val totalTime = (System.nanoTime() - startTotal) / 1e6
+
+ println(f"\nTotal time for 40 round-trips: $totalTime%.2f ms")
+ println(f" Average per round-trip: ${totalTime / 40}%.2f ms")
+ println(f" Max error: $maxError%.9f degrees (at $worstPoint)")
+ println(f"\nResults: $successCount/40 passed within tolerance
($ROUND_TRIP_TOLERANCE deg)")
+
+ assertEquals("All 40 points should round-trip successfully", 40,
successCount)
+ }
+
+ it("should transform all 40 points using remote grid file with download
timing") {
+ // Clear caches to force fresh download
+ Proj4.clearCache()
+ NadgridRegistry.clear()
+
+ val etrs89WithGrid = s"+proj=longlat +ellps=GRS80
+nadgrids=$OSTN15_ETRS_TO_OSGB_URL"
Review Comment:
This test performs a real remote download from `raw.githubusercontent.com`,
which makes the test suite non-deterministic (network outages, rate limits, CI
sandbox restrictions). Please gate it behind an explicit opt-in (e.g.,
`assume(sys.env.contains("SEDONA_TEST_REMOTE_GRIDS"))`), or mark it
ignored/disabled by default and run it only in a dedicated
integration/benchmark job.
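A minimal sketch of the opt-in gate (the environment variable name is only a
suggestion):

```scala
it("should transform all 40 points using remote grid file with download timing") {
  // Hypothetical opt-in: skip unless the CI job explicitly enables remote grid tests.
  assume(
    sys.env.contains("SEDONA_TEST_REMOTE_GRIDS"),
    "remote grid download tests are disabled by default")

  // ... existing body that builds the +nadgrids=<remote URL> PROJ string ...
}
```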
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]