This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch fix/upgrade-proj4sedona-url-crs-provider-2657 in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 1b11cd8709aa4cffa1b7438906136a46e452f36e Author: Jia Yu <[email protected]> AuthorDate: Wed Feb 18 00:38:40 2026 -0800 Upgrade proj4sedona to 0.0.4 and adopt UrlCRSProvider (#2657) - Bump proj4sedona.version from 0.0.3 to 0.0.4 - Add 3 new Spark configs: spark.sedona.crs.url.base, spark.sedona.crs.url.pathTemplate, spark.sedona.crs.url.format - Add registerUrlCrsProvider() in FunctionsProj4 with thread-safe idempotent registration (AtomicReference, priority 50) - Wire ST_Transform to capture URL CRS config on driver and register provider on executors via companion object readConfig() - Add tests: 6 unit tests (FunctionsProj4Test), 6 config tests (SedonaConfTest), 4 integration tests (CRSTransformProj4Test) using local HTTP server with fake EPSG:990001 --- .../org/apache/sedona/common/FunctionsProj4.java | 81 ++++++++++ .../apache/sedona/common/FunctionsProj4Test.java | 167 +++++++++++++++++++++ pom.xml | 2 +- .../org/apache/sedona/core/utils/SedonaConf.java | 45 ++++++ .../sql/sedona_sql/expressions/Functions.scala | 47 ++++-- .../apache/sedona/core/utils/SedonaConfTest.java | 53 +++++++ .../apache/sedona/sql/CRSTransformProj4Test.scala | 120 +++++++++++++++ 7 files changed, 503 insertions(+), 12 deletions(-) diff --git a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java index b5a5c1c43e..78e0a94f53 100644 --- a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java +++ b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java @@ -18,9 +18,14 @@ */ package org.apache.sedona.common; +import java.util.Locale; +import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.datasyslab.proj4sedona.core.Proj; +import org.datasyslab.proj4sedona.defs.CRSResult; +import org.datasyslab.proj4sedona.defs.Defs; +import org.datasyslab.proj4sedona.defs.UrlCRSProvider; import 
org.datasyslab.proj4sedona.jts.JTSGeometryTransformer; import org.datasyslab.proj4sedona.parser.CRSSerializer; import org.locationtech.jts.geom.Geometry; @@ -62,6 +67,82 @@ public class FunctionsProj4 { private static final Pattern EPSG_PATTERN = Pattern.compile("^EPSG:(\\d+)$", Pattern.CASE_INSENSITIVE); + /** Name used for the registered URL CRS provider. */ + private static final String URL_CRS_PROVIDER_NAME = "sedona-url-crs"; + + /** + * Tracks the currently registered URL CRS provider config (baseUrl + "|" + pathTemplate + "|" + + * format). Null means no provider registered yet. Uses AtomicReference for thread-safe lazy + * initialization on executors. + */ + private static final AtomicReference<String> registeredUrlCrsConfig = new AtomicReference<>(null); + + /** + * Register a URL-based CRS provider with proj4sedona's Defs registry. This provider will be + * consulted before the built-in provider when resolving EPSG codes. + * + * <p>This method is safe to call multiple times — it only registers once per JVM (or re-registers + * if the config changes). It is called lazily on executors before the first CRS transformation. 
+   *
+   * @param baseUrl The base URL of the CRS definition server
+   * @param pathTemplate The URL path template (e.g., "/{authority}/{code}.json")
+   * @param format The expected response format: "projjson", "proj", "wkt1", or "wkt2"
+   */
+  public static synchronized void registerUrlCrsProvider(
+      String baseUrl, String pathTemplate, String format) {
+    if (baseUrl == null || baseUrl.isEmpty()) {
+      return;
+    }
+
+    // synchronized: the check-then-register sequence below must not interleave across threads
+    String configKey = baseUrl + "|" + pathTemplate + "|" + format;
+    String current = registeredUrlCrsConfig.get();
+    if (configKey.equals(current)) {
+      // Already registered with the same config
+      return;
+    }
+
+    // Build the replacement first so a failure here leaves the current registration untouched
+    UrlCRSProvider provider =
+        UrlCRSProvider.builder(URL_CRS_PROVIDER_NAME)
+            .baseUrl(baseUrl)
+            .pathTemplate(pathTemplate)
+            .format(parseCrsFormat(format))
+            .build();
+
+    // Remove existing provider if config changed
+    if (current != null) {
+      Defs.removeProvider(URL_CRS_PROVIDER_NAME);
+    }
+
+    // Priority 50: before built-in (100) and spatialreference.org (101)
+    Defs.registerProvider(provider, 50);
+    registeredUrlCrsConfig.set(configKey);
+  }
+
+  /**
+   * Parse the CRS format string from config to the CRSResult.Format enum.
+   *
+   * @param format Format string: "projjson", "proj", "wkt1", or "wkt2"
+   * @return The corresponding CRSResult.Format
+   */
+  private static CRSResult.Format parseCrsFormat(String format) {
+    if (format == null || format.isEmpty()) {
+      return CRSResult.Format.PROJJSON;
+    }
+    switch (format.toLowerCase(Locale.ROOT)) {
+      case "proj":
+        return CRSResult.Format.PROJ4;
+      case "wkt1":
+        return CRSResult.Format.WKT1;
+      case "wkt2":
+        return CRSResult.Format.WKT2;
+      case "projjson":
+      default:
+        return CRSResult.Format.PROJJSON;
+    }
+  }
+
   /**
    * Transform a geometry from the source CRS specified by the geometry's SRID to the target CRS.
* diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java index 903bf2f9d3..7fbedd05b0 100644 --- a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java +++ b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java @@ -21,8 +21,12 @@ package org.apache.sedona.common; import static org.junit.Assert.*; import static org.junit.Assume.assumeTrue; +import com.sun.net.httpserver.HttpServer; +import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.util.concurrent.atomic.AtomicInteger; import org.junit.Test; import org.locationtech.jts.geom.*; import org.locationtech.jts.io.WKTReader; @@ -573,4 +577,167 @@ public class FunctionsProj4Test extends TestBase { assertEquals(original.getCoordinate().x, backToWgs84.getCoordinate().x, 1e-9); assertEquals(original.getCoordinate().y, backToWgs84.getCoordinate().y, 1e-9); } + + // ==================== URL CRS Provider Registration Tests ==================== + + @Test + public void testRegisterUrlCrsProviderNoOpOnNullOrEmpty() { + // null and empty baseUrl should be no-ops, not throw + FunctionsProj4.registerUrlCrsProvider(null, "/epsg/{code}.json", "projjson"); + FunctionsProj4.registerUrlCrsProvider("", "/epsg/{code}.json", "projjson"); + // No provider should have been registered + assertNull("No provider should be registered for null/empty baseUrl", findUrlCrsProvider()); + } + + @Test + public void testRegisterUrlCrsProviderRegistersAndIsIdempotent() { + String testUrl = "https://test-crs-server.example.com"; + try { + FunctionsProj4.registerUrlCrsProvider(testUrl, "/epsg/{code}.json", "projjson"); + assertNotNull("sedona-url-crs provider should be registered", findUrlCrsProvider()); + int countBefore = countProvidersByName("sedona-url-crs"); + + // Second call with same config — should not add a duplicate + 
FunctionsProj4.registerUrlCrsProvider(testUrl, "/epsg/{code}.json", "projjson"); + assertEquals( + "Provider should not be duplicated", countBefore, countProvidersByName("sedona-url-crs")); + } finally { + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs"); + } + } + + @Test + public void testRegisterUrlCrsProviderReRegistersOnConfigChange() { + try { + FunctionsProj4.registerUrlCrsProvider( + "https://server-a.example.com", "/epsg/{code}.json", "projjson"); + assertEquals( + org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON, + findUrlCrsProvider().getFormat()); + + // Change config — should re-register with new settings + FunctionsProj4.registerUrlCrsProvider( + "https://server-b.example.com", "/epsg/{code}.json", "wkt2"); + assertEquals( + org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT2, findUrlCrsProvider().getFormat()); + } finally { + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs"); + } + } + + @Test + public void testParseCrsFormatAllMappings() { + // Verify all valid format strings map to the correct enum + Object[][] cases = { + {"projjson", org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON}, + {"proj", org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJ4}, + {"wkt1", org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT1}, + {"wkt2", org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT2}, + }; + for (Object[] c : cases) { + try { + FunctionsProj4.registerUrlCrsProvider( + "https://test.example.com", "/epsg/{code}", (String) c[0]); + assertEquals("Format '" + c[0] + "'", c[1], findUrlCrsProvider().getFormat()); + } finally { + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs"); + } + } + } + + @Test + public void testParseCrsFormatDefaultsAndCaseInsensitive() { + // null, empty, unknown, and uppercase should all default to / map to PROJJSON + String[] inputs = {null, "", "unknown-format", "PROJJSON", "ProjJson"}; + for (String input : inputs) { + try { + 
FunctionsProj4.registerUrlCrsProvider("https://test.example.com", "/epsg/{code}", input); + assertEquals( + "Format input '" + input + "' should resolve to PROJJSON", + org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON, + findUrlCrsProvider().getFormat()); + } finally { + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs"); + } + } + } + + @Test + public void testTransformWithLocalUrlCrsProvider() throws Exception { + // Serve a deliberately wrong CRS definition for a fake EPSG code (990001) + // that no built-in provider knows. The definition is a Mercator projection + // with absurd false easting/northing (+x_0=10000000 +y_0=20000000). + // If the transform succeeds with these shifted coordinates, the URL provider + // resolved the CRS. If it didn't work, the transform would fail entirely + // because no built-in provider knows EPSG:990001. + AtomicInteger requestCount = new AtomicInteger(0); + HttpServer server = HttpServer.create(new InetSocketAddress(0), 0); + int port = server.getAddress().getPort(); + + // Web Mercator with intentional 10M/20M false easting/northing + String weirdMercator = + "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0" + + " +x_0=10000000 +y_0=20000000 +k=1 +units=m +no_defs"; + + server.createContext( + "/epsg/", + exchange -> { + String path = exchange.getRequestURI().getPath(); + if (path.contains("990001")) { + requestCount.incrementAndGet(); + byte[] body = weirdMercator.getBytes(StandardCharsets.UTF_8); + exchange.sendResponseHeaders(200, body.length); + exchange.getResponseBody().write(body); + exchange.getResponseBody().close(); + } else { + // 404 for everything else — built-in providers handle known codes + exchange.sendResponseHeaders(404, -1); + exchange.getResponseBody().close(); + } + }); + server.start(); + + try { + FunctionsProj4.registerUrlCrsProvider( + "http://localhost:" + port, "/epsg/{code}.json", "proj"); + + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 
37.7749)); + Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:990001"); + + assertNotNull("Transform to fake EPSG:990001 should succeed via URL provider", result); + assertEquals(990001, result.getSRID()); + // Standard Web Mercator: x = -13627665.27, y = 4547675.35 + // Our weird definition adds +x_0=10000000, +y_0=20000000 + assertEquals(-3627665.27, result.getCoordinate().x, 1.0); + assertEquals(24547675.35, result.getCoordinate().y, 1.0); + assertTrue("Local HTTP server should have been hit", requestCount.get() > 0); + } finally { + server.stop(0); + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs"); + } + } + + // Helper: count providers with a given name + private int countProvidersByName(String name) { + int count = 0; + for (org.datasyslab.proj4sedona.defs.CRSProvider p : + org.datasyslab.proj4sedona.defs.Defs.getProviders()) { + if (name.equals(p.getName())) { + count++; + } + } + return count; + } + + // Helper: find the registered UrlCRSProvider + private org.datasyslab.proj4sedona.defs.UrlCRSProvider findUrlCrsProvider() { + for (org.datasyslab.proj4sedona.defs.CRSProvider p : + org.datasyslab.proj4sedona.defs.Defs.getProviders()) { + if ("sedona-url-crs".equals(p.getName()) + && p instanceof org.datasyslab.proj4sedona.defs.UrlCRSProvider) { + return (org.datasyslab.proj4sedona.defs.UrlCRSProvider) p; + } + } + return null; + } } diff --git a/pom.xml b/pom.xml index 05ca1cde9c..b8025399e6 100644 --- a/pom.xml +++ b/pom.xml @@ -96,7 +96,7 @@ <scala-collection-compat.version>2.5.0</scala-collection-compat.version> <geoglib.version>1.52</geoglib.version> <caffeine.version>2.9.2</caffeine.version> - <proj4sedona.version>0.0.3</proj4sedona.version> + <proj4sedona.version>0.0.4</proj4sedona.version> <geotools.scope>provided</geotools.scope> <!-- Because it's not in Maven central, make it provided by default --> diff --git a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java 
b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java index 44b2885815..84a9a78e79 100644 --- a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java +++ b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java @@ -119,6 +119,11 @@ public class SedonaConf implements Serializable { // Parameter for CRS transformation mode private CRSTransformMode crsTransformMode; + // Parameters for URL-based CRS provider + private String crsUrlBase; + private String crsUrlPathTemplate; + private String crsUrlFormat; + public static SedonaConf fromActiveSession() { return new SedonaConf(SparkSession.active().conf()); } @@ -234,6 +239,14 @@ public class SedonaConf implements Serializable { // - "all": Use GeoTools for all transformations (legacy behavior) this.crsTransformMode = CRSTransformMode.fromString(confGetter.get("spark.sedona.crs.geotools", "raster")); + + // URL-based CRS provider configuration + // When spark.sedona.crs.url.base is set, a UrlCRSProvider is registered to resolve + // SRID definitions from the given HTTP(S) endpoint before falling back to built-in defs. + this.crsUrlBase = confGetter.get("spark.sedona.crs.url.base", ""); + this.crsUrlPathTemplate = + confGetter.get("spark.sedona.crs.url.pathTemplate", "/{authority}/{code}.json"); + this.crsUrlFormat = confGetter.get("spark.sedona.crs.url.format", "projjson"); } // Helper method to prioritize `sedona.*` over `spark.sedona.*` @@ -342,4 +355,36 @@ public class SedonaConf implements Serializable { public CRSTransformMode getCRSTransformMode() { return crsTransformMode; } + + /** + * Get the base URL for the URL-based CRS provider. When non-empty, a {@code UrlCRSProvider} is + * registered to resolve SRID definitions from this HTTP(S) endpoint. + * + * @return The base URL, or empty string if disabled + * @since 1.9.0 + */ + public String getCrsUrlBase() { + return crsUrlBase; + } + + /** + * Get the path template for the URL-based CRS provider. 
Supports placeholders: {@code + * {authority}}, {@code {AUTHORITY}}, {@code {code}}. + * + * @return The path template (default: "/{authority}/{code}.json") + * @since 1.9.0 + */ + public String getCrsUrlPathTemplate() { + return crsUrlPathTemplate; + } + + /** + * Get the expected response format for the URL-based CRS provider. + * + * @return The format string: "projjson", "proj", "wkt1", or "wkt2" (default: "projjson") + * @since 1.9.0 + */ + public String getCrsUrlFormat() { + return crsUrlFormat; + } } diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala index b5f85b8968..52d62e930d 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala @@ -307,18 +307,33 @@ private[apache] case class ST_Centroid(inputExpressions: Seq[Expression]) * @param inputExpressions * @param useGeoTools */ -private[apache] case class ST_Transform(inputExpressions: Seq[Expression], useGeoTools: Boolean) +private[apache] case class ST_Transform( + inputExpressions: Seq[Expression], + useGeoTools: Boolean, + crsUrlBase: String, + crsUrlPathTemplate: String, + crsUrlFormat: String) extends InferredExpression( inferrableFunction4(FunctionsProj4.transform), inferrableFunction3(FunctionsProj4.transform), inferrableFunction2(FunctionsProj4.transform)) { - def this(inputExpressions: Seq[Expression]) { - // We decide whether to use GeoTools based on active session config. - // SparkSession may not be available on executors, so we need to - // construct ST_Transform on driver. useGeoTools will be passed down - // to executors through object serialization/deserialization. 
-    this(inputExpressions, ST_Transform.useGeoTools())
+  private def this(
+      inputExpressions: Seq[Expression],
+      config: (Boolean, String, String, String)) = {
+    this(inputExpressions, config._1, config._2, config._3, config._4)
+  }
+
+  def this(inputExpressions: Seq[Expression]) = {
+    // Read all config from SedonaConf on the driver and pass to primary constructor.
+    // SparkSession may not be available on executors, so config is captured here
+    // and serialized to executors along with the expression node.
+    this(inputExpressions, ST_Transform.readConfig())
+  }
+
+  // Register URL CRS provider whenever this node is constructed (no-op if already registered)
+  if (crsUrlBase.nonEmpty) {
+    FunctionsProj4.registerUrlCrsProvider(crsUrlBase, crsUrlPathTemplate, crsUrlFormat)
   }
 
   // Define proj4sedona function overloads (2, 3, 4-arg versions)
@@ -347,13 +362,23 @@ private[apache] case class ST_Transform(inputExpressions: Seq[Expression], useGe
 }
 
 object ST_Transform {
-  private def useGeoTools(): Boolean = {
+
+  /**
+   * Read all ST_Transform config from SedonaConf in one call. Defaults are handled by SedonaConf
+   * itself. Returns safe fallbacks (proj4sedona, no URL provider) when no active session exists.
+ */ + private def readConfig(): (Boolean, String, String, String) = { try { - SedonaConf.fromActiveSession().getCRSTransformMode.useGeoToolsForVector() + val conf = SedonaConf.fromActiveSession() + ( + conf.getCRSTransformMode.useGeoToolsForVector(), + conf.getCrsUrlBase, + conf.getCrsUrlPathTemplate, + conf.getCrsUrlFormat) } catch { case _: Exception => - // If no active session, fall back to default (proj4sedona) - false + // No active session (e.g., during constant folding) — use safe defaults + (false, "", "", "") } } } diff --git a/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java b/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java index f90641f76a..0fb24e0235 100644 --- a/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java +++ b/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java @@ -60,4 +60,57 @@ public class SedonaConfTest { // fromSparkEnv means we don't have access to default values so sometimes we get null as input assertEquals(0, SedonaConf.bytesFromString(null)); } + + // ==================== URL CRS Provider Config Tests ==================== + + @Test + public void testCrsUrlBaseDefault() { + // Default should be empty string (disabled) + assertEquals("", SedonaConf.fromActiveSession().getCrsUrlBase()); + } + + @Test + public void testCrsUrlPathTemplateDefault() { + // Default should be "/{authority}/{code}.json" + assertEquals( + "/{authority}/{code}.json", SedonaConf.fromActiveSession().getCrsUrlPathTemplate()); + } + + @Test + public void testCrsUrlFormatDefault() { + // Default should be "projjson" + assertEquals("projjson", SedonaConf.fromActiveSession().getCrsUrlFormat()); + } + + @Test + public void testCrsUrlBaseCustom() { + SparkSession.active().conf().set("spark.sedona.crs.url.base", "https://cdn.proj.org"); + try { + assertEquals("https://cdn.proj.org", SedonaConf.fromActiveSession().getCrsUrlBase()); + } finally { + 
SparkSession.active().conf().set("spark.sedona.crs.url.base", ""); + } + } + + @Test + public void testCrsUrlPathTemplateCustom() { + SparkSession.active().conf().set("spark.sedona.crs.url.pathTemplate", "/{authority}/{code}"); + try { + assertEquals("/{authority}/{code}", SedonaConf.fromActiveSession().getCrsUrlPathTemplate()); + } finally { + SparkSession.active() + .conf() + .set("spark.sedona.crs.url.pathTemplate", "/{authority}/{code}.json"); + } + } + + @Test + public void testCrsUrlFormatCustom() { + SparkSession.active().conf().set("spark.sedona.crs.url.format", "wkt2"); + try { + assertEquals("wkt2", SedonaConf.fromActiveSession().getCrsUrlFormat()); + } finally { + SparkSession.active().conf().set("spark.sedona.crs.url.format", "projjson"); + } + } } diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala b/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala index 73b0ae55bc..25576bb9b3 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala @@ -18,6 +18,9 @@ */ package org.apache.sedona.sql +import com.sun.net.httpserver.HttpServer +import java.net.InetSocketAddress +import java.util.concurrent.atomic.AtomicInteger import org.apache.spark.sql.functions.lit import org.apache.spark.sql.sedona_sql.expressions.st_functions._ import org.junit.Assert.{assertEquals, assertNotNull, assertTrue} @@ -855,4 +858,121 @@ class CRSTransformProj4Test extends TestBaseScala { assertEquals("All 40 points should transform successfully", 40, successCount) } } + + describe("URL CRS Provider config integration") { + + it("should still transform correctly when URL provider is not configured") { + // Verify default behavior (no URL provider) still works + sparkSession.conf.set("spark.sedona.crs.url.base", "") + val result = sparkSession + .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT 
(-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + assertEquals(3857, result.getSRID) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE) + } + + it("should fall back to built-in when URL provider returns nothing") { + // Point to a non-existent server — provider will fail, should fall back to built-in + sparkSession.conf.set("spark.sedona.crs.url.base", "http://127.0.0.1:1") + sparkSession.conf.set("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.json") + sparkSession.conf.set("spark.sedona.crs.url.format", "projjson") + try { + val result = sparkSession + .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + // Should succeed via built-in fallback + assertNotNull(result) + assertEquals(3857, result.getSRID) + assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE) + } finally { + sparkSession.conf.set("spark.sedona.crs.url.base", "") + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs") + } + } + + it("should register URL CRS provider when config is set") { + sparkSession.conf.set("spark.sedona.crs.url.base", "https://test.example.com") + sparkSession.conf.set("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.json") + sparkSession.conf.set("spark.sedona.crs.url.format", "projjson") + try { + // Force a transform to trigger provider registration + val result = sparkSession + .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')") + .first() + .getAs[Geometry](0) + + assertNotNull(result) + + // Verify provider was registered + val providers = org.datasyslab.proj4sedona.defs.Defs.getProviders + val found = providers.stream().anyMatch(p => p.getName == 
"sedona-url-crs") + assertTrue("sedona-url-crs provider should be registered", found) + } finally { + sparkSession.conf.set("spark.sedona.crs.url.base", "") + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs") + } + } + + it("should transform using local HTTP URL CRS provider with custom CRS") { + // Serve a deliberately wrong CRS definition for fake EPSG:990001 that no + // built-in provider knows. Uses Mercator with absurd false easting/northing. + // If the transform succeeds with shifted coordinates, the URL provider was used. + // If the URL provider didn't work, the transform would fail entirely. + val requestCount = new AtomicInteger(0) + val server = HttpServer.create(new InetSocketAddress(0), 0) + val port = server.getAddress.getPort + + // Web Mercator with intentional 10M/20M false easting/northing + val weirdMercator = + "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0" + + " +x_0=10000000 +y_0=20000000 +k=1 +units=m +no_defs" + + server.createContext( + "/epsg/", + exchange => { + val path = exchange.getRequestURI.getPath + if (path.contains("990001")) { + requestCount.incrementAndGet() + val body = weirdMercator.getBytes("UTF-8") + exchange.sendResponseHeaders(200, body.length) + exchange.getResponseBody.write(body) + exchange.getResponseBody.close() + } else { + // 404 for everything else — built-in providers handle known codes + exchange.sendResponseHeaders(404, -1) + exchange.getResponseBody.close() + } + }) + server.start() + + sparkSession.conf.set("spark.sedona.crs.url.base", s"http://localhost:$port") + sparkSession.conf.set("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.json") + sparkSession.conf.set("spark.sedona.crs.url.format", "proj") + try { + val result = sparkSession + .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:990001')") + .first() + .getAs[Geometry](0) + + assertNotNull("Transform to fake EPSG:990001 should succeed via URL provider", result) + 
assertEquals(990001, result.getSRID) + // Standard Web Mercator: x = -13627665.27, y = 4547675.35 + // Our weird definition adds +x_0=10000000, +y_0=20000000 + assertEquals(-3627665.27, result.getCoordinate.x, COORD_TOLERANCE) + assertEquals(24547675.35, result.getCoordinate.y, COORD_TOLERANCE) + assertTrue("Local HTTP server should have been hit", requestCount.get() > 0) + } finally { + server.stop(0) + sparkSession.conf.set("spark.sedona.crs.url.base", "") + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs") + } + } + } }
