This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new e2db567e88 [GH-2657] Upgrade proj4sedona to 0.0.4 and adopt
UrlCRSProvider (#2658)
e2db567e88 is described below
commit e2db567e886b3bc777f8524731a19042e1e4dc58
Author: Jia Yu <[email protected]>
AuthorDate: Wed Feb 18 15:50:27 2026 -0700
[GH-2657] Upgrade proj4sedona to 0.0.4 and adopt UrlCRSProvider (#2658)
---
.../org/apache/sedona/common/FunctionsProj4.java | 103 ++++++++++
.../apache/sedona/common/FunctionsProj4Test.java | 219 +++++++++++++++++++++
docs/api/sql/CRS-Transformation.md | 166 ++++++++++++++++
docs/api/sql/Parameter.md | 19 ++
pom.xml | 2 +-
.../org/apache/sedona/core/utils/SedonaConf.java | 45 +++++
.../sql/sedona_sql/expressions/Functions.scala | 49 +++--
.../apache/sedona/core/utils/SedonaConfTest.java | 53 +++++
.../apache/sedona/sql/CRSTransformProj4Test.scala | 121 ++++++++++++
9 files changed, 765 insertions(+), 12 deletions(-)
diff --git a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
index b5a5c1c43e..8b40d93b6a 100644
--- a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
+++ b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
@@ -18,9 +18,14 @@
*/
package org.apache.sedona.common;
+import java.util.Locale;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.datasyslab.proj4sedona.core.Proj;
+import org.datasyslab.proj4sedona.defs.CRSResult;
+import org.datasyslab.proj4sedona.defs.Defs;
+import org.datasyslab.proj4sedona.defs.UrlCRSProvider;
import org.datasyslab.proj4sedona.jts.JTSGeometryTransformer;
import org.datasyslab.proj4sedona.parser.CRSSerializer;
import org.locationtech.jts.geom.Geometry;
@@ -62,6 +67,104 @@ public class FunctionsProj4 {
private static final Pattern EPSG_PATTERN =
Pattern.compile("^EPSG:(\\d+)$", Pattern.CASE_INSENSITIVE);
+ /** Name used for the registered URL CRS provider. */
+ private static final String URL_CRS_PROVIDER_NAME = "sedona-url-crs";
+
+ /**
+ * Tracks the currently registered URL CRS provider config (baseUrl + "|" +
pathTemplate + "|" +
+ * format). Null means no provider registered yet. Uses AtomicReference for
thread-safe lazy
+ * initialization on executors.
+ */
+ private static final AtomicReference<String> registeredUrlCrsConfig = new
AtomicReference<>(null);
+
+ /**
+ * Reset the URL CRS provider state. Package-private for testing only.
Removes the provider from
+ * Defs and clears the cached config key.
+ */
+ static void resetUrlCrsProviderForTest() {
+ Defs.removeProvider(URL_CRS_PROVIDER_NAME);
+ registeredUrlCrsConfig.set(null);
+ }
+
+ /**
+ * Register a URL-based CRS provider with proj4sedona's Defs registry. This
provider will be
+ * consulted before the built-in provider when resolving EPSG codes.
+ *
+ * <p>This method is safe to call concurrently from multiple threads — it uses double-checked
+ * locking so the fast path (already registered with the same config) is lock-free. Threads
+ * that race into the synchronized slow path re-check the config under the lock, so the actual
+ * registration runs at most once per JVM (or once per config change).
+ *
+ * @param baseUrl The base URL of the CRS definition server
+ * @param pathTemplate The URL path template (e.g.,
"/{authority}/{code}.json")
+ * @param format The expected response format: "projjson", "proj", "wkt1",
or "wkt2"
+ */
+ public static void registerUrlCrsProvider(String baseUrl, String
pathTemplate, String format) {
+ if (baseUrl == null || baseUrl.isEmpty()) {
+ return;
+ }
+
+ // Canonicalize format to avoid unnecessary re-registration for equivalent
configs
+ String canonicalFormat =
parseCrsFormat(format).name().toLowerCase(Locale.ROOT);
+ String configKey = baseUrl + "|" + pathTemplate + "|" + canonicalFormat;
+
+ // Fast path (lock-free): already registered with the same config.
+ // After the first registration, repeat calls with the same config return here after a
+ // single volatile read and a String.equals() comparison.
+ if (configKey.equals(registeredUrlCrsConfig.get())) {
+ return;
+ }
+
+ // Slow path: synchronize to make the remove-register-set sequence atomic.
+ // Racing threads enter one at a time; only the first (per config change) registers.
+ synchronized (registeredUrlCrsConfig) {
+ // Re-check after acquiring lock — another thread may have registered
already
+ String current = registeredUrlCrsConfig.get();
+ if (configKey.equals(current)) {
+ return;
+ }
+
+ // Remove existing provider if config changed
+ if (current != null) {
+ Defs.removeProvider(URL_CRS_PROVIDER_NAME);
+ }
+
+ CRSResult.Format crsFormat = parseCrsFormat(format);
+
+ UrlCRSProvider provider =
+ UrlCRSProvider.builder(URL_CRS_PROVIDER_NAME)
+ .baseUrl(baseUrl)
+ .pathTemplate(pathTemplate)
+ .format(crsFormat)
+ .build();
+
+ // Priority 50: before built-in (100) and spatialreference.org (101)
+ Defs.registerProvider(provider, 50);
+ registeredUrlCrsConfig.set(configKey);
+ }
+ }
+
+ /**
+ * Parse the CRS format string from config to the CRSResult.Format enum.
+ *
+ * @param format Format string: "projjson", "proj", "wkt1", or "wkt2"
+ * @return The corresponding CRSResult.Format
+ */
+ private static CRSResult.Format parseCrsFormat(String format) {
+ if (format == null || format.isEmpty()) {
+ return CRSResult.Format.PROJJSON;
+ }
+ switch (format.toLowerCase(Locale.ROOT)) {
+ case "proj":
+ return CRSResult.Format.PROJ4;
+ case "wkt1":
+ return CRSResult.Format.WKT1;
+ case "wkt2":
+ return CRSResult.Format.WKT2;
+ case "projjson":
+ default:
+ return CRSResult.Format.PROJJSON;
+ }
+ }
+
/**
* Transform a geometry from the source CRS specified by the geometry's SRID
to the target CRS.
*
diff --git
a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
index 903bf2f9d3..2584f99ccb 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
@@ -21,8 +21,18 @@ package org.apache.sedona.common;
import static org.junit.Assert.*;
import static org.junit.Assume.assumeTrue;
+import com.sun.net.httpserver.HttpServer;
+import java.net.InetSocketAddress;
+import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
import org.junit.Test;
import org.locationtech.jts.geom.*;
import org.locationtech.jts.io.WKTReader;
@@ -573,4 +583,213 @@ public class FunctionsProj4Test extends TestBase {
assertEquals(original.getCoordinate().x, backToWgs84.getCoordinate().x,
1e-9);
assertEquals(original.getCoordinate().y, backToWgs84.getCoordinate().y,
1e-9);
}
+
+ // ==================== URL CRS Provider Registration Tests
====================
+
+ @Test
+ public void testRegisterUrlCrsProviderNoOpOnNullOrEmpty() {
+ // null and empty baseUrl should be no-ops, not throw
+ FunctionsProj4.registerUrlCrsProvider(null, "/epsg/{code}.json",
"projjson");
+ FunctionsProj4.registerUrlCrsProvider("", "/epsg/{code}.json", "projjson");
+ // No provider should have been registered
+ assertNull("No provider should be registered for null/empty baseUrl",
findUrlCrsProvider());
+ }
+
+ @Test
+ public void testRegisterUrlCrsProviderRegistersAndIsIdempotent() {
+ String testUrl = "https://test-crs-server.example.com";
+ try {
+ FunctionsProj4.registerUrlCrsProvider(testUrl, "/epsg/{code}.json",
"projjson");
+ assertNotNull("sedona-url-crs provider should be registered",
findUrlCrsProvider());
+ int countBefore = countProvidersByName("sedona-url-crs");
+
+ // Second call with same config — should not add a duplicate
+ FunctionsProj4.registerUrlCrsProvider(testUrl, "/epsg/{code}.json",
"projjson");
+ assertEquals(
+ "Provider should not be duplicated", countBefore,
countProvidersByName("sedona-url-crs"));
+ } finally {
+ FunctionsProj4.resetUrlCrsProviderForTest();
+ }
+ }
+
+ @Test
+ public void testRegisterUrlCrsProviderReRegistersOnConfigChange() {
+ try {
+ FunctionsProj4.registerUrlCrsProvider(
+ "https://server-a.example.com", "/epsg/{code}.json", "projjson");
+ assertEquals(
+ org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON,
+ findUrlCrsProvider().getFormat());
+
+ // Change config — should re-register with new settings
+ FunctionsProj4.registerUrlCrsProvider(
+ "https://server-b.example.com", "/epsg/{code}.json", "wkt2");
+ assertEquals(
+ org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT2,
findUrlCrsProvider().getFormat());
+ } finally {
+ FunctionsProj4.resetUrlCrsProviderForTest();
+ }
+ }
+
+ @Test
+ public void testParseCrsFormatAllMappings() {
+ // Verify all valid format strings map to the correct enum
+ Object[][] cases = {
+ {"projjson", org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON},
+ {"proj", org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJ4},
+ {"wkt1", org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT1},
+ {"wkt2", org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT2},
+ };
+ for (Object[] c : cases) {
+ try {
+ FunctionsProj4.registerUrlCrsProvider(
+ "https://test.example.com", "/epsg/{code}", (String) c[0]);
+ assertEquals("Format '" + c[0] + "'", c[1],
findUrlCrsProvider().getFormat());
+ } finally {
+ FunctionsProj4.resetUrlCrsProviderForTest();
+ }
+ }
+ }
+
+ @Test
+ public void testParseCrsFormatDefaultsAndCaseInsensitive() {
+ // null, empty, unknown, and uppercase should all default to / map to
PROJJSON
+ String[] inputs = {null, "", "unknown-format", "PROJJSON", "ProjJson"};
+ for (String input : inputs) {
+ try {
+ FunctionsProj4.registerUrlCrsProvider("https://test.example.com",
"/epsg/{code}", input);
+ assertEquals(
+ "Format input '" + input + "' should resolve to PROJJSON",
+ org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON,
+ findUrlCrsProvider().getFormat());
+ } finally {
+ // Use the test reset so registeredUrlCrsConfig is also cleared
+ FunctionsProj4.resetUrlCrsProviderForTest();
+ }
+ }
+ }
+
+ @Test
+ public void testTransformWithLocalUrlCrsProvider() throws Exception {
+ // Serve a deliberately wrong CRS definition for a fake EPSG code (990001)
+ // that no built-in provider knows. The definition is a Mercator projection
+ // with absurd false easting/northing (+x_0=10000000 +y_0=20000000).
+ // If the transform succeeds with these shifted coordinates, the URL
provider
+ // resolved the CRS. If it didn't work, the transform would fail entirely
+ // because no built-in provider knows EPSG:990001.
+ AtomicInteger requestCount = new AtomicInteger(0);
+ HttpServer server = HttpServer.create(new InetSocketAddress(0), 0);
+ int port = server.getAddress().getPort();
+
+ // Web Mercator with intentional 10M/20M false easting/northing
+ String weirdMercator =
+ "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0"
+ + " +x_0=10000000 +y_0=20000000 +k=1 +units=m +no_defs";
+
+ server.createContext(
+ "/epsg/",
+ exchange -> {
+ String path = exchange.getRequestURI().getPath();
+ if (path.contains("990001")) {
+ requestCount.incrementAndGet();
+ byte[] body = weirdMercator.getBytes(StandardCharsets.UTF_8);
+ exchange.sendResponseHeaders(200, body.length);
+ exchange.getResponseBody().write(body);
+ exchange.getResponseBody().close();
+ } else {
+ // 404 for everything else — built-in providers handle known codes
+ exchange.sendResponseHeaders(404, -1);
+ exchange.getResponseBody().close();
+ }
+ });
+ server.start();
+
+ try {
+ FunctionsProj4.registerUrlCrsProvider(
+ "http://localhost:" + port, "/epsg/{code}.json", "proj");
+
+ Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194,
37.7749));
+ Geometry result = FunctionsProj4.transform(point, "EPSG:4326",
"EPSG:990001");
+
+ assertNotNull("Transform to fake EPSG:990001 should succeed via URL
provider", result);
+ assertEquals(990001, result.getSRID());
+ // Standard Web Mercator: x = -13627665.27, y = 4547675.35
+ // Our weird definition adds +x_0=10000000, +y_0=20000000
+ assertEquals(-3627665.27, result.getCoordinate().x, 1.0);
+ assertEquals(24547675.35, result.getCoordinate().y, 1.0);
+ assertTrue("Local HTTP server should have been hit", requestCount.get()
> 0);
+ } finally {
+ server.stop(0);
+ FunctionsProj4.resetUrlCrsProviderForTest();
+ }
+ }
+
+ @Test
+ public void testRegisterUrlCrsProviderConcurrentThreadSafety() throws
Exception {
+ // Verify that concurrent calls to registerUrlCrsProvider do not produce
+ // duplicate providers or corrupt the registry. This exercises the
+ // synchronized double-checked locking path.
+ final int threadCount = 16;
+ final String testUrl = "https://concurrent-test.example.com";
+ final String pathTemplate = "/epsg/{code}.json";
+ final String format = "projjson";
+
+ ExecutorService pool = Executors.newFixedThreadPool(threadCount);
+ CyclicBarrier barrier = new CyclicBarrier(threadCount);
+
+ try {
+ List<Future<?>> futures = new ArrayList<>();
+ for (int i = 0; i < threadCount; i++) {
+ futures.add(
+ pool.submit(
+ () -> {
+ try {
+ // All threads wait at the barrier then race into
registration
+ barrier.await();
+ FunctionsProj4.registerUrlCrsProvider(testUrl,
pathTemplate, format);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }));
+ }
+
+ // Wait for all threads to complete and propagate any exceptions
+ for (Future<?> f : futures) {
+ f.get();
+ }
+
+ // After all concurrent registrations, there should be exactly 1 provider
+ assertEquals(
+ "Concurrent registration must produce exactly 1 provider",
+ 1,
+ countProvidersByName("sedona-url-crs"));
+ } finally {
+ pool.shutdown();
+ FunctionsProj4.resetUrlCrsProviderForTest();
+ }
+ }
+
+ // Helper: count providers with a given name
+ private int countProvidersByName(String name) {
+ int count = 0;
+ for (org.datasyslab.proj4sedona.defs.CRSProvider p :
+ org.datasyslab.proj4sedona.defs.Defs.getProviders()) {
+ if (name.equals(p.getName())) {
+ count++;
+ }
+ }
+ return count;
+ }
+
+ // Helper: find the registered UrlCRSProvider
+ private org.datasyslab.proj4sedona.defs.UrlCRSProvider findUrlCrsProvider() {
+ for (org.datasyslab.proj4sedona.defs.CRSProvider p :
+ org.datasyslab.proj4sedona.defs.Defs.getProviders()) {
+ if ("sedona-url-crs".equals(p.getName())
+ && p instanceof org.datasyslab.proj4sedona.defs.UrlCRSProvider) {
+ return (org.datasyslab.proj4sedona.defs.UrlCRSProvider) p;
+ }
+ }
+ return null;
+ }
}
diff --git a/docs/api/sql/CRS-Transformation.md
b/docs/api/sql/CRS-Transformation.md
index a61533aab5..24a15c98dc 100644
--- a/docs/api/sql/CRS-Transformation.md
+++ b/docs/api/sql/CRS-Transformation.md
@@ -200,6 +200,172 @@ SELECT ST_Transform(
) AS transformed_point
```
+## URL CRS Provider
+
+Since v1.9.0, Sedona supports resolving CRS definitions from a remote HTTP
server. This is useful when you need custom or internal CRS definitions that
are not included in the built-in database, or when you want to use your own CRS
definition service.
+
+When configured, the URL provider is consulted **before** the built-in CRS
database. If the URL provider returns a valid CRS definition, it is used
directly. If the URL returns a 404 or an error, Sedona falls back to the
built-in definitions.
+
+### Hosting CRS definitions
+
+You can host your custom CRS definitions on any HTTP-accessible location. Two
common approaches:
+
+- **GitHub repository**: Store CRS definition files in a public GitHub repo
and use the raw content URL. This is the easiest way to get started — no server
infrastructure required.
+- **Public S3 bucket**: Upload CRS definition files to an Amazon S3 bucket
with public read access and use the S3 static website URL or CloudFront
distribution.
+
+Each file should contain a single CRS definition in the format you specify via
`spark.sedona.crs.url.format` (PROJJSON, PROJ string, WKT1, or WKT2).
+
+### Configuration
+
+Set the following Spark configuration properties when creating your Sedona
session:
+
+```python
+config = (
+ SedonaContext.builder()
+ .config("spark.sedona.crs.url.base", "https://crs.example.com")
+ .config("spark.sedona.crs.url.pathTemplate", "/{authority}/{code}.json")
+ .config("spark.sedona.crs.url.format", "projjson")
+ .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+```
+
+With the default path template, resolving `EPSG:4326` will fetch:
+
+```
+https://crs.example.com/epsg/4326.json
+```
+
+Only `spark.sedona.crs.url.base` is required. The other two properties have
sensible defaults (`/{authority}/{code}.json` and `projjson`).
+
+### Supported response formats
+
+| Format value | Description | Content example |
+|-------------|-------------|----------------|
+| `projjson` | PROJJSON (default) | `{"type": "GeographicCRS", ...}` |
+| `proj` | PROJ string | `+proj=longlat +datum=WGS84 +no_defs` |
+| `wkt1` | OGC WKT1 | `GEOGCS["WGS 84", ...]` |
+| `wkt2` | ISO 19162 WKT2 | `GEOGCRS["WGS 84", ...]` |
+
+### Example: GitHub repository
+
+Suppose you have a GitHub repo `myorg/crs-definitions` with the following
structure:
+
+```
+crs-definitions/
+ epsg/
+ 990001.proj
+ 990002.proj
+```
+
+where `epsg/990001.proj` contains a PROJ string like:
+
+```
++proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs
+```
+
+Point Sedona to the raw GitHub content URL:
+
+```python
+config = (
+ SedonaContext.builder()
+ .config(
+ "spark.sedona.crs.url.base",
+ "https://raw.githubusercontent.com/myorg/crs-definitions/main",
+ )
+ .config("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.proj")
+ .config("spark.sedona.crs.url.format", "proj")
+ .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Resolves EPSG:990001 from:
+# https://raw.githubusercontent.com/myorg/crs-definitions/main/epsg/990001.proj
+sedona.sql("""
+ SELECT ST_Transform(
+ ST_GeomFromText('POINT(-122.4194 37.7749)'),
+ 'EPSG:4326',
+ 'EPSG:990001'
+ ) AS transformed_point
+""").show()
+```
+
+### Example: self-hosted CRS server
+
+```python
+config = (
+ SedonaContext.builder()
+ .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+ .config("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.proj")
+ .config("spark.sedona.crs.url.format", "proj")
+ .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Now ST_Transform will try https://crs.mycompany.com/epsg/3857.proj
+# before falling back to built-in definitions
+sedona.sql("""
+ SELECT ST_Transform(
+ ST_GeomFromText('POINT(-122.4194 37.7749)'),
+ 'EPSG:4326',
+ 'EPSG:3857'
+ ) AS transformed_point
+""").show()
+```
+
+### Example: custom authority codes
+
+The URL provider is especially useful for custom or internal authority codes
that are not in any public database. With the default path template
`/{authority}/{code}.json`, the `{authority}` placeholder is replaced by the
authority name from the CRS string (lowercased):
+
+```python
+config = (
+ SedonaContext.builder()
+ .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+ .config("spark.sedona.crs.url.format", "proj")
+ .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Resolves MYORG:1001 from:
+# https://crs.mycompany.com/myorg/1001.json
+sedona.sql("""
+ SELECT ST_Transform(
+ ST_GeomFromText('POINT(-122.4194 37.7749)'),
+ 'EPSG:4326',
+ 'MYORG:1001'
+ ) AS transformed_point
+""").show()
+```
+
+### Example: using geometry SRID with URL provider
+
+If the geometry already has an SRID set (e.g., via `ST_SetSRID`), you can omit
the source CRS parameter. The source CRS is derived from the geometry's SRID as
an EPSG code:
+
+```python
+config = (
+ SedonaContext.builder()
+ .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+ .config("spark.sedona.crs.url.format", "proj")
+ .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# The source CRS is taken from the geometry's SRID (4326 → EPSG:4326).
+# Only the target CRS string is needed.
+sedona.sql("""
+ SELECT ST_Transform(
+ ST_SetSRID(ST_GeomFromText('POINT(-122.4194 37.7749)'), 4326),
+ 'EPSG:3857'
+ ) AS transformed_point
+""").show()
+```
+
+### Disabling the URL provider
+
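+To keep the URL provider disabled, omit `spark.sedona.crs.url.base` or leave it as an empty string (the default). Note that clearing the setting later does not unregister a provider: once a URL provider has been registered in an executor JVM, it remains active for the lifetime of that JVM, or until a different URL provider configuration is registered and replaces it.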
+
+See also: [Configuration parameters](Parameter.md#crs-transformation) for the
full list of URL CRS provider settings.
+
## Grid File Support
Grid files enable high-accuracy datum transformations, such as NAD27 to NAD83
or OSGB36 to ETRS89. Sedona supports loading grid files from multiple sources.
diff --git a/docs/api/sql/Parameter.md b/docs/api/sql/Parameter.md
index b0888211b8..eac2bd2975 100644
--- a/docs/api/sql/Parameter.md
+++ b/docs/api/sql/Parameter.md
@@ -111,3 +111,22 @@ If you set the same parameter through both `sedona` and
`spark.sedona` prefixes,
* raster: Use proj4sedona for vector transformations, GeoTools
for raster transformations
* all: Use GeoTools for all transformations (legacy behavior)
* Since: v1.9.0
+* spark.sedona.crs.url.base
+ * Base URL of a CRS definition server for resolving authority codes
(e.g., EPSG) via HTTP. When set, ST_Transform will consult this URL provider
before the built-in definitions.
+ * Default: (empty string — URL provider disabled)
+ * Example: `https://crs.example.com`
+ * Since: v1.9.0
+* spark.sedona.crs.url.pathTemplate
+ * URL path template appended to `spark.sedona.crs.url.base`. The
placeholders `{authority}` and `{code}` are replaced with the authority name
(e.g., `epsg`) and numeric code (e.g., `4326`) at runtime.
+ * Default: `/{authority}/{code}.json`
+ * Example: `/epsg/{code}.json` (for a server that only serves EPSG
codes)
+ * Since: v1.9.0
+* spark.sedona.crs.url.format
+ * The CRS definition format returned by the URL provider.
+ * Default: projjson
+ * Possible values:
+ * projjson: PROJJSON format
+ * proj: PROJ string format
+ * wkt1: OGC WKT1 format
+ * wkt2: ISO 19162 WKT2 format
+ * Since: v1.9.0
diff --git a/pom.xml b/pom.xml
index 05ca1cde9c..b8025399e6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,7 +96,7 @@
<scala-collection-compat.version>2.5.0</scala-collection-compat.version>
<geoglib.version>1.52</geoglib.version>
<caffeine.version>2.9.2</caffeine.version>
- <proj4sedona.version>0.0.3</proj4sedona.version>
+ <proj4sedona.version>0.0.4</proj4sedona.version>
<geotools.scope>provided</geotools.scope>
<!-- Because it's not in Maven central, make it provided by default -->
diff --git
a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java
b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java
index 44b2885815..1b15914f67 100644
--- a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java
+++ b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java
@@ -119,6 +119,11 @@ public class SedonaConf implements Serializable {
// Parameter for CRS transformation mode
private CRSTransformMode crsTransformMode;
+ // Parameters for URL-based CRS provider
+ private String crsUrlBase;
+ private String crsUrlPathTemplate;
+ private String crsUrlFormat;
+
public static SedonaConf fromActiveSession() {
return new SedonaConf(SparkSession.active().conf());
}
@@ -234,6 +239,14 @@ public class SedonaConf implements Serializable {
// - "all": Use GeoTools for all transformations (legacy behavior)
this.crsTransformMode =
CRSTransformMode.fromString(confGetter.get("spark.sedona.crs.geotools",
"raster"));
+
+ // URL-based CRS provider configuration
+ // When spark.sedona.crs.url.base is set, a UrlCRSProvider is registered
to resolve
+ // SRID definitions from the given HTTP(S) endpoint before falling back to
built-in defs.
+ this.crsUrlBase = confGetter.get("spark.sedona.crs.url.base", "");
+ this.crsUrlPathTemplate =
+ confGetter.get("spark.sedona.crs.url.pathTemplate",
"/{authority}/{code}.json");
+ this.crsUrlFormat = confGetter.get("spark.sedona.crs.url.format",
"projjson");
}
// Helper method to prioritize `sedona.*` over `spark.sedona.*`
@@ -342,4 +355,36 @@ public class SedonaConf implements Serializable {
public CRSTransformMode getCRSTransformMode() {
return crsTransformMode;
}
+
+ /**
+ * Get the base URL for the URL-based CRS provider. When non-empty, a {@code
UrlCRSProvider} is
+ * registered to resolve SRID definitions from this HTTP(S) endpoint.
+ *
+ * @return The base URL, or empty string if disabled
+ * @since 1.9.0
+ */
+ public String getCrsUrlBase() {
+ return crsUrlBase;
+ }
+
+ /**
+ * Get the path template for the URL-based CRS provider. Supports
placeholders: {@code
+ * {authority}} and {@code {code}}.
+ *
+ * @return The path template (default: "/{authority}/{code}.json")
+ * @since 1.9.0
+ */
+ public String getCrsUrlPathTemplate() {
+ return crsUrlPathTemplate;
+ }
+
+ /**
+ * Get the expected response format for the URL-based CRS provider.
+ *
+ * @return The format string: "projjson", "proj", "wkt1", or "wkt2"
(default: "projjson")
+ * @since 1.9.0
+ */
+ public String getCrsUrlFormat() {
+ return crsUrlFormat;
+ }
}
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index b5f85b8968..da470ef6ff 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -307,18 +307,28 @@ private[apache] case class ST_Centroid(inputExpressions:
Seq[Expression])
* @param inputExpressions
* @param useGeoTools
*/
-private[apache] case class ST_Transform(inputExpressions: Seq[Expression],
useGeoTools: Boolean)
+private[apache] case class ST_Transform(
+ inputExpressions: Seq[Expression],
+ useGeoTools: Boolean,
+ crsUrlBase: String,
+ crsUrlPathTemplate: String,
+ crsUrlFormat: String)
extends InferredExpression(
inferrableFunction4(FunctionsProj4.transform),
inferrableFunction3(FunctionsProj4.transform),
inferrableFunction2(FunctionsProj4.transform)) {
- def this(inputExpressions: Seq[Expression]) {
- // We decide whether to use GeoTools based on active session config.
- // SparkSession may not be available on executors, so we need to
- // construct ST_Transform on driver. useGeoTools will be passed down
- // to executors through object serialization/deserialization.
- this(inputExpressions, ST_Transform.useGeoTools())
+ private def this(
+ inputExpressions: Seq[Expression],
+ config: (Boolean, String, String, String)) = {
+ this(inputExpressions, config._1, config._2, config._3, config._4)
+ }
+
+ def this(inputExpressions: Seq[Expression]) = {
+ // Read all config from SedonaConf on the driver and pass to primary
constructor.
+ // SparkSession may not be available on executors, so config is captured
here
+ // and serialized to executors along with the expression node.
+ this(inputExpressions, ST_Transform.readConfig())
}
// Define proj4sedona function overloads (2, 3, 4-arg versions)
@@ -335,6 +345,13 @@ private[apache] case class ST_Transform(inputExpressions:
Seq[Expression], useGe
inferrableFunction2(FunctionsGeoTools.transform))
override lazy val f: InferrableFunction = {
+ // Register URL CRS provider on executor if configured (lazy, once per
JVM).
+ // This runs inside lazy val f so it only executes on executors during row
+ // evaluation, never on the driver during query planning.
+ if (crsUrlBase.nonEmpty) {
+ FunctionsProj4.registerUrlCrsProvider(crsUrlBase, crsUrlPathTemplate,
crsUrlFormat)
+ }
+
// Check config to decide between proj4sedona and GeoTools
// Note: 4-arg lenient parameter is ignored by proj4sedona
val candidateFunctions = if (useGeoTools) geoToolsFunctions else
proj4Functions
@@ -347,13 +364,23 @@ private[apache] case class ST_Transform(inputExpressions:
Seq[Expression], useGe
}
object ST_Transform {
- private def useGeoTools(): Boolean = {
+
+ /**
+ * Read all ST_Transform config from SedonaConf in one call. Defaults are
handled by SedonaConf
+ * itself. Returns safe fallbacks (proj4sedona, no URL provider) when no
active session exists.
+ */
+ private def readConfig(): (Boolean, String, String, String) = {
try {
- SedonaConf.fromActiveSession().getCRSTransformMode.useGeoToolsForVector()
+ val conf = SedonaConf.fromActiveSession()
+ (
+ conf.getCRSTransformMode.useGeoToolsForVector(),
+ conf.getCrsUrlBase,
+ conf.getCrsUrlPathTemplate,
+ conf.getCrsUrlFormat)
} catch {
case _: Exception =>
- // If no active session, fall back to default (proj4sedona)
- false
+ // No active session (e.g., during constant folding) — use safe
defaults
+ (false, "", "", "")
}
}
}
diff --git
a/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java
b/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java
index f90641f76a..0fb24e0235 100644
---
a/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java
+++
b/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java
@@ -60,4 +60,57 @@ public class SedonaConfTest {
// fromSparkEnv means we don't have access to default values so sometimes
we get null as input
assertEquals(0, SedonaConf.bytesFromString(null));
}
+
+ // ==================== URL CRS Provider Config Tests ====================
+
+ @Test
+ public void testCrsUrlBaseDefault() {
+ // Default should be empty string (disabled)
+ assertEquals("", SedonaConf.fromActiveSession().getCrsUrlBase());
+ }
+
+ @Test
+ public void testCrsUrlPathTemplateDefault() {
+ // Default should be "/{authority}/{code}.json"
+ assertEquals(
+ "/{authority}/{code}.json",
SedonaConf.fromActiveSession().getCrsUrlPathTemplate());
+ }
+
+ @Test
+ public void testCrsUrlFormatDefault() {
+ // Default should be "projjson"
+ assertEquals("projjson", SedonaConf.fromActiveSession().getCrsUrlFormat());
+ }
+
+ @Test
+ public void testCrsUrlBaseCustom() {
+ SparkSession.active().conf().set("spark.sedona.crs.url.base",
"https://cdn.proj.org");
+ try {
+ assertEquals("https://cdn.proj.org",
SedonaConf.fromActiveSession().getCrsUrlBase());
+ } finally {
+ SparkSession.active().conf().set("spark.sedona.crs.url.base", "");
+ }
+ }
+
+ @Test
+ public void testCrsUrlPathTemplateCustom() {
+ SparkSession.active().conf().set("spark.sedona.crs.url.pathTemplate",
"/{authority}/{code}");
+ try {
+ assertEquals("/{authority}/{code}",
SedonaConf.fromActiveSession().getCrsUrlPathTemplate());
+ } finally {
+ SparkSession.active()
+ .conf()
+ .set("spark.sedona.crs.url.pathTemplate",
"/{authority}/{code}.json");
+ }
+ }
+
+ @Test
+ public void testCrsUrlFormatCustom() {
+ SparkSession.active().conf().set("spark.sedona.crs.url.format", "wkt2");
+ try {
+ assertEquals("wkt2", SedonaConf.fromActiveSession().getCrsUrlFormat());
+ } finally {
+ SparkSession.active().conf().set("spark.sedona.crs.url.format",
"projjson");
+ }
+ }
}
diff --git
a/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala
b/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala
index 73b0ae55bc..1159ebc6ad 100644
---
a/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala
+++
b/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala
@@ -18,6 +18,10 @@
*/
package org.apache.sedona.sql
+import com.sun.net.httpserver.HttpServer
+import java.net.InetSocketAddress
+import java.nio.charset.StandardCharsets
+import java.util.concurrent.atomic.AtomicInteger
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.sedona_sql.expressions.st_functions._
import org.junit.Assert.{assertEquals, assertNotNull, assertTrue}
@@ -855,4 +859,121 @@ class CRSTransformProj4Test extends TestBaseScala {
assertEquals("All 40 points should transform successfully", 40,
successCount)
}
}
+
+ describe("URL CRS Provider config integration") {
+
+ it("should still transform correctly when URL provider is not configured")
{
+ // Baseline: with an empty spark.sedona.crs.url.base (no URL provider) EPSG:4326 -> EPSG:3857 must still work
+ sparkSession.conf.set("spark.sedona.crs.url.base", "")
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194
37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID) // the result must carry the target SRID
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE) // expected EPSG:3857 x of the input point
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE) // expected EPSG:3857 y of the input point
+ }
+
+ it("should fall back to built-in when URL provider returns nothing") {
+ // Point to a non-existent server — provider will fail, should fall back
to built-in
+ sparkSession.conf.set("spark.sedona.crs.url.base", "http://127.0.0.1:1") // loopback, port 1: assumed to have no listener
+ sparkSession.conf.set("spark.sedona.crs.url.pathTemplate",
"/epsg/{code}.json")
+ sparkSession.conf.set("spark.sedona.crs.url.format", "projjson")
+ try {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT
(-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ // Same expected values as the baseline transform, i.e. the failing URL lookup must not change the result
+ assertNotNull(result)
+ assertEquals(3857, result.getSRID)
+ assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+ assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+ } finally { // NOTE(review): only url.base is reset here; pathTemplate and format stay set on the shared session
+ sparkSession.conf.set("spark.sedona.crs.url.base", "")
+ org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs") // drop the provider by name so it does not outlive this test
+ }
+ }
+
+ it("should register URL CRS provider when config is set") {
+ sparkSession.conf.set("spark.sedona.crs.url.base",
"https://test.example.com")
+ sparkSession.conf.set("spark.sedona.crs.url.pathTemplate",
"/epsg/{code}.json")
+ sparkSession.conf.set("spark.sedona.crs.url.format", "projjson")
+ try {
+ // Force a transform to trigger provider registration. NOTE(review): this may attempt a real request to test.example.com; confirm it fails fast offline
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT
(-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull(result) // only non-null is checked; coordinates are not asserted in this test
+
+ // Verify a provider named sedona-url-crs is present in the global Defs provider list
+ val providers = org.datasyslab.proj4sedona.defs.Defs.getProviders
+ val found = providers.stream().anyMatch(p => p.getName ==
"sedona-url-crs")
+ assertTrue("sedona-url-crs provider should be registered", found)
+ } finally { // NOTE(review): only url.base is reset here; pathTemplate and format stay set on the shared session
+ sparkSession.conf.set("spark.sedona.crs.url.base", "")
+ org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs") // drop the provider by name so it does not outlive this test
+ }
+ }
+
+ it("should transform using local HTTP URL CRS provider with custom CRS") {
+ // Serve a deliberately wrong CRS definition for fake EPSG:990001 that no
+ // built-in provider knows. Uses Mercator with absurd false
easting/northing.
+ // If the transform succeeds with shifted coordinates, the URL provider
was used.
+ // If the URL provider didn't work, the transform would fail entirely.
+ val requestCount = new AtomicInteger(0) // counts handler hits for the fake code; asserted > 0 below
+ val server = HttpServer.create(new InetSocketAddress(0), 0) // port 0: the OS picks a free port; backlog 0: system default
+ val port = server.getAddress.getPort // the port actually bound, used to build the base URL below
+
+ // Web Mercator with intentional 10M/20M false easting/northing
+ val weirdMercator =
+ "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0" +
+ " +x_0=10000000 +y_0=20000000 +k=1 +units=m +no_defs"
+
+ server.createContext(
+ "/epsg/",
+ exchange => {
+ val path = exchange.getRequestURI.getPath
+ if (path.contains("990001")) { // substring match on the request path, not an exact route
+ requestCount.incrementAndGet()
+ val body = weirdMercator.getBytes(StandardCharsets.UTF_8)
+ exchange.sendResponseHeaders(200, body.length) // fixed-length response carrying the PROJ string
+ exchange.getResponseBody.write(body)
+ exchange.getResponseBody.close()
+ } else {
+ // 404 for everything else — built-in providers handle known codes
+ exchange.sendResponseHeaders(404, -1) // -1: no response body is sent
+ exchange.getResponseBody.close()
+ }
+ })
+ server.start()
+
+ sparkSession.conf.set("spark.sedona.crs.url.base",
s"http://localhost:$port")
+ sparkSession.conf.set("spark.sedona.crs.url.pathTemplate",
"/epsg/{code}.json")
+ sparkSession.conf.set("spark.sedona.crs.url.format", "proj") // the handler above serves a PROJ string, not PROJJSON
+ try {
+ val result = sparkSession
+ .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT
(-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:990001')")
+ .first()
+ .getAs[Geometry](0)
+
+ assertNotNull("Transform to fake EPSG:990001 should succeed via URL
provider", result)
+ assertEquals(990001, result.getSRID)
+ // Standard Web Mercator: x = -13627665.27, y = 4547675.35
+ // Our weird definition adds +x_0=10000000, +y_0=20000000
+ assertEquals(-3627665.27, result.getCoordinate.x, COORD_TOLERANCE) // -13627665.27 + 10000000
+ assertEquals(24547675.35, result.getCoordinate.y, COORD_TOLERANCE) // 4547675.35 + 20000000
+ assertTrue("Local HTTP server should have been hit",
requestCount.get() > 0)
+ } finally { // NOTE(review): url.base is reset but format stays at proj (and pathTemplate stays set) on the shared session
+ server.stop(0) // delay 0: do not wait for in-flight exchanges
+ sparkSession.conf.set("spark.sedona.crs.url.base", "")
+ org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs") // drop the provider by name so it does not outlive this test
+ }
+ }
+ }
}