This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new e2db567e88 [GH-2657] Upgrade proj4sedona to 0.0.4 and adopt 
UrlCRSProvider (#2658)
e2db567e88 is described below

commit e2db567e886b3bc777f8524731a19042e1e4dc58
Author: Jia Yu <[email protected]>
AuthorDate: Wed Feb 18 15:50:27 2026 -0700

    [GH-2657] Upgrade proj4sedona to 0.0.4 and adopt UrlCRSProvider (#2658)
---
 .../org/apache/sedona/common/FunctionsProj4.java   | 103 ++++++++++
 .../apache/sedona/common/FunctionsProj4Test.java   | 219 +++++++++++++++++++++
 docs/api/sql/CRS-Transformation.md                 | 166 ++++++++++++++++
 docs/api/sql/Parameter.md                          |  19 ++
 pom.xml                                            |   2 +-
 .../org/apache/sedona/core/utils/SedonaConf.java   |  45 +++++
 .../sql/sedona_sql/expressions/Functions.scala     |  49 +++--
 .../apache/sedona/core/utils/SedonaConfTest.java   |  53 +++++
 .../apache/sedona/sql/CRSTransformProj4Test.scala  | 121 ++++++++++++
 9 files changed, 765 insertions(+), 12 deletions(-)

diff --git a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java 
b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
index b5a5c1c43e..8b40d93b6a 100644
--- a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
+++ b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
@@ -18,9 +18,14 @@
  */
 package org.apache.sedona.common;
 
+import java.util.Locale;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.datasyslab.proj4sedona.core.Proj;
+import org.datasyslab.proj4sedona.defs.CRSResult;
+import org.datasyslab.proj4sedona.defs.Defs;
+import org.datasyslab.proj4sedona.defs.UrlCRSProvider;
 import org.datasyslab.proj4sedona.jts.JTSGeometryTransformer;
 import org.datasyslab.proj4sedona.parser.CRSSerializer;
 import org.locationtech.jts.geom.Geometry;
@@ -62,6 +67,104 @@ public class FunctionsProj4 {
   private static final Pattern EPSG_PATTERN =
       Pattern.compile("^EPSG:(\\d+)$", Pattern.CASE_INSENSITIVE);
 
+  /** Name used for the registered URL CRS provider. */
+  private static final String URL_CRS_PROVIDER_NAME = "sedona-url-crs";
+
+  /**
+   * Tracks the currently registered URL CRS provider config (baseUrl + "|" + 
pathTemplate + "|" +
+   * format). Null means no provider registered yet. Uses AtomicReference for 
thread-safe lazy
+   * initialization on executors.
+   */
+  private static final AtomicReference<String> registeredUrlCrsConfig = new 
AtomicReference<>(null);
+
+  /**
+   * Resets the URL CRS provider state. Package-private; intended for tests only.
+   *
+   * <p>Removes the provider named {@code URL_CRS_PROVIDER_NAME} from {@link Defs} and clears the
+   * cached config key, so the next {@code registerUrlCrsProvider} call performs a fresh
+   * registration even when it is given the same configuration as before.
+   *
+   * <p>NOTE(review): this method does not acquire the monitor that {@code registerUrlCrsProvider}
+   * holds during its remove-register-set sequence, so it should presumably not be called while
+   * registrations may be in flight on other threads.
+   */
+  static void resetUrlCrsProviderForTest() {
+    Defs.removeProvider(URL_CRS_PROVIDER_NAME);
+    registeredUrlCrsConfig.set(null);
+  }
+
+  /**
+   * Register a URL-based CRS provider with proj4sedona's Defs registry. This provider will be
+   * consulted before the built-in provider when resolving EPSG codes.
+   *
+   * <p>This method is safe to call concurrently from multiple threads. The fast path (already
+   * registered with the same config) is a lock-free read of the cached config key; the slow path
+   * is synchronized so that the remove-register-set sequence executes at most once per JVM (or
+   * once per config change).
+   *
+   * <p>When an existing provider is being replaced, the new provider is built before the old one
+   * is removed, and the cached config key is cleared as soon as the old provider is gone. A
+   * failure while building therefore leaves the previous registration intact, and a failure while
+   * registering can never leave the cached key pointing at a provider that no longer exists.
+   *
+   * @param baseUrl The base URL of the CRS definition server; {@code null} or empty is a no-op
+   * @param pathTemplate The URL path template (e.g., "/{authority}/{code}.json")
+   * @param format The expected response format: "projjson", "proj", "wkt1", or "wkt2"
+   *     (case-insensitive; {@code null}, empty or unrecognized values fall back to PROJJSON)
+   */
+  public static void registerUrlCrsProvider(String baseUrl, String pathTemplate, String format) {
+    if (baseUrl == null || baseUrl.isEmpty()) {
+      return;
+    }
+
+    // Parse once. The enum both canonicalizes the config key (so equivalent spellings such as
+    // "PROJJSON" and "projjson" do not trigger re-registration) and configures the provider.
+    CRSResult.Format crsFormat = parseCrsFormat(format);
+    String configKey =
+        baseUrl + "|" + pathTemplate + "|" + crsFormat.name().toLowerCase(Locale.ROOT);
+
+    // Fast path (lock-free): already registered with the same config.
+    if (configKey.equals(registeredUrlCrsConfig.get())) {
+      return;
+    }
+
+    // Slow path: synchronize to make the remove-register-set sequence atomic.
+    synchronized (registeredUrlCrsConfig) {
+      // Re-check after acquiring the lock — another thread may have registered already.
+      String current = registeredUrlCrsConfig.get();
+      if (configKey.equals(current)) {
+        return;
+      }
+
+      // Build the replacement before touching the registry, so a builder failure leaves the
+      // previously registered provider (and its cached key) untouched.
+      UrlCRSProvider provider =
+          UrlCRSProvider.builder(URL_CRS_PROVIDER_NAME)
+              .baseUrl(baseUrl)
+              .pathTemplate(pathTemplate)
+              .format(crsFormat)
+              .build();
+
+      // Remove the existing provider if the config changed.
+      if (current != null) {
+        Defs.removeProvider(URL_CRS_PROVIDER_NAME);
+        // The old provider is gone: forget its key until the new one is in place. Otherwise a
+        // failure in registerProvider would leave the fast path matching a removed provider.
+        registeredUrlCrsConfig.set(null);
+      }
+
+      // Priority 50: before built-in (100) and spatialreference.org (101)
+      Defs.registerProvider(provider, 50);
+      registeredUrlCrsConfig.set(configKey);
+    }
+  }
+
+  /**
+   * Parse the CRS format string from config to the CRSResult.Format enum.
+   *
+   * @param format Format string: "projjson", "proj", "wkt1", or "wkt2"
+   * @return The corresponding CRSResult.Format
+   */
+  private static CRSResult.Format parseCrsFormat(String format) {
+    if (format == null || format.isEmpty()) {
+      return CRSResult.Format.PROJJSON;
+    }
+    switch (format.toLowerCase(Locale.ROOT)) {
+      case "proj":
+        return CRSResult.Format.PROJ4;
+      case "wkt1":
+        return CRSResult.Format.WKT1;
+      case "wkt2":
+        return CRSResult.Format.WKT2;
+      case "projjson":
+      default:
+        return CRSResult.Format.PROJJSON;
+    }
+  }
+
   /**
    * Transform a geometry from the source CRS specified by the geometry's SRID 
to the target CRS.
    *
diff --git 
a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java 
b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
index 903bf2f9d3..2584f99ccb 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
@@ -21,8 +21,18 @@ package org.apache.sedona.common;
 import static org.junit.Assert.*;
 import static org.junit.Assume.assumeTrue;
 
+import com.sun.net.httpserver.HttpServer;
+import java.net.InetSocketAddress;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
 import org.junit.Test;
 import org.locationtech.jts.geom.*;
 import org.locationtech.jts.io.WKTReader;
@@ -573,4 +583,213 @@ public class FunctionsProj4Test extends TestBase {
     assertEquals(original.getCoordinate().x, backToWgs84.getCoordinate().x, 
1e-9);
     assertEquals(original.getCoordinate().y, backToWgs84.getCoordinate().y, 
1e-9);
   }
+
+  // ==================== URL CRS Provider Registration Tests 
====================
+
+  /** A {@code null} or empty base URL must be ignored: no exception and no provider registered. */
+  @Test
+  public void testRegisterUrlCrsProviderNoOpOnNullOrEmpty() {
+    // Start from a known-clean registry so the assertion below cannot be affected by a provider
+    // left behind by another test (JUnit does not guarantee method execution order).
+    FunctionsProj4.resetUrlCrsProviderForTest();
+    try {
+      // null and empty baseUrl should be no-ops, not throw
+      FunctionsProj4.registerUrlCrsProvider(null, "/epsg/{code}.json", "projjson");
+      FunctionsProj4.registerUrlCrsProvider("", "/epsg/{code}.json", "projjson");
+      // No provider should have been registered
+      assertNull("No provider should be registered for null/empty baseUrl", findUrlCrsProvider());
+    } finally {
+      // Same cleanup as the sibling tests, in case a regression did register something.
+      FunctionsProj4.resetUrlCrsProviderForTest();
+    }
+  }
+
+  /** Registering twice with an identical config must leave exactly the same provider count. */
+  @Test
+  public void testRegisterUrlCrsProviderRegistersAndIsIdempotent() {
+    String testUrl = "https://test-crs-server.example.com";
+    try {
+      FunctionsProj4.registerUrlCrsProvider(testUrl, "/epsg/{code}.json", "projjson");
+      assertNotNull("sedona-url-crs provider should be registered", findUrlCrsProvider());
+      int countBefore = countProvidersByName("sedona-url-crs");
+
+      // Second call with same config — should not add a duplicate
+      FunctionsProj4.registerUrlCrsProvider(testUrl, "/epsg/{code}.json", "projjson");
+      assertEquals(
+          "Provider should not be duplicated", countBefore, countProvidersByName("sedona-url-crs"));
+    } finally {
+      FunctionsProj4.resetUrlCrsProviderForTest();
+    }
+  }
+
+  /** A call with a different config must replace the provider registered by the earlier call. */
+  @Test
+  public void testRegisterUrlCrsProviderReRegistersOnConfigChange() {
+    try {
+      FunctionsProj4.registerUrlCrsProvider(
+          "https://server-a.example.com", "/epsg/{code}.json", "projjson");
+      assertEquals(
+          org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON,
+          findUrlCrsProvider().getFormat());
+
+      // Change config — should re-register with new settings
+      FunctionsProj4.registerUrlCrsProvider(
+          "https://server-b.example.com", "/epsg/{code}.json", "wkt2");
+      assertEquals(
+          org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT2, findUrlCrsProvider().getFormat());
+    } finally {
+      FunctionsProj4.resetUrlCrsProviderForTest();
+    }
+  }
+
+  /** Each documented format name must configure the provider with the matching enum constant. */
+  @Test
+  public void testParseCrsFormatAllMappings() {
+    // Verify all valid format strings map to the correct enum
+    Object[][] cases = {
+      {"projjson", org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON},
+      {"proj", org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJ4},
+      {"wkt1", org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT1},
+      {"wkt2", org.datasyslab.proj4sedona.defs.CRSResult.Format.WKT2},
+    };
+    for (Object[] c : cases) {
+      try {
+        FunctionsProj4.registerUrlCrsProvider(
+            "https://test.example.com", "/epsg/{code}", (String) c[0]);
+        assertEquals("Format '" + c[0] + "'", c[1], findUrlCrsProvider().getFormat());
+      } finally {
+        // Reset per case so every iteration goes through a fresh registration.
+        FunctionsProj4.resetUrlCrsProviderForTest();
+      }
+    }
+  }
+
+  /** Missing, unknown and differently-cased format names must all resolve to PROJJSON. */
+  @Test
+  public void testParseCrsFormatDefaultsAndCaseInsensitive() {
+    // null, empty, unknown, and uppercase should all default to / map to PROJJSON
+    String[] inputs = {null, "", "unknown-format", "PROJJSON", "ProjJson"};
+    for (String input : inputs) {
+      try {
+        FunctionsProj4.registerUrlCrsProvider("https://test.example.com", "/epsg/{code}", input);
+        assertEquals(
+            "Format input '" + input + "' should resolve to PROJJSON",
+            org.datasyslab.proj4sedona.defs.CRSResult.Format.PROJJSON,
+            findUrlCrsProvider().getFormat());
+      } finally {
+        // Use the test reset so registeredUrlCrsConfig is also cleared
+        FunctionsProj4.resetUrlCrsProviderForTest();
+      }
+    }
+  }
+
+  /**
+   * End-to-end check that a transform resolves its target CRS through the registered URL
+   * provider, using a throwaway HTTP server bound to an ephemeral local port.
+   */
+  @Test
+  public void testTransformWithLocalUrlCrsProvider() throws Exception {
+    // Serve a deliberately wrong CRS definition for a fake EPSG code (990001)
+    // that no built-in provider knows. The definition is a Mercator projection
+    // with absurd false easting/northing (+x_0=10000000 +y_0=20000000).
+    // If the transform succeeds with these shifted coordinates, the URL provider
+    // resolved the CRS. If it didn't work, the transform would fail entirely
+    // because no built-in provider knows EPSG:990001.
+    AtomicInteger requestCount = new AtomicInteger(0);
+    HttpServer server = HttpServer.create(new InetSocketAddress(0), 0);
+    int port = server.getAddress().getPort();
+
+    // Web Mercator with intentional 10M/20M false easting/northing
+    String weirdMercator =
+        "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0"
+            + " +x_0=10000000 +y_0=20000000 +k=1 +units=m +no_defs";
+
+    server.createContext(
+        "/epsg/",
+        exchange -> {
+          String path = exchange.getRequestURI().getPath();
+          if (path.contains("990001")) {
+            requestCount.incrementAndGet();
+            byte[] body = weirdMercator.getBytes(StandardCharsets.UTF_8);
+            exchange.sendResponseHeaders(200, body.length);
+            exchange.getResponseBody().write(body);
+            exchange.getResponseBody().close();
+          } else {
+            // 404 for everything else — built-in providers handle known codes
+            exchange.sendResponseHeaders(404, -1);
+            exchange.getResponseBody().close();
+          }
+        });
+    server.start();
+
+    try {
+      FunctionsProj4.registerUrlCrsProvider(
+          "http://localhost:" + port, "/epsg/{code}.json", "proj");
+
+      Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.4194, 37.7749));
+      Geometry result = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:990001");
+
+      assertNotNull("Transform to fake EPSG:990001 should succeed via URL provider", result);
+      assertEquals(990001, result.getSRID());
+      // Standard Web Mercator: x = -13627665.27, y = 4547675.35
+      // Our weird definition adds +x_0=10000000, +y_0=20000000
+      assertEquals(-3627665.27, result.getCoordinate().x, 1.0);
+      assertEquals(24547675.35, result.getCoordinate().y, 1.0);
+      assertTrue("Local HTTP server should have been hit", requestCount.get() > 0);
+    } finally {
+      server.stop(0);
+      FunctionsProj4.resetUrlCrsProviderForTest();
+    }
+  }
+
+  /**
+   * Races 16 threads into {@code registerUrlCrsProvider} with the same config and checks that
+   * exactly one provider ends up in the registry.
+   */
+  @Test
+  public void testRegisterUrlCrsProviderConcurrentThreadSafety() throws Exception {
+    // Verify that concurrent calls to registerUrlCrsProvider do not produce
+    // duplicate providers or corrupt the registry. This exercises the
+    // synchronized double-checked locking path.
+    final int threadCount = 16;
+    final String testUrl = "https://concurrent-test.example.com";
+    final String pathTemplate = "/epsg/{code}.json";
+    final String format = "projjson";
+
+    ExecutorService pool = Executors.newFixedThreadPool(threadCount);
+    CyclicBarrier barrier = new CyclicBarrier(threadCount);
+
+    try {
+      List<Future<?>> futures = new ArrayList<>();
+      for (int i = 0; i < threadCount; i++) {
+        futures.add(
+            pool.submit(
+                () -> {
+                  try {
+                    // All threads wait at the barrier then race into registration
+                    barrier.await();
+                    FunctionsProj4.registerUrlCrsProvider(testUrl, pathTemplate, format);
+                  } catch (Exception e) {
+                    throw new RuntimeException(e);
+                  }
+                }));
+      }
+
+      // Wait for all threads to complete and propagate any exceptions
+      for (Future<?> f : futures) {
+        f.get();
+      }
+
+      // After all concurrent registrations, there should be exactly 1 provider
+      assertEquals(
+          "Concurrent registration must produce exactly 1 provider",
+          1,
+          countProvidersByName("sedona-url-crs"));
+    } finally {
+      pool.shutdown();
+      FunctionsProj4.resetUrlCrsProviderForTest();
+    }
+  }
+
+  /** Returns how many providers in the Defs registry report exactly the given name. */
+  private int countProvidersByName(String name) {
+    int matches = 0;
+    for (org.datasyslab.proj4sedona.defs.CRSProvider provider :
+        org.datasyslab.proj4sedona.defs.Defs.getProviders()) {
+      matches += name.equals(provider.getName()) ? 1 : 0;
+    }
+    return matches;
+  }
+
+  /**
+   * Looks up the first registry entry that is named "sedona-url-crs" and is a UrlCRSProvider.
+   *
+   * @return the matching provider, or {@code null} when none is registered
+   */
+  private org.datasyslab.proj4sedona.defs.UrlCRSProvider findUrlCrsProvider() {
+    for (org.datasyslab.proj4sedona.defs.CRSProvider candidate :
+        org.datasyslab.proj4sedona.defs.Defs.getProviders()) {
+      // Skip anything that is not our named URL provider.
+      if (!"sedona-url-crs".equals(candidate.getName())
+          || !(candidate instanceof org.datasyslab.proj4sedona.defs.UrlCRSProvider)) {
+        continue;
+      }
+      return (org.datasyslab.proj4sedona.defs.UrlCRSProvider) candidate;
+    }
+    return null;
+  }
 }
diff --git a/docs/api/sql/CRS-Transformation.md 
b/docs/api/sql/CRS-Transformation.md
index a61533aab5..24a15c98dc 100644
--- a/docs/api/sql/CRS-Transformation.md
+++ b/docs/api/sql/CRS-Transformation.md
@@ -200,6 +200,172 @@ SELECT ST_Transform(
 ) AS transformed_point
 ```
 
+## URL CRS Provider
+
+Since v1.9.0, Sedona supports resolving CRS definitions from a remote HTTP 
server. This is useful when you need custom or internal CRS definitions that 
are not included in the built-in database, or when you want to use your own CRS 
definition service.
+
+When configured, the URL provider is consulted **before** the built-in CRS 
database. If the URL provider returns a valid CRS definition, it is used 
directly. If the URL returns a 404 or an error, Sedona falls back to the 
built-in definitions.
+
+### Hosting CRS definitions
+
+You can host your custom CRS definitions on any HTTP-accessible location. Two 
common approaches:
+
+- **GitHub repository**: Store CRS definition files in a public GitHub repo 
and use the raw content URL. This is the easiest way to get started — no server 
infrastructure required.
+- **Public S3 bucket**: Upload CRS definition files to an Amazon S3 bucket 
with public read access and use the S3 static website URL or CloudFront 
distribution.
+
+Each file should contain a single CRS definition in the format you specify via 
`spark.sedona.crs.url.format` (PROJJSON, PROJ string, WKT1, or WKT2).
+
+### Configuration
+
+Set the following Spark configuration properties when creating your Sedona 
session:
+
+```python
+config = (
+    SedonaContext.builder()
+    .config("spark.sedona.crs.url.base", "https://crs.example.com")
+    .config("spark.sedona.crs.url.pathTemplate", "/{authority}/{code}.json")
+    .config("spark.sedona.crs.url.format", "projjson")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+```
+
+With the default path template, resolving `EPSG:4326` will fetch:
+
+```
+https://crs.example.com/epsg/4326.json
+```
+
+Only `spark.sedona.crs.url.base` is required. The other two properties have 
sensible defaults (`/{authority}/{code}.json` and `projjson`).
+
+### Supported response formats
+
+| Format value | Description | Content example |
+|-------------|-------------|----------------|
+| `projjson` | PROJJSON (default) | `{"type": "GeographicCRS", ...}` |
+| `proj` | PROJ string | `+proj=longlat +datum=WGS84 +no_defs` |
+| `wkt1` | OGC WKT1 | `GEOGCS["WGS 84", ...]` |
+| `wkt2` | ISO 19162 WKT2 | `GEOGCRS["WGS 84", ...]` |
+
+### Example: GitHub repository
+
+Suppose you have a GitHub repo `myorg/crs-definitions` with the following 
structure:
+
+```
+crs-definitions/
+  epsg/
+    990001.proj
+    990002.proj
+```
+
+where `epsg/990001.proj` contains a PROJ string like:
+
+```
++proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 
+units=m +no_defs
+```
+
+Point Sedona to the raw GitHub content URL:
+
+```python
+config = (
+    SedonaContext.builder()
+    .config(
+        "spark.sedona.crs.url.base",
+        "https://raw.githubusercontent.com/myorg/crs-definitions/main",
+    )
+    .config("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.proj")
+    .config("spark.sedona.crs.url.format", "proj")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Resolves EPSG:990001 from:
+# https://raw.githubusercontent.com/myorg/crs-definitions/main/epsg/990001.proj
+sedona.sql("""
+    SELECT ST_Transform(
+        ST_GeomFromText('POINT(-122.4194 37.7749)'),
+        'EPSG:4326',
+        'EPSG:990001'
+    ) AS transformed_point
+""").show()
+```
+
+### Example: self-hosted CRS server
+
+```python
+config = (
+    SedonaContext.builder()
+    .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+    .config("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.proj")
+    .config("spark.sedona.crs.url.format", "proj")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Now ST_Transform will try https://crs.mycompany.com/epsg/3857.proj
+# before falling back to built-in definitions
+sedona.sql("""
+    SELECT ST_Transform(
+        ST_GeomFromText('POINT(-122.4194 37.7749)'),
+        'EPSG:4326',
+        'EPSG:3857'
+    ) AS transformed_point
+""").show()
+```
+
+### Example: custom authority codes
+
+The URL provider is especially useful for custom or internal authority codes 
that are not in any public database. With the default path template 
`/{authority}/{code}.json`, the `{authority}` placeholder is replaced by the 
authority name from the CRS string (lowercased):
+
+```python
+config = (
+    SedonaContext.builder()
+    .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+    .config("spark.sedona.crs.url.format", "proj")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Resolves MYORG:1001 from:
+# https://crs.mycompany.com/myorg/1001.json
+sedona.sql("""
+    SELECT ST_Transform(
+        ST_GeomFromText('POINT(-122.4194 37.7749)'),
+        'EPSG:4326',
+        'MYORG:1001'
+    ) AS transformed_point
+""").show()
+```
+
+### Example: using geometry SRID with URL provider
+
+If the geometry already has an SRID set (e.g., via `ST_SetSRID`), you can omit 
the source CRS parameter. The source CRS is derived from the geometry's SRID as 
an EPSG code:
+
+```python
+config = (
+    SedonaContext.builder()
+    .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+    .config("spark.sedona.crs.url.format", "proj")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# The source CRS is taken from the geometry's SRID (4326 → EPSG:4326).
+# Only the target CRS string is needed.
+sedona.sql("""
+    SELECT ST_Transform(
+        ST_SetSRID(ST_GeomFromText('POINT(-122.4194 37.7749)'), 4326),
+        'EPSG:3857'
+    ) AS transformed_point
+""").show()
+```
+
+### Disabling the URL provider
+
+To avoid enabling the URL provider, omit `spark.sedona.crs.url.base` or leave 
it as an empty string (the default). Note that once a URL provider has been 
registered in an executor JVM, it remains active for the lifetime of that JVM.
+
+See also: [Configuration parameters](Parameter.md#crs-transformation) for the 
full list of URL CRS provider settings.
+
 ## Grid File Support
 
 Grid files enable high-accuracy datum transformations, such as NAD27 to NAD83 
or OSGB36 to ETRS89. Sedona supports loading grid files from multiple sources.
diff --git a/docs/api/sql/Parameter.md b/docs/api/sql/Parameter.md
index b0888211b8..eac2bd2975 100644
--- a/docs/api/sql/Parameter.md
+++ b/docs/api/sql/Parameter.md
@@ -111,3 +111,22 @@ If you set the same parameter through both `sedona` and 
`spark.sedona` prefixes,
                * raster: Use proj4sedona for vector transformations, GeoTools 
for raster transformations
                * all: Use GeoTools for all transformations (legacy behavior)
        * Since: v1.9.0
+* spark.sedona.crs.url.base
+       * Base URL of a CRS definition server for resolving authority codes 
(e.g., EPSG) via HTTP. When set, ST_Transform will consult this URL provider 
before the built-in definitions.
+       * Default: (empty string — URL provider disabled)
+       * Example: `https://crs.example.com`
+       * Since: v1.9.0
+* spark.sedona.crs.url.pathTemplate
+       * URL path template appended to `spark.sedona.crs.url.base`. The 
placeholders `{authority}` and `{code}` are replaced with the authority name 
(e.g., `epsg`) and numeric code (e.g., `4326`) at runtime.
+       * Default: `/{authority}/{code}.json`
+       * Example: `/epsg/{code}.json` (for a server that only serves EPSG 
codes)
+       * Since: v1.9.0
+* spark.sedona.crs.url.format
+       * The CRS definition format returned by the URL provider.
+       * Default: projjson
+       * Possible values:
+               * projjson: PROJJSON format
+               * proj: PROJ string format
+               * wkt1: OGC WKT1 format
+               * wkt2: ISO 19162 WKT2 format
+       * Since: v1.9.0
diff --git a/pom.xml b/pom.xml
index 05ca1cde9c..b8025399e6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,7 +96,7 @@
         
<scala-collection-compat.version>2.5.0</scala-collection-compat.version>
         <geoglib.version>1.52</geoglib.version>
         <caffeine.version>2.9.2</caffeine.version>
-        <proj4sedona.version>0.0.3</proj4sedona.version>
+        <proj4sedona.version>0.0.4</proj4sedona.version>
 
         <geotools.scope>provided</geotools.scope>
         <!-- Because it's not in Maven central, make it provided by default -->
diff --git 
a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java 
b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java
index 44b2885815..1b15914f67 100644
--- a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java
+++ b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java
@@ -119,6 +119,11 @@ public class SedonaConf implements Serializable {
   // Parameter for CRS transformation mode
   private CRSTransformMode crsTransformMode;
 
+  // Parameters for URL-based CRS provider
+  private String crsUrlBase;
+  private String crsUrlPathTemplate;
+  private String crsUrlFormat;
+
   public static SedonaConf fromActiveSession() {
     return new SedonaConf(SparkSession.active().conf());
   }
@@ -234,6 +239,14 @@ public class SedonaConf implements Serializable {
     // - "all": Use GeoTools for all transformations (legacy behavior)
     this.crsTransformMode =
         
CRSTransformMode.fromString(confGetter.get("spark.sedona.crs.geotools", 
"raster"));
+
+    // URL-based CRS provider configuration
+    // When spark.sedona.crs.url.base is set, a UrlCRSProvider is registered 
to resolve
+    // SRID definitions from the given HTTP(S) endpoint before falling back to 
built-in defs.
+    this.crsUrlBase = confGetter.get("spark.sedona.crs.url.base", "");
+    this.crsUrlPathTemplate =
+        confGetter.get("spark.sedona.crs.url.pathTemplate", 
"/{authority}/{code}.json");
+    this.crsUrlFormat = confGetter.get("spark.sedona.crs.url.format", 
"projjson");
   }
 
   // Helper method to prioritize `sedona.*` over `spark.sedona.*`
@@ -342,4 +355,36 @@ public class SedonaConf implements Serializable {
   public CRSTransformMode getCRSTransformMode() {
     return crsTransformMode;
   }
+
+  /**
+   * Get the base URL for the URL-based CRS provider. When non-empty, a {@code UrlCRSProvider} is
+   * registered to resolve SRID definitions from this HTTP(S) endpoint.
+   *
+   * <p>The value is loaded from the {@code spark.sedona.crs.url.base} setting and falls back to
+   * the empty string when that setting is absent.
+   *
+   * @return The base URL, or empty string if disabled
+   * @since 1.9.0
+   */
+  public String getCrsUrlBase() {
+    return crsUrlBase;
+  }
+
+  /**
+   * Get the path template for the URL-based CRS provider. Supports placeholders: {@code
+   * {authority}} and {@code {code}}.
+   *
+   * <p>The value is loaded from the {@code spark.sedona.crs.url.pathTemplate} setting.
+   *
+   * @return The path template (default: "/{authority}/{code}.json")
+   * @since 1.9.0
+   */
+  public String getCrsUrlPathTemplate() {
+    return crsUrlPathTemplate;
+  }
+
+  /**
+   * Get the expected response format for the URL-based CRS provider.
+   *
+   * <p>The value is loaded from the {@code spark.sedona.crs.url.format} setting and is returned
+   * as the raw configured string; this getter does not validate or normalize it.
+   *
+   * @return The format string: "projjson", "proj", "wkt1", or "wkt2" (default: "projjson")
+   * @since 1.9.0
+   */
+  public String getCrsUrlFormat() {
+    return crsUrlFormat;
+  }
 }
diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index b5f85b8968..da470ef6ff 100644
--- 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++ 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -307,18 +307,28 @@ private[apache] case class ST_Centroid(inputExpressions: 
Seq[Expression])
  * @param inputExpressions
  * @param useGeoTools
  */
-private[apache] case class ST_Transform(inputExpressions: Seq[Expression], 
useGeoTools: Boolean)
+private[apache] case class ST_Transform(
+    inputExpressions: Seq[Expression],
+    useGeoTools: Boolean,
+    crsUrlBase: String,
+    crsUrlPathTemplate: String,
+    crsUrlFormat: String)
     extends InferredExpression(
       inferrableFunction4(FunctionsProj4.transform),
       inferrableFunction3(FunctionsProj4.transform),
       inferrableFunction2(FunctionsProj4.transform)) {
 
-  def this(inputExpressions: Seq[Expression]) {
-    // We decide whether to use GeoTools based on active session config.
-    // SparkSession may not be available on executors, so we need to
-    // construct ST_Transform on driver. useGeoTools will be passed down
-    // to executors through object serialization/deserialization.
-    this(inputExpressions, ST_Transform.useGeoTools())
+  private def this(
+      inputExpressions: Seq[Expression],
+      config: (Boolean, String, String, String)) = {
+    this(inputExpressions, config._1, config._2, config._3, config._4)
+  }
+
+  def this(inputExpressions: Seq[Expression]) = {
+    // Read all config from SedonaConf on the driver and pass to primary 
constructor.
+    // SparkSession may not be available on executors, so config is captured 
here
+    // and serialized to executors along with the expression node.
+    this(inputExpressions, ST_Transform.readConfig())
   }
 
   // Define proj4sedona function overloads (2, 3, 4-arg versions)
@@ -335,6 +345,13 @@ private[apache] case class ST_Transform(inputExpressions: 
Seq[Expression], useGe
     inferrableFunction2(FunctionsGeoTools.transform))
 
   override lazy val f: InferrableFunction = {
+    // Register URL CRS provider on executor if configured (lazy, once per 
JVM).
+    // This runs inside lazy val f so it only executes on executors during row
+    // evaluation, never on the driver during query planning.
+    if (crsUrlBase.nonEmpty) {
+      FunctionsProj4.registerUrlCrsProvider(crsUrlBase, crsUrlPathTemplate, 
crsUrlFormat)
+    }
+
     // Check config to decide between proj4sedona and GeoTools
     // Note: 4-arg lenient parameter is ignored by proj4sedona
     val candidateFunctions = if (useGeoTools) geoToolsFunctions else 
proj4Functions
@@ -347,13 +364,23 @@ private[apache] case class ST_Transform(inputExpressions: 
Seq[Expression], useGe
 }
 
 object ST_Transform {
-  private def useGeoTools(): Boolean = {
+
+  /**
+   * Read all ST_Transform config from SedonaConf in one call. Defaults are 
handled by SedonaConf
+   * itself. Returns safe fallbacks (proj4sedona, no URL provider) when no 
active session exists.
+   */
+  private def readConfig(): (Boolean, String, String, String) = {
     try {
-      SedonaConf.fromActiveSession().getCRSTransformMode.useGeoToolsForVector()
+      val conf = SedonaConf.fromActiveSession()
+      (
+        conf.getCRSTransformMode.useGeoToolsForVector(),
+        conf.getCrsUrlBase,
+        conf.getCrsUrlPathTemplate,
+        conf.getCrsUrlFormat)
     } catch {
       case _: Exception =>
-        // If no active session, fall back to default (proj4sedona)
-        false
+        // No active session (e.g., during constant folding) — use safe 
defaults
+        (false, "", "", "")
     }
   }
 }
diff --git 
a/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java 
b/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java
index f90641f76a..0fb24e0235 100644
--- 
a/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java
+++ 
b/spark/common/src/test/java/org/apache/sedona/core/utils/SedonaConfTest.java
@@ -60,4 +60,57 @@ public class SedonaConfTest {
     // fromSparkEnv means we don't have access to default values so sometimes 
we get null as input
     assertEquals(0, SedonaConf.bytesFromString(null));
   }
+
+  // ==================== URL CRS Provider Config Tests ====================
+
+  @Test
+  public void testCrsUrlBaseDefault() {
+    // Default should be empty string (disabled)
+    assertEquals("", SedonaConf.fromActiveSession().getCrsUrlBase());
+  }
+
+  @Test
+  public void testCrsUrlPathTemplateDefault() {
+    // Default should be "/{authority}/{code}.json"
+    assertEquals(
+        "/{authority}/{code}.json", 
SedonaConf.fromActiveSession().getCrsUrlPathTemplate());
+  }
+
+  @Test
+  public void testCrsUrlFormatDefault() {
+    // Default should be "projjson"
+    assertEquals("projjson", SedonaConf.fromActiveSession().getCrsUrlFormat());
+  }
+
+  @Test
+  public void testCrsUrlBaseCustom() {
+    SparkSession.active().conf().set("spark.sedona.crs.url.base", "https://cdn.proj.org");
+    try {
+      assertEquals("https://cdn.proj.org", SedonaConf.fromActiveSession().getCrsUrlBase());
+    } finally {
+      SparkSession.active().conf().set("spark.sedona.crs.url.base", "");
+    }
+  }
+
+  @Test
+  public void testCrsUrlPathTemplateCustom() {
+    SparkSession.active().conf().set("spark.sedona.crs.url.pathTemplate", 
"/{authority}/{code}");
+    try {
+      assertEquals("/{authority}/{code}", 
SedonaConf.fromActiveSession().getCrsUrlPathTemplate());
+    } finally {
+      SparkSession.active()
+          .conf()
+          .set("spark.sedona.crs.url.pathTemplate", 
"/{authority}/{code}.json");
+    }
+  }
+
+  @Test
+  public void testCrsUrlFormatCustom() {
+    SparkSession.active().conf().set("spark.sedona.crs.url.format", "wkt2");
+    try {
+      assertEquals("wkt2", SedonaConf.fromActiveSession().getCrsUrlFormat());
+    } finally {
+      SparkSession.active().conf().set("spark.sedona.crs.url.format", 
"projjson");
+    }
+  }
 }
diff --git 
a/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala 
b/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala
index 73b0ae55bc..1159ebc6ad 100644
--- 
a/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala
+++ 
b/spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala
@@ -18,6 +18,10 @@
  */
 package org.apache.sedona.sql
 
+import com.sun.net.httpserver.HttpServer
+import java.net.InetSocketAddress
+import java.nio.charset.StandardCharsets
+import java.util.concurrent.atomic.AtomicInteger
 import org.apache.spark.sql.functions.lit
 import org.apache.spark.sql.sedona_sql.expressions.st_functions._
 import org.junit.Assert.{assertEquals, assertNotNull, assertTrue}
@@ -855,4 +859,121 @@ class CRSTransformProj4Test extends TestBaseScala {
       assertEquals("All 40 points should transform successfully", 40, 
successCount)
     }
   }
+
+  describe("URL CRS Provider config integration") {
+
+    it("should still transform correctly when URL provider is not configured") 
{
+      // Verify default behavior (no URL provider) still works
+      sparkSession.conf.set("spark.sedona.crs.url.base", "")
+      val result = sparkSession
+        .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+        .first()
+        .getAs[Geometry](0)
+
+      assertNotNull(result)
+      assertEquals(3857, result.getSRID)
+      assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+      assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+    }
+
+    it("should fall back to built-in when URL provider returns nothing") {
+      // Point to a non-existent server — provider will fail, should fall back 
to built-in
+      sparkSession.conf.set("spark.sedona.crs.url.base", "http://127.0.0.1:1")
+      sparkSession.conf.set("spark.sedona.crs.url.pathTemplate", 
"/epsg/{code}.json")
+      sparkSession.conf.set("spark.sedona.crs.url.format", "projjson")
+      try {
+        val result = sparkSession
+          .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+          .first()
+          .getAs[Geometry](0)
+
+        // Should succeed via built-in fallback
+        assertNotNull(result)
+        assertEquals(3857, result.getSRID)
+        assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+        assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+      } finally {
+        sparkSession.conf.set("spark.sedona.crs.url.base", "")
+        org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs")
+      }
+    }
+
+    it("should register URL CRS provider when config is set") {
+      sparkSession.conf.set("spark.sedona.crs.url.base", "https://test.example.com")
+      sparkSession.conf.set("spark.sedona.crs.url.pathTemplate", 
"/epsg/{code}.json")
+      sparkSession.conf.set("spark.sedona.crs.url.format", "projjson")
+      try {
+        // Force a transform to trigger provider registration
+        val result = sparkSession
+          .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+          .first()
+          .getAs[Geometry](0)
+
+        assertNotNull(result)
+
+        // Verify provider was registered
+        val providers = org.datasyslab.proj4sedona.defs.Defs.getProviders
+        val found = providers.stream().anyMatch(p => p.getName == 
"sedona-url-crs")
+        assertTrue("sedona-url-crs provider should be registered", found)
+      } finally {
+        sparkSession.conf.set("spark.sedona.crs.url.base", "")
+        org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs")
+      }
+    }
+
+    it("should transform using local HTTP URL CRS provider with custom CRS") {
+      // Serve a deliberately wrong CRS definition for fake EPSG:990001 that no
+      // built-in provider knows. Uses Mercator with absurd false 
easting/northing.
+      // If the transform succeeds with shifted coordinates, the URL provider 
was used.
+      // If the URL provider didn't work, the transform would fail entirely.
+      val requestCount = new AtomicInteger(0)
+      val server = HttpServer.create(new InetSocketAddress(0), 0)
+      val port = server.getAddress.getPort
+
+      // Web Mercator with intentional 10M/20M false easting/northing
+      val weirdMercator =
+        "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0" +
+          " +x_0=10000000 +y_0=20000000 +k=1 +units=m +no_defs"
+
+      server.createContext(
+        "/epsg/",
+        exchange => {
+          val path = exchange.getRequestURI.getPath
+          if (path.contains("990001")) {
+            requestCount.incrementAndGet()
+            val body = weirdMercator.getBytes(StandardCharsets.UTF_8)
+            exchange.sendResponseHeaders(200, body.length)
+            exchange.getResponseBody.write(body)
+            exchange.getResponseBody.close()
+          } else {
+            // 404 for everything else — built-in providers handle known codes
+            exchange.sendResponseHeaders(404, -1)
+            exchange.getResponseBody.close()
+          }
+        })
+      server.start()
+
+      sparkSession.conf.set("spark.sedona.crs.url.base", s"http://localhost:$port")
+      sparkSession.conf.set("spark.sedona.crs.url.pathTemplate", 
"/epsg/{code}.json")
+      sparkSession.conf.set("spark.sedona.crs.url.format", "proj")
+      try {
+        val result = sparkSession
+          .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:990001')")
+          .first()
+          .getAs[Geometry](0)
+
+        assertNotNull("Transform to fake EPSG:990001 should succeed via URL provider", result)
+        assertEquals(990001, result.getSRID)
+        // Standard Web Mercator: x = -13627665.27, y = 4547675.35
+        // Our weird definition adds +x_0=10000000, +y_0=20000000
+        assertEquals(-3627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+        assertEquals(24547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+        assertTrue("Local HTTP server should have been hit", 
requestCount.get() > 0)
+      } finally {
+        server.stop(0)
+        sparkSession.conf.set("spark.sedona.crs.url.base", "")
+        org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs")
+      }
+    }
+  }
 }


Reply via email to