This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch fix/upgrade-proj4sedona-url-crs-provider-2657
in repository https://gitbox.apache.org/repos/asf/sedona.git

commit f5754cb119a962ab97bb50b29a5e7f49e103058a
Author: Jia Yu <[email protected]>
AuthorDate: Wed Feb 18 00:53:36 2026 -0800

    fix: address code review — thread-safe registration & executor-only path
    
    1. Move URL CRS provider registration from ST_Transform class body into
       lazy val f, so it only executes on executors during row evaluation,
       never on the driver during query planning.
    
    2. Wrap registerUrlCrsProvider's remove-register-set sequence in a
       synchronized block with double-checked locking. The fast path
       (already registered) is lock-free (volatile read + String.equals).
    
    3. Add 16-thread concurrency test verifying no duplicate providers
       are registered under contention.
---
 .../org/apache/sedona/common/FunctionsProj4.java   | 49 +++++++++++++--------
 .../apache/sedona/common/FunctionsProj4Test.java   | 51 ++++++++++++++++++++++
 .../sql/sedona_sql/expressions/Functions.scala     | 12 ++---
 3 files changed, 88 insertions(+), 24 deletions(-)

diff --git a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java 
b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
index 78e0a94f53..fb7722a276 100644
--- a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
+++ b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java
@@ -81,8 +81,9 @@ public class FunctionsProj4 {
    * Register a URL-based CRS provider with proj4sedona's Defs registry. This 
provider will be
    * consulted before the built-in provider when resolving EPSG codes.
    *
-   * <p>This method is safe to call multiple times — it only registers once 
per JVM (or re-registers
-   * if the config changes). It is called lazily on executors before the first 
CRS transformation.
+   * <p>This method is safe to call concurrently from multiple threads — it 
uses double-checked
+   * locking so the fast path (already registered with the same config) is 
lock-free, and the
+   * synchronized slow path executes at most once per JVM (or once per config 
change).
    *
    * @param baseUrl The base URL of the CRS definition server
    * @param pathTemplate The URL path template (e.g., 
"/{authority}/{code}.json")
@@ -94,30 +95,40 @@ public class FunctionsProj4 {
     }
 
     String configKey = baseUrl + "|" + pathTemplate + "|" + format;
-    String current = registeredUrlCrsConfig.get();
 
-    if (configKey.equals(current)) {
-      // Already registered with the same config
+    // Fast path (lock-free): already registered with the same config.
+    // This handles 99.999%+ of calls with just a volatile read + 
String.equals().
+    if (configKey.equals(registeredUrlCrsConfig.get())) {
       return;
     }
 
-    // Remove existing provider if config changed
-    if (current != null) {
-      Defs.removeProvider(URL_CRS_PROVIDER_NAME);
-    }
+    // Slow path: synchronize to make the remove-register-set sequence atomic.
+    // Only the first thread per JVM (or per config change) enters this block.
+    synchronized (registeredUrlCrsConfig) {
+      // Re-check after acquiring lock — another thread may have registered 
already
+      String current = registeredUrlCrsConfig.get();
+      if (configKey.equals(current)) {
+        return;
+      }
 
-    CRSResult.Format crsFormat = parseCrsFormat(format);
+      // Remove existing provider if config changed
+      if (current != null) {
+        Defs.removeProvider(URL_CRS_PROVIDER_NAME);
+      }
 
-    UrlCRSProvider provider =
-        UrlCRSProvider.builder(URL_CRS_PROVIDER_NAME)
-            .baseUrl(baseUrl)
-            .pathTemplate(pathTemplate)
-            .format(crsFormat)
-            .build();
+      CRSResult.Format crsFormat = parseCrsFormat(format);
 
-    // Priority 50: before built-in (100) and spatialreference.org (101)
-    Defs.registerProvider(provider, 50);
-    registeredUrlCrsConfig.set(configKey);
+      UrlCRSProvider provider =
+          UrlCRSProvider.builder(URL_CRS_PROVIDER_NAME)
+              .baseUrl(baseUrl)
+              .pathTemplate(pathTemplate)
+              .format(crsFormat)
+              .build();
+
+      // Priority 50: before built-in (100) and spatialreference.org (101)
+      Defs.registerProvider(provider, 50);
+      registeredUrlCrsConfig.set(configKey);
+    }
   }
 
   /**
diff --git 
a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java 
b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
index 7fbedd05b0..472264de15 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java
@@ -26,6 +26,12 @@ import java.net.InetSocketAddress;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.junit.Test;
 import org.locationtech.jts.geom.*;
@@ -717,6 +723,51 @@ public class FunctionsProj4Test extends TestBase {
     }
   }
 
+  @Test
+  public void testRegisterUrlCrsProviderConcurrentThreadSafety() throws 
Exception {
+    // Verify that concurrent calls to registerUrlCrsProvider do not produce
+    // duplicate providers or corrupt the registry. This exercises the
+    // synchronized double-checked locking path.
+    final int threadCount = 16;
+    final String testUrl = "https://concurrent-test.example.com";;
+    final String pathTemplate = "/epsg/{code}.json";
+    final String format = "projjson";
+
+    ExecutorService pool = Executors.newFixedThreadPool(threadCount);
+    CyclicBarrier barrier = new CyclicBarrier(threadCount);
+
+    try {
+      List<Future<?>> futures = new ArrayList<>();
+      for (int i = 0; i < threadCount; i++) {
+        futures.add(
+            pool.submit(
+                () -> {
+                  try {
+                    // All threads wait at the barrier then race into 
registration
+                    barrier.await();
+                    FunctionsProj4.registerUrlCrsProvider(testUrl, 
pathTemplate, format);
+                  } catch (Exception e) {
+                    throw new RuntimeException(e);
+                  }
+                }));
+      }
+
+      // Wait for all threads to complete and propagate any exceptions
+      for (Future<?> f : futures) {
+        f.get();
+      }
+
+      // After all concurrent registrations, there should be exactly 1 provider
+      assertEquals(
+          "Concurrent registration must produce exactly 1 provider",
+          1,
+          countProvidersByName("sedona-url-crs"));
+    } finally {
+      pool.shutdown();
+      org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs");
+    }
+  }
+
   // Helper: count providers with a given name
   private int countProvidersByName(String name) {
     int count = 0;
diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index 52d62e930d..da470ef6ff 100644
--- 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++ 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -331,11 +331,6 @@ private[apache] case class ST_Transform(
     this(inputExpressions, ST_Transform.readConfig())
   }
 
-  // Register URL CRS provider on executor if configured (lazy, once per JVM)
-  if (crsUrlBase.nonEmpty) {
-    FunctionsProj4.registerUrlCrsProvider(crsUrlBase, crsUrlPathTemplate, 
crsUrlFormat)
-  }
-
   // Define proj4sedona function overloads (2, 3, 4-arg versions)
   // Note: 4-arg version ignores the lenient parameter
   private lazy val proj4Functions: Seq[InferrableFunction] = Seq(
@@ -350,6 +345,13 @@ private[apache] case class ST_Transform(
     inferrableFunction2(FunctionsGeoTools.transform))
 
   override lazy val f: InferrableFunction = {
+    // Register URL CRS provider on executor if configured (lazy, once per 
JVM).
+    // This runs inside lazy val f so it only executes on executors during row
+    // evaluation, never on the driver during query planning.
+    if (crsUrlBase.nonEmpty) {
+      FunctionsProj4.registerUrlCrsProvider(crsUrlBase, crsUrlPathTemplate, 
crsUrlFormat)
+    }
+
     // Check config to decide between proj4sedona and GeoTools
     // Note: 4-arg lenient parameter is ignored by proj4sedona
     val candidateFunctions = if (useGeoTools) geoToolsFunctions else 
proj4Functions

Reply via email to