This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch fix/upgrade-proj4sedona-url-crs-provider-2657 in repository https://gitbox.apache.org/repos/asf/sedona.git
commit f5754cb119a962ab97bb50b29a5e7f49e103058a Author: Jia Yu <[email protected]> AuthorDate: Wed Feb 18 00:53:36 2026 -0800 fix: address code review — thread-safe registration & executor-only path 1. Move URL CRS provider registration from ST_Transform class body into lazy val f, so it only executes on executors during row evaluation, never on the driver during query planning. 2. Wrap registerUrlCrsProvider's remove-register-set sequence in a synchronized block with double-checked locking. The fast path (already registered) is lock-free (volatile read + String.equals). 3. Add 16-thread concurrency test verifying no duplicate providers are registered under contention. --- .../org/apache/sedona/common/FunctionsProj4.java | 49 +++++++++++++-------- .../apache/sedona/common/FunctionsProj4Test.java | 51 ++++++++++++++++++++++ .../sql/sedona_sql/expressions/Functions.scala | 12 ++--- 3 files changed, 88 insertions(+), 24 deletions(-) diff --git a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java index 78e0a94f53..fb7722a276 100644 --- a/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java +++ b/common/src/main/java/org/apache/sedona/common/FunctionsProj4.java @@ -81,8 +81,9 @@ public class FunctionsProj4 { * Register a URL-based CRS provider with proj4sedona's Defs registry. This provider will be * consulted before the built-in provider when resolving EPSG codes. * - * <p>This method is safe to call multiple times — it only registers once per JVM (or re-registers - * if the config changes). It is called lazily on executors before the first CRS transformation. + * <p>This method is safe to call concurrently from multiple threads — it uses double-checked + * locking so the fast path (already registered with the same config) is lock-free, and the + * synchronized slow path executes at most once per JVM (or once per config change). * * @param baseUrl The base URL of the CRS definition server * @param pathTemplate The URL path template (e.g., "/{authority}/{code}.json") @@ -94,30 +95,40 @@ public class FunctionsProj4 { } String configKey = baseUrl + "|" + pathTemplate + "|" + format; - String current = registeredUrlCrsConfig.get(); - if (configKey.equals(current)) { - // Already registered with the same config + // Fast path (lock-free): already registered with the same config. + // This handles 99.999%+ of calls with just a volatile read + String.equals(). + if (configKey.equals(registeredUrlCrsConfig.get())) { return; } - // Remove existing provider if config changed - if (current != null) { - Defs.removeProvider(URL_CRS_PROVIDER_NAME); - } + // Slow path: synchronize to make the remove-register-set sequence atomic. + // Only the first thread per JVM (or per config change) enters this block. + synchronized (registeredUrlCrsConfig) { + // Re-check after acquiring lock — another thread may have registered already + String current = registeredUrlCrsConfig.get(); + if (configKey.equals(current)) { + return; + } - CRSResult.Format crsFormat = parseCrsFormat(format); + // Remove existing provider if config changed + if (current != null) { + Defs.removeProvider(URL_CRS_PROVIDER_NAME); + } - UrlCRSProvider provider = - UrlCRSProvider.builder(URL_CRS_PROVIDER_NAME) - .baseUrl(baseUrl) - .pathTemplate(pathTemplate) - .format(crsFormat) - .build(); + CRSResult.Format crsFormat = parseCrsFormat(format); - // Priority 50: before built-in (100) and spatialreference.org (101) - Defs.registerProvider(provider, 50); - registeredUrlCrsConfig.set(configKey); + UrlCRSProvider provider = + UrlCRSProvider.builder(URL_CRS_PROVIDER_NAME) + .baseUrl(baseUrl) + .pathTemplate(pathTemplate) + .format(crsFormat) + .build(); + + // Priority 50: before built-in (100) and spatialreference.org (101) + Defs.registerProvider(provider, 50); + registeredUrlCrsConfig.set(configKey); + } } /** diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java index 7fbedd05b0..472264de15 100644 --- a/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java +++ b/common/src/test/java/org/apache/sedona/common/FunctionsProj4Test.java @@ -26,6 +26,12 @@ import java.net.InetSocketAddress; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; import org.junit.Test; import org.locationtech.jts.geom.*; @@ -717,6 +723,51 @@ public class FunctionsProj4Test extends TestBase { } } + @Test + public void testRegisterUrlCrsProviderConcurrentThreadSafety() throws Exception { + // Verify that concurrent calls to registerUrlCrsProvider do not produce + // duplicate providers or corrupt the registry. This exercises the + // synchronized double-checked locking path. + final int threadCount = 16; + final String testUrl = "https://concurrent-test.example.com"; + final String pathTemplate = "/epsg/{code}.json"; + final String format = "projjson"; + + ExecutorService pool = Executors.newFixedThreadPool(threadCount); + CyclicBarrier barrier = new CyclicBarrier(threadCount); + + try { + List<Future<?>> futures = new ArrayList<>(); + for (int i = 0; i < threadCount; i++) { + futures.add( + pool.submit( + () -> { + try { + // All threads wait at the barrier then race into registration + barrier.await(); + FunctionsProj4.registerUrlCrsProvider(testUrl, pathTemplate, format); + } catch (Exception e) { + throw new RuntimeException(e); + } + })); + } + + // Wait for all threads to complete and propagate any exceptions + for (Future<?> f : futures) { + f.get(); + } + + // After all concurrent registrations, there should be exactly 1 provider + assertEquals( + "Concurrent registration must produce exactly 1 provider", + 1, + countProvidersByName("sedona-url-crs")); + } finally { + pool.shutdown(); + org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs"); + } + } + // Helper: count providers with a given name private int countProvidersByName(String name) { int count = 0; diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala index 52d62e930d..da470ef6ff 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala @@ -331,11 +331,6 @@ private[apache] case class ST_Transform( this(inputExpressions, ST_Transform.readConfig()) } - // Register URL CRS provider on executor if configured (lazy, once per JVM) - if (crsUrlBase.nonEmpty) { - FunctionsProj4.registerUrlCrsProvider(crsUrlBase, crsUrlPathTemplate, crsUrlFormat) - } - // Define proj4sedona function overloads (2, 3, 4-arg versions) // Note: 4-arg version ignores the lenient parameter private lazy val proj4Functions: Seq[InferrableFunction] = Seq( @@ -350,6 +345,13 @@ private[apache] case class ST_Transform( inferrableFunction2(FunctionsGeoTools.transform)) override lazy val f: InferrableFunction = { + // Register URL CRS provider on executor if configured (lazy, once per JVM). + // This runs inside lazy val f so it only executes on executors during row + // evaluation, never on the driver during query planning. + if (crsUrlBase.nonEmpty) { + FunctionsProj4.registerUrlCrsProvider(crsUrlBase, crsUrlPathTemplate, crsUrlFormat) + } + // Check config to decide between proj4sedona and GeoTools // Note: 4-arg lenient parameter is ignored by proj4sedona val candidateFunctions = if (useGeoTools) geoToolsFunctions else proj4Functions
