This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch proj4sedona in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 987170b9c795f2a0534ca7d578f3fe50a2293533 Author: Jia Yu <[email protected]> AuthorDate: Wed Jan 28 04:37:46 2026 -0700 fix: add a ST_Transform performance benchmark on cache, update docs (#583) This pull request introduces a new performance test suite for CRS (Coordinate Reference System) transformation using Proj4sedona, and updates the documentation for the `ST_Transform` function in both Flink and Snowflake APIs. The main focus is on clarifying supported CRS formats, grid file usage, and simplifying documentation to reflect the latest capabilities of Sedona. **Testing enhancements:** - **New Proj4 Performance Test Suite:** - Added `FunctionsProj4PerformanceTest.java`, which benchmarks Proj4sedona CRS transformation performance, cache effects, and compares it with GeoTools. It covers scenarios like built-in and remote EPSG codes, PROJ/WKT strings, and grid file usage (both local and remote). **Documentation improvements:** - **Flink API Documentation (`Function.md`):** - Updated to clarify that Sedona now supports multiple CRS formats (EPSG, WKT1/2, PROJ strings, PROJJSON) and grid files for high-accuracy transformations. - Removed outdated explanations about lon/lat order handling, deprecated optional parameters, and lengthy WKT examples. - Added a tip directing users to the Spark SQL documentation for comprehensive CRS transformation details. - Simplified the function signature and removed the deprecated `lenientMode` parameter from examples. - **Snowflake API Documentation (`Function.md`):** - Updated to reflect support for multiple CRS formats and grid files, and direct users to the Spark SQL documentation for more details. - Simplified the function signature and removed references to deprecated parameters and redundant examples. 
These changes ensure that both the codebase and the documentation are up-to-date with Sedona’s latest CRS transformation features and best practices, making it easier for users and developers to understand and utilize these capabilities. --- .../common/FunctionsProj4PerformanceTest.java | 349 +++++++++++++++++++++ docs/api/flink/Function.md | 59 +--- docs/api/snowflake/vector-data/Function.md | 23 +- docs/api/sql/CRS-Transformation.md | 285 +++++++++++++++++ docs/api/sql/Function.md | 67 +--- mkdocs.yml | 1 + pom.xml | 2 +- 7 files changed, 651 insertions(+), 135 deletions(-) diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsProj4PerformanceTest.java b/common/src/test/java/org/apache/sedona/common/FunctionsProj4PerformanceTest.java new file mode 100644 index 0000000000..c9c3951fa4 --- /dev/null +++ b/common/src/test/java/org/apache/sedona/common/FunctionsProj4PerformanceTest.java @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.common; + +import static org.junit.Assert.*; +import static org.junit.Assume.assumeTrue; + +import java.nio.file.Files; +import java.nio.file.Path; +import org.datasyslab.proj4sedona.Proj4; +import org.datasyslab.proj4sedona.defs.Defs; +import org.datasyslab.proj4sedona.grid.NadgridRegistry; +import org.junit.Test; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.geom.GeometryFactory; +import org.locationtech.jts.geom.Point; + +/** + * Performance tests for Proj4sedona CRS transformation. + * + * <p>These tests measure: + * + * <ol> + * <li>Proj4sedona vs GeoTools performance comparison + * <li>Cache effects in Proj4sedona: + * <ul> + * <li>2.1 Built-in EPSG codes + * <li>2.2 EPSG codes with remote fetching from spatialreference.org + * <li>2.3 PROJ and WKT strings + * <li>2.4 Grid files (local and remote) + * </ul> + * </ol> + * + * <p>Each test uses the pattern: 1 cold call (cache miss) + N warm calls (cache hits) + */ +public class FunctionsProj4PerformanceTest extends TestBase { + + private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory(); + private static final int WARM_ITERATIONS = 10; + + // Test coordinates + private static final double SF_LON = -122.4194; + private static final double SF_LAT = 37.7749; + + // Remote grid file URL (OSTN15 from GitHub) + private static final String REMOTE_GRID_URL = + "https://raw.githubusercontent.com/jiayuasu/grid-files/main/us_os/OSTN15-NTv2/OSTN15_NTv2_ETRStoOSGB.gsb"; + + // ==================== Helper Methods ==================== + + private Point createTestPoint(double lon, double lat) { + return GEOMETRY_FACTORY.createPoint(new Coordinate(lon, lat)); + } + + private void printHeader(String title) { + System.out.println(); + System.out.println("=".repeat(70)); + System.out.println(title); + System.out.println("=".repeat(70)); + } + + private void printResult(String label, double coldMs, double 
warmAvgUs, int cacheEntries) { + double speedup = (coldMs * 1000) / warmAvgUs; + System.out.printf("Cold (1 call): %10.2f ms%n", coldMs); + System.out.printf("Warm (%d calls): %10.2f μs avg%n", WARM_ITERATIONS, warmAvgUs); + System.out.printf("Cache speedup: %10.0fx%n", speedup); + if (cacheEntries >= 0) { + System.out.printf("Proj cache entries: %10d%n", cacheEntries); + } + } + + // ==================== 1. Proj4sedona vs GeoTools ==================== + + @Test + public void testProj4VsGeoToolsEpsgPerformance() throws Exception { + printHeader("1. Proj4sedona vs GeoTools (EPSG:4326 -> EPSG:3857)"); + + Point point = createTestPoint(SF_LON, SF_LAT); + + // ===== Proj4sedona ===== + System.out.println("\nProj4sedona:"); + Proj4.clearCache(); + + // Cold call + long coldStart = System.nanoTime(); + Geometry proj4ColdResult = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + double proj4ColdMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + } + double proj4WarmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double proj4WarmAvgUs = (proj4WarmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Proj4sedona", proj4ColdMs, proj4WarmAvgUs, Proj4.getCacheSize()); + assertNotNull(proj4ColdResult); + assertEquals(3857, proj4ColdResult.getSRID()); + + // ===== GeoTools ===== + System.out.println("\nGeoTools:"); + + // Cold call + coldStart = System.nanoTime(); + Geometry gtColdResult = FunctionsGeoTools.transform(point, "EPSG:4326", "EPSG:3857"); + double gtColdMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsGeoTools.transform(point, "EPSG:4326", "EPSG:3857"); + } + double gtWarmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double gtWarmAvgUs = (gtWarmTotalMs * 1000) / WARM_ITERATIONS; + + 
printResult("GeoTools", gtColdMs, gtWarmAvgUs, -1); + assertNotNull(gtColdResult); + + // ===== Comparison ===== + double warmSpeedup = gtWarmAvgUs / proj4WarmAvgUs; + System.out.printf( + "%nComparison: Proj4sedona is %.1fx faster than GeoTools (warm)%n", warmSpeedup); + + // Verify both produce similar results + assertEquals( + proj4ColdResult.getCoordinate().x, + gtColdResult.getCoordinate().x, + 1.0); // 1 meter tolerance + assertEquals(proj4ColdResult.getCoordinate().y, gtColdResult.getCoordinate().y, 1.0); + } + + // ==================== 2.1 Cache Effect: Built-in EPSG ==================== + + @Test + public void testCacheEffectBuiltInEpsgCode() { + printHeader("2.1 Cache Effect: Built-in EPSG (EPSG:4326 -> EPSG:3857)"); + + Point point = createTestPoint(SF_LON, SF_LAT); + Proj4.clearCache(); + + // Cold call + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, "EPSG:4326", "EPSG:3857"); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Built-in EPSG", coldMs, warmAvgUs, Proj4.getCacheSize()); + assertNotNull(coldResult); + assertEquals(3857, coldResult.getSRID()); + } + + // ==================== 2.2 Cache Effect: Remote Fetch EPSG ==================== + + @Test + public void testCacheEffectRemoteFetchEpsgCode() { + printHeader("2.2 Cache Effect: Remote Fetch EPSG (EPSG:2154 - French Lambert)"); + + // EPSG:2154 (RGF93 / Lambert-93) is NOT in the built-in list + // It requires fetching from spatialreference.org + + Point point = createTestPoint(2.3522, 48.8566); // Paris coordinates + Proj4.clearCache(); + Defs.reset(); // Clear fetched definitions + + try { + // Cold call (network fetch) + long coldStart = 
System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, "EPSG:4326", "EPSG:2154"); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, "EPSG:4326", "EPSG:2154"); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Remote Fetch EPSG", coldMs, warmAvgUs, Proj4.getCacheSize()); + System.out.printf("Note: Cold time includes network fetch from spatialreference.org%n"); + assertNotNull(coldResult); + assertEquals(2154, coldResult.getSRID()); + } catch (Exception e) { + System.out.println("Skipped: Network fetch failed - " + e.getMessage()); + // Don't fail the test if network is unavailable + } + } + + // ==================== 2.3 Cache Effect: PROJ String ==================== + + @Test + public void testCacheEffectProjString() { + printHeader("2.3 Cache Effect: PROJ String"); + + Point point = createTestPoint(SF_LON, SF_LAT); + String sourceCRS = "+proj=longlat +datum=WGS84 +no_defs"; + String targetCRS = + "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs"; + + Proj4.clearCache(); + + // Cold call + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, sourceCRS, targetCRS); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, sourceCRS, targetCRS); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("PROJ String", coldMs, warmAvgUs, Proj4.getCacheSize()); + assertNotNull(coldResult); + // Web Mercator coordinates + assertEquals(-13627665.27, coldResult.getCoordinate().x, 1.0); + assertEquals(4547675.35, 
coldResult.getCoordinate().y, 1.0); + } + + @Test + public void testCacheEffectWktString() { + printHeader("2.3 Cache Effect: WKT String"); + + Point point = createTestPoint(120, 60); + String sourceWkt = + "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433]]"; + String targetWkt = + "PROJCS[\"WGS 84 / UTM zone 51N\",GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433]],PROJECTION[\"Transverse_Mercator\"],PARAMETER[\"latitude_of_origin\",0],PARAMETER[\"central_meridian\",123],PARAMETER[\"scale_factor\",0.9996],PARAMETER[\"false_easting\",500000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1]]"; + + Proj4.clearCache(); + + // Cold call + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, sourceWkt, targetWkt); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, sourceWkt, targetWkt); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("WKT String", coldMs, warmAvgUs, Proj4.getCacheSize()); + assertNotNull(coldResult); + } + + // ==================== 2.4 Cache Effect: Grid Files ==================== + + @Test + public void testCacheEffectGridFileLocal() { + printHeader("2.4 Cache Effect: Grid File (local)"); + + Path gridPath = Path.of("src/test/resources/grids/ca_nrc_ntv2_0.tif").toAbsolutePath(); + assumeTrue("Grid file not found: " + gridPath, Files.exists(gridPath)); + + // Toronto coordinates for Canadian grid + Point point = createTestPoint(-79.3832, 43.6532); + String sourceCRS = "+proj=longlat +datum=NAD27 +nadgrids=" + gridPath + " +no_defs"; + String targetCRS = "EPSG:4326"; // WGS84 + + Proj4.clearCache(); + 
NadgridRegistry.clear(); + + // Cold call (loads grid file) + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, sourceCRS, targetCRS); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, sourceCRS, targetCRS); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Grid File (local)", coldMs, warmAvgUs, Proj4.getCacheSize()); + System.out.printf("Grid cache entries: %10d%n", NadgridRegistry.size()); + System.out.printf("Note: Cold time includes loading grid file from disk%n"); + assertNotNull(coldResult); + assertTrue(NadgridRegistry.size() > 0); + } + + @Test + public void testCacheEffectGridFileRemote() { + printHeader("2.4 Cache Effect: Grid File (remote)"); + + // Use OSTN15 grid file from GitHub + Point point = createTestPoint(-0.1276, 51.5074); // London coordinates + String sourceCRS = "+proj=longlat +ellps=GRS80 +nadgrids=" + REMOTE_GRID_URL + " +no_defs"; + String targetCRS = "+proj=longlat +ellps=airy +no_defs"; + + Proj4.clearCache(); + NadgridRegistry.clear(); + + try { + // Cold call (downloads grid file) + long coldStart = System.nanoTime(); + Geometry coldResult = FunctionsProj4.transform(point, sourceCRS, targetCRS); + double coldMs = (System.nanoTime() - coldStart) / 1e6; + + // Warm calls + long warmStart = System.nanoTime(); + for (int i = 0; i < WARM_ITERATIONS; i++) { + FunctionsProj4.transform(point, sourceCRS, targetCRS); + } + double warmTotalMs = (System.nanoTime() - warmStart) / 1e6; + double warmAvgUs = (warmTotalMs * 1000) / WARM_ITERATIONS; + + printResult("Grid File (remote)", coldMs, warmAvgUs, Proj4.getCacheSize()); + System.out.printf("Grid cache entries: %10d%n", NadgridRegistry.size()); + System.out.printf("Note: Cold time includes downloading grid file 
(~15MB)%n"); + assertNotNull(coldResult); + assertTrue(NadgridRegistry.size() > 0); + } catch (Exception e) { + System.out.println("Skipped: Remote grid download failed - " + e.getMessage()); + // Don't fail the test if network is unavailable + } + } +} diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md index 0e9f323852..58871582a1 100644 --- a/docs/api/flink/Function.md +++ b/docs/api/flink/Function.md @@ -4275,62 +4275,17 @@ MULTIPOLYGON (((-2 -3, -3 -3, -3 3, -2 3, -2 -3)), ((3 -3, 3 3, 4 3, 4 -3, 3 -3) Introduction: -Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. For SourceCRS and TargetCRS, WKT format is also available since v1.3.1. +Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. -Since `v1.9.0`, Sedona Flink uses the proj4sedona library for CRS transformations, which provides better performance and does not require the GeoTools dependency. +Since `v1.9.0`, Sedona supports multiple CRS formats including EPSG codes, WKT1, WKT2, PROJ strings, and PROJJSON. Grid files for high-accuracy datum transformations are also supported. -**Lon/Lat Order in the input geometry** - -If the input geometry is in lat/lon order, it might throw an error such as `too close to pole`, `latitude or longitude exceeded limits`, or give unexpected results. -You need to make sure that the input geometry is in lon/lat order. If the input geometry is in lat/lon order, you can use ==ST_FlipCoordinates== to swap X and Y. - -**Lon/Lat Order in the source and target CRS** - -Sedona will force the source and target CRS to be in lon/lat order. If the source CRS or target CRS is in lat/lon order, it will be swapped to lon/lat order. - -**CRS code** - -The CRS code is the code of the CRS in the official EPSG database (https://epsg.org/) in the format of `EPSG:XXXX`. A community tool [EPSG.io](https://epsg.io/) can help you quick identify a CRS code. 
For example, the code of WGS84 is `EPSG:4326`. - -**WKT format** - -You can also use OGC WKT v1 format to specify the source CRS and target CRS. An example OGC WKT v1 CRS of `EPGS:3857` is as follows: - -``` -PROJCS["WGS 84 / Pseudo-Mercator", - GEOGCS["WGS 84", - DATUM["WGS_1984", - SPHEROID["WGS 84",6378137,298.257223563, - AUTHORITY["EPSG","7030"]], - AUTHORITY["EPSG","6326"]], - PRIMEM["Greenwich",0, - AUTHORITY["EPSG","8901"]], - UNIT["degree",0.0174532925199433, - AUTHORITY["EPSG","9122"]], - AUTHORITY["EPSG","4326"]], - PROJECTION["Mercator_1SP"], - PARAMETER["central_meridian",0], - PARAMETER["scale_factor",1], - PARAMETER["false_easting",0], - PARAMETER["false_northing",0], - UNIT["metre",1, - AUTHORITY["EPSG","9001"]], - AXIS["Easting",EAST], - AXIS["Northing",NORTH], - EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"], - AUTHORITY["EPSG","3857"]] -``` - -!!!note - By default, this function uses lon/lat order since `v1.5.0`. Before, it used lat/lon order. - -!!!note - Since `v1.9.0`, the `lenient` parameter is accepted for API compatibility but is ignored. Transformations using proj4sedona are always strict. +!!!tip + For comprehensive details on supported CRS formats, grid file usage, and examples, see the Spark SQL [CRS Transformation](../sql/CRS-Transformation.md) documentation. 
Format: ``` -ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String, [Optional] lenientMode: Boolean) +ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String) ``` Since: `v1.2.0` @@ -4341,10 +4296,6 @@ Example: SELECT ST_AsText(ST_Transform(ST_GeomFromText('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'),'EPSG:4326', 'EPSG:32649')) ``` -```sql -SELECT ST_AsText(ST_Transform(ST_GeomFromText('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'),'EPSG:4326', 'EPSG:32649', false)) -``` - Output: ``` diff --git a/docs/api/snowflake/vector-data/Function.md b/docs/api/snowflake/vector-data/Function.md index 46c6559c4f..fdb09919fb 100644 --- a/docs/api/snowflake/vector-data/Function.md +++ b/docs/api/snowflake/vector-data/Function.md @@ -3388,35 +3388,24 @@ MULTIPOLYGON (((-2 -3, -3 -3, -3 3, -2 3, -2 -3)), ((3 -3, 3 3, 4 3, 4 -3, 3 -3) Introduction: Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. -For SourceCRS and TargetCRS, WKT format is also available. -Since `v1.9.0`, Sedona Snowflake uses the proj4sedona library for CRS transformations, which provides better performance and broader CRS format support. +Since `v1.9.0`, Sedona supports multiple CRS formats including EPSG codes, WKT1, WKT2, PROJ strings, and PROJJSON. Grid files for high-accuracy datum transformations are also supported. -!!!note - By default, this function uses lat/lon order. You can use ==ST_FlipCoordinates== to swap X and Y. +!!!tip + For comprehensive details on supported CRS formats, grid file usage, and examples, see the Spark SQL [CRS Transformation](../../sql/CRS-Transformation.md) documentation. !!!note - Since `v1.9.0`, the optional boolean parameter is accepted for API compatibility but is ignored. Transformations using proj4sedona are always strict. + By default, this function uses lat/lon order. You can use ==ST_FlipCoordinates== to swap X and Y. 
-Format: `ST_Transform (A:geometry, SourceCRS:string, TargetCRS:string ,[Optional] lenient)` +Format: `ST_Transform (A:geometry, SourceCRS:string, TargetCRS:string)` -SQL example (simple): +SQL example: ```sql SELECT ST_Transform(polygondf.countyshape, 'epsg:4326','epsg:3857') FROM polygondf ``` -SQL example (with optional parameters): - -```sql -SELECT ST_Transform(polygondf.countyshape, 'epsg:4326','epsg:3857', false) -FROM polygondf -``` - -!!!note - The detailed EPSG information can be searched on [EPSG.io](https://epsg.io/). - ## ST_Translate Introduction: Returns the input geometry with its X, Y and Z coordinates (if present in the geometry) translated by deltaX, deltaY and deltaZ (if specified) diff --git a/docs/api/sql/CRS-Transformation.md b/docs/api/sql/CRS-Transformation.md new file mode 100644 index 0000000000..a61533aab5 --- /dev/null +++ b/docs/api/sql/CRS-Transformation.md @@ -0,0 +1,285 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + --> + +# CRS Transformation + +Sedona provides coordinate reference system (CRS) transformation through the `ST_Transform` function. Since v1.9.0, Sedona uses the proj4sedona library, a pure Java implementation that supports multiple CRS input formats and grid-based transformations. 
+ +## Supported CRS Formats + +Sedona supports the following formats for specifying source and target coordinate reference systems: + +### Authority Code + +The most common way to specify a CRS is using an authority code in the format `AUTHORITY:CODE`. Sedona uses [spatialreference.org](https://spatialreference.org/projjson_index.json) as an open-source CRS database, which supports multiple authorities: + +| Authority | Description | Example | +|-----------|-------------|---------| +| EPSG | European Petroleum Survey Group | `EPSG:4326`, `EPSG:3857` | +| ESRI | Esri coordinate systems | `ESRI:102008`, `ESRI:54012` | +| IAU | International Astronomical Union (planetary CRS) | `IAU:30100` | +| SR-ORG | User-contributed definitions | `SR-ORG:6864` | + +```sql +-- Transform from WGS84 (EPSG:4326) to Web Mercator (EPSG:3857) +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + 'EPSG:3857' +) AS transformed_point +``` + +Output: + +``` +POINT (-13627665.27 4547675.35) +``` + +```sql +-- Transform using ESRI authority code (North America Albers Equal Area Conic) +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + 'ESRI:102008' +) AS transformed_point +``` + +```sql +-- Transform from WGS84 to UTM Zone 10N (EPSG:32610) +SELECT ST_Transform( + ST_GeomFromText('POLYGON((-122.5 37.5, -122.5 38.0, -122.0 38.0, -122.0 37.5, -122.5 37.5))'), + 'EPSG:4326', + 'EPSG:32610' +) AS transformed_polygon +``` + +You can browse available CRS codes at [spatialreference.org](https://spatialreference.org/projjson_index.json) or [EPSG.io](https://epsg.io/). + +### WKT1 (OGC Well-Known Text) + +WKT1 is the OGC Well-Known Text format for CRS definitions. It starts with `PROJCS[...]` for projected CRS or `GEOGCS[...]` for geographic CRS. 
+ +```sql +-- Transform using WKT1 format for target CRS +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + 'PROJCS["WGS 84 / Pseudo-Mercator", + GEOGCS["WGS 84", + DATUM["WGS_1984", + SPHEROID["WGS 84",6378137,298.257223563]], + PRIMEM["Greenwich",0], + UNIT["degree",0.0174532925199433]], + PROJECTION["Mercator_1SP"], + PARAMETER["central_meridian",0], + PARAMETER["scale_factor",1], + PARAMETER["false_easting",0], + PARAMETER["false_northing",0], + UNIT["metre",1], + AUTHORITY["EPSG","3857"]]' +) AS transformed_point +``` + +### WKT2 (ISO 19162:2019) + +WKT2 is the modern ISO 19162:2019 standard format. It starts with `PROJCRS[...]` for projected CRS or `GEOGCRS[...]` for geographic CRS. + +```sql +-- Transform using WKT2 format for target CRS +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + 'PROJCRS["WGS 84 / UTM zone 10N", + BASEGEOGCRS["WGS 84", + DATUM["World Geodetic System 1984", + ELLIPSOID["WGS 84",6378137,298.257223563]]], + CONVERSION["UTM zone 10N", + METHOD["Transverse Mercator"], + PARAMETER["Latitude of natural origin",0], + PARAMETER["Longitude of natural origin",-123], + PARAMETER["Scale factor at natural origin",0.9996], + PARAMETER["False easting",500000], + PARAMETER["False northing",0]], + CS[Cartesian,2], + AXIS["easting",east], + AXIS["northing",north], + UNIT["metre",1], + ID["EPSG",32610]]' +) AS transformed_point +``` + +### PROJ String + +PROJ strings provide a compact way to define CRS using projection parameters. They start with `+proj=`. 
+ +```sql +-- Transform using PROJ string for UTM Zone 10N +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + '+proj=longlat +datum=WGS84 +no_defs', + '+proj=utm +zone=10 +datum=WGS84 +units=m +no_defs' +) AS transformed_point +``` + +```sql +-- Transform using PROJ string for Lambert Conformal Conic +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + '+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs' +) AS transformed_point +``` + +### PROJJSON + +PROJJSON is a JSON representation of CRS, useful when working with JSON-based workflows. + +```sql +-- Transform using PROJJSON for target CRS +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + 'EPSG:4326', + '{ + "type": "ProjectedCRS", + "name": "WGS 84 / UTM zone 10N", + "base_crs": { + "name": "WGS 84", + "datum": { + "type": "GeodeticReferenceFrame", + "name": "World Geodetic System 1984", + "ellipsoid": { + "name": "WGS 84", + "semi_major_axis": 6378137, + "inverse_flattening": 298.257223563 + } + }, + "coordinate_system": { + "subtype": "ellipsoidal", + "axis": [ + {"name": "Longitude", "abbreviation": "lon", "direction": "east", "unit": "degree"}, + {"name": "Latitude", "abbreviation": "lat", "direction": "north", "unit": "degree"} + ] + } + }, + "conversion": { + "name": "UTM zone 10N", + "method": {"name": "Transverse Mercator"}, + "parameters": [ + {"name": "Latitude of natural origin", "value": 0, "unit": "degree"}, + {"name": "Longitude of natural origin", "value": -123, "unit": "degree"}, + {"name": "Scale factor at natural origin", "value": 0.9996}, + {"name": "False easting", "value": 500000, "unit": "metre"}, + {"name": "False northing", "value": 0, "unit": "metre"} + ] + }, + "coordinate_system": { + "subtype": "Cartesian", + "axis": [ + {"name": "Easting", "abbreviation": "E", "direction": "east", "unit": "metre"}, + {"name": "Northing", "abbreviation": "N", "direction": "north", 
"unit": "metre"} + ] + }, + "id": {"authority": "EPSG", "code": 32610} + }' +) AS transformed_point +``` + +## Grid File Support + +Grid files enable high-accuracy datum transformations, such as NAD27 to NAD83 or OSGB36 to ETRS89. Sedona supports loading grid files from multiple sources. + +### Grid File Sources + +Grid files can be specified using the `+nadgrids` parameter in PROJ strings: + +| Source | Format | Example | +|--------|--------|---------| +| Local file | Absolute path | `+nadgrids=/path/to/grid.gsb` | +| PROJ CDN | `@` prefix | `+nadgrids=@us_noaa_conus.tif` | +| HTTPS URL | Full URL | `+nadgrids=https://cdn.proj.org/us_noaa_conus.tif` | + +When using the `@` prefix, grid files are automatically fetched from [PROJ CDN](https://cdn.proj.org/). + +### Optional vs Mandatory Grids + +- **`@` prefix (optional)**: The transformation continues without the grid if it's unavailable. Use this when the grid improves accuracy but isn't required. +- **No prefix (mandatory)**: An error is thrown if the grid file cannot be found. + +### SQL Examples with Grid Files + +```sql +-- Transform NAD27 to NAD83 using PROJ CDN grid (optional) +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + '+proj=longlat +datum=NAD27 +no_defs +nadgrids=@us_noaa_conus.tif', + 'EPSG:4269' +) AS transformed_point +``` + +```sql +-- Transform using mandatory grid file (error if not found) +SELECT ST_Transform( + ST_GeomFromText('POINT(-122.4194 37.7749)'), + '+proj=longlat +datum=NAD27 +no_defs +nadgrids=us_noaa_conus.tif', + 'EPSG:4269' +) AS transformed_point +``` + +```sql +-- Transform OSGB36 to ETRS89 using UK grid +SELECT ST_Transform( + ST_GeomFromText('POINT(-0.1276 51.5074)'), + '+proj=longlat +datum=OSGB36 +nadgrids=@uk_os_OSTN15_NTv2_OSGBtoETRS.gsb +no_defs', + 'EPSG:4258' +) AS transformed_point +``` + +## Coordinate Order + +Sedona expects geometries to be in **longitude/latitude (lon/lat)** order. 
If your data is in lat/lon order, use `ST_FlipCoordinates` to swap the coordinates before transformation. + +```sql +-- If your data is in lat/lon order, flip first +SELECT ST_Transform( + ST_FlipCoordinates(ST_GeomFromText('POINT(37.7749 -122.4194)')), + 'EPSG:4326', + 'EPSG:3857' +) AS transformed_point +``` + +Sedona automatically handles coordinate order in the CRS definition, ensuring the source and target CRS use lon/lat order internally. + +## Using Geometry SRID + +If the geometry already has an SRID set, you can omit the source CRS parameter: + +```sql +-- Set SRID on geometry and transform using only target CRS +SELECT ST_Transform( + ST_SetSRID(ST_GeomFromText('POINT(-122.4194 37.7749)'), 4326), + 'EPSG:3857' +) AS transformed_point +``` + +## See Also + +- [ST_Transform](Function.md#st_transform) - Function reference +- [ST_SetSRID](Function.md#st_setsrid) - Set the SRID of a geometry +- [ST_SRID](Function.md#st_srid) - Get the SRID of a geometry +- [ST_FlipCoordinates](Function.md#st_flipcoordinates) - Swap X and Y coordinates diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md index edb01fc9ba..31a8f3d682 100644 --- a/docs/api/sql/Function.md +++ b/docs/api/sql/Function.md @@ -4648,70 +4648,15 @@ MULTIPOLYGON (((-2 -3, -3 -3, -3 3, -2 3, -2 -3)), ((3 -3, 3 3, 4 3, 4 -3, 3 -3) Introduction: -Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. For SourceCRS and TargetCRS, WKT format is also available since `v1.3.1`. Since `v1.5.1`, if the `SourceCRS` is not specified, CRS will be fetched from the geometry using [ST_SRID](#st_srid). +Transform the Spatial Reference System / Coordinate Reference System of A, from SourceCRS to TargetCRS. If the `SourceCRS` is not specified, CRS will be fetched from the geometry using [ST_SRID](#st_srid). -**CRS Transformation Backend** +Since `v1.9.0`, Sedona supports multiple CRS formats including EPSG codes, WKT1, WKT2, PROJ strings, and PROJJSON. 
Grid files for high-accuracy datum transformations are also supported. -Since `v1.9.0`, Sedona uses the proj4sedona library by default for vector CRS transformations, which provides better performance and does not require the GeoTools dependency. The backend can be configured using `spark.sedona.crs.geotools`: - -- `none`: Use proj4sedona for all transformations -- `raster` (default): Use proj4sedona for vector, GeoTools for raster -- `all`: Use GeoTools for all transformations (legacy behavior) - -**Lon/Lat Order in the input geometry** - -If the input geometry is in lat/lon order, it might throw an error such as `too close to pole`, `latitude or longitude exceeded limits`, or give unexpected results. -You need to make sure that the input geometry is in lon/lat order. If the input geometry is in lat/lon order, you can use ==ST_FlipCoordinates== to swap X and Y. - -**Lon/Lat Order in the source and target CRS** - -Sedona will make sure the source and target CRS to be in lon/lat order. If the source CRS or target CRS is in lat/lon order, these CRS will be swapped to lon/lat order. - -**CRS code** - -The CRS code is the code of the CRS in the official EPSG database (https://epsg.org/) in the format of `EPSG:XXXX`. A community tool [EPSG.io](https://epsg.io/) can help you quick identify a CRS code. For example, the code of WGS84 is `EPSG:4326`. - -**WKT format** - -You can also use OGC WKT v1 format to specify the source CRS and target CRS. 
An example OGC WKT v1 CRS of `EPGS:3857` is as follows: - -``` -PROJCS["WGS 84 / Pseudo-Mercator", - GEOGCS["WGS 84", - DATUM["WGS_1984", - SPHEROID["WGS 84",6378137,298.257223563, - AUTHORITY["EPSG","7030"]], - AUTHORITY["EPSG","6326"]], - PRIMEM["Greenwich",0, - AUTHORITY["EPSG","8901"]], - UNIT["degree",0.0174532925199433, - AUTHORITY["EPSG","9122"]], - AUTHORITY["EPSG","4326"]], - PROJECTION["Mercator_1SP"], - PARAMETER["central_meridian",0], - PARAMETER["scale_factor",1], - PARAMETER["false_easting",0], - PARAMETER["false_northing",0], - UNIT["metre",1, - AUTHORITY["EPSG","9001"]], - AXIS["Easting",EAST], - AXIS["Northing",NORTH], - EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"], - AUTHORITY["EPSG","3857"]] -``` - -!!!note - By default, this function uses lon/lat order since `v1.5.0`. Before, it used lat/lon order. - -!!!note - The `lenient` parameter controls error handling during transformation. When using the default proj4sedona backend (since `v1.9.0`), the lenient parameter is ignored and transformations are always strict. When using GeoTools (`spark.sedona.crs.geotools=all`), lenient mode (default: true) tries to fix issues automatically, while strict mode (false) throws an error if any issue is found. +!!!tip + For comprehensive details on supported CRS formats, grid file usage, and more examples, see [CRS Transformation](CRS-Transformation.md). 
Format: -``` -ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String, lenientMode: Boolean) -``` - ``` ST_Transform (A: Geometry, SourceCRS: String, TargetCRS: String) ``` @@ -4728,10 +4673,6 @@ SQL Example SELECT ST_AsText(ST_Transform(ST_GeomFromText('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'),'EPSG:4326', 'EPSG:32649')) ``` -```sql -SELECT ST_AsText(ST_Transform(ST_GeomFromText('POLYGON((170 50,170 72,-130 72,-130 50,170 50))'),'EPSG:4326', 'EPSG:32649', false)) -``` - Output: ``` diff --git a/mkdocs.yml b/mkdocs.yml index 27246b7280..1e5d0a3e07 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -84,6 +84,7 @@ nav: - Aggregate function: api/sql/geography/AggregateFunction.md - DataFrame Style functions: api/sql/DataFrameAPI.md - Query optimization: api/sql/Optimizer.md + - CRS Transformation: api/sql/CRS-Transformation.md - Nearest-Neighbour searching: api/sql/NearestNeighbourSearching.md - 'Spider:Spatial Data Generator': api/sql/Spider.md - Reading Legacy Parquet Files: api/sql/Reading-legacy-parquet.md diff --git a/pom.xml b/pom.xml index fa8b3b8fa7..542c6c9465 100644 --- a/pom.xml +++ b/pom.xml @@ -96,7 +96,7 @@ <scala-collection-compat.version>2.5.0</scala-collection-compat.version> <geoglib.version>1.52</geoglib.version> <caffeine.version>2.9.2</caffeine.version> - <proj4sedona.version>0.0.2</proj4sedona.version> + <proj4sedona.version>0.0.3</proj4sedona.version> <geotools.scope>provided</geotools.scope> <!-- Because it's not in Maven central, make it provided by default -->
