Copilot commented on code in PR #2677: URL: https://github.com/apache/sedona/pull/2677#discussion_r2868645636
########## docs/api/sql/Raster-Operators/RS_CRS.md: ########## @@ -0,0 +1,103 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + --> + +# RS_CRS + +Introduction: Returns the coordinate reference system (CRS) of a raster as a string in the specified format. If no format is specified, the CRS is returned in PROJJSON format. Returns null if the raster has no CRS defined. + +Format: + +``` +RS_CRS (raster: Raster) +``` + +``` +RS_CRS (raster: Raster, format: String) +``` + +Since: `v1.9.0` + +## Supported output formats + +| Format | Description | +| :--- | :--- | +| `'projjson'` | PROJJSON format (default). Modern, lossless, machine-readable JSON representation. | Review Comment: The docs describe `projjson` as "lossless" here, but the Limitations section below calls out datum name loss and other conversion issues for `projjson`. Consider rewording the `projjson` description to avoid claiming losslessness (or clarify what is/isn't preserved). ```suggestion | `'projjson'` | PROJJSON format (default). Modern, machine-readable JSON representation. | ``` ########## common/src/main/java/org/apache/sedona/common/raster/RasterAccessors.java: ########## @@ -359,4 +362,67 @@ public static RasterMetadata rasterMetadata(GridCoverage2D raster) throws Factor (int) meta[10], (int) meta[11]); } + + /** + * Returns the CRS of a raster as PROJJSON string. + * + * @param raster The input raster. + * @return The CRS definition as PROJJSON string, or null if no CRS is set. + */ + public static String crs(GridCoverage2D raster) { + return crs(raster, "projjson"); + } + + /** + * Returns the CRS of a raster in the specified format. + * + * @param raster The input raster. + * @param format The desired output format: "projjson", "wkt2", "wkt1", or "proj". + * @return The CRS definition string in the requested format, or null if no CRS is set. + */ + public static String crs(GridCoverage2D raster, String format) { + CoordinateReferenceSystem crsDef = raster.getCoordinateReferenceSystem(); + if (crsDef instanceof DefaultEngineeringCRS) { + if (((DefaultEngineeringCRS) crsDef).isWildcard()) { + return null; + } + } + + // Get WKT1 representation from GeoTools (native, no conversion needed) + String wkt1; + if (crsDef instanceof Formattable) { + wkt1 = ((Formattable) crsDef).toWKT(2, false); + } else { + wkt1 = crsDef.toWKT(); + } + + String fmt = format.toLowerCase(Locale.ROOT).trim(); + if ("wkt1".equals(fmt) || "wkt".equals(fmt)) { Review Comment: `crs(GridCoverage2D raster, String format)` can throw a `NullPointerException` when `format` is null because of `format.toLowerCase(...)`. Since this is a public API, consider validating `format` up front and throwing a clear `IllegalArgumentException` (or defaulting null/blank to the default format) to avoid surprising NPEs for Java/Scala callers. ########## common/src/main/java/org/apache/sedona/common/raster/RasterEditors.java: ########## @@ -102,7 +114,278 @@ public static GridCoverage2D setSrid(GridCoverage2D raster, int srid) { } else { crs = FunctionsGeoTools.sridToCRS(srid); } + return replaceCrs(raster, crs); + } + + /** + * Sets the CRS of a raster using a CRS string. Accepts EPSG codes (e.g. "EPSG:4326"), WKT1, WKT2, + * PROJ strings, and PROJJSON. + * + * @param raster The input raster. + * @param crsString The CRS definition string. + * @return The raster with the new CRS. + */ + public static GridCoverage2D setCrs(GridCoverage2D raster, String crsString) { + CoordinateReferenceSystem crs = parseCrsString(crsString); + return replaceCrs(raster, crs); + } + + /** + * Parse a CRS string in any supported format into a GeoTools CoordinateReferenceSystem. + * + * <p>Parsing priority: + * + * <ol> + * <li>GeoTools CRS.decode — handles authority codes like EPSG:4326 + * <li>GeoTools CRS.parseWKT — handles WKT1 strings + * <li>proj4sedona — handles WKT2, PROJ strings, PROJJSON. If an EPSG authority can be resolved, + * uses CRS.decode for a lossless result. Otherwise falls back to WKT1 conversion. + * </ol> + * + * @param crsString The CRS definition string. + * @return The parsed CoordinateReferenceSystem. + * @throws IllegalArgumentException if the CRS string cannot be parsed. + */ + static CoordinateReferenceSystem parseCrsString(String crsString) { + // Step 1: Try GeoTools CRS.decode (handles EPSG:xxxx, AUTO:xxxx, etc.) + try { + return CRS.decode(crsString, true); + } catch (FactoryException e) { + // Not an authority code, continue + } + + // Step 2: Try GeoTools CRS.parseWKT (handles WKT1) + try { + return CRS.parseWKT(crsString); + } catch (FactoryException e) { + // Not WKT1, continue + } + + // Step 3: Use proj4sedona (handles WKT2, PROJ, PROJJSON) + try { + Proj proj = new Proj(crsString); + + // Try to resolve to an EPSG authority code for a lossless result + String authority = proj.toEpsgCode(); + if (authority != null && !authority.isEmpty()) { + try { + return CRS.decode(authority, true); + } catch (FactoryException ex) { + // Authority code not recognized by GeoTools, fall through to WKT1 + } + } + + // Fallback: convert to WKT1 via proj4sedona and parse with GeoTools. + // proj4sedona may include parameters GeoTools doesn't expect (e.g. standard_parallel_1 + // for projections that don't use it). We handle this by trying several parse strategies: + // 1. Raw WKT1 (proj4sedona's projection names may already be recognized by GeoTools) + // 2. Normalized WKT1 (resolve projection names to canonical OGC names) + // 3. Strip unexpected parameters iteratively + String wkt1 = proj.toWkt1(); + if (wkt1 != null && !wkt1.isEmpty()) { + // Strategy 1: Try raw WKT1 directly + try { + return CRS.parseWKT(wkt1); + } catch (FactoryException ex) { + // Raw WKT1 failed, continue with normalization + } + + // Strategy 2: Try with normalized projection name + String normalizedWkt = normalizeWkt1ProjectionName(wkt1); + // Strategy 3: If parsing fails due to unexpected parameters, strip them iteratively. + // proj4sedona sometimes includes parameters like standard_parallel_1 for projections + // that don't use it. We parse the error message to identify and remove the offending + // parameter, then retry. + String currentWkt = normalizedWkt; + for (int attempt = 0; attempt < 5; attempt++) { + try { + return CRS.parseWKT(currentWkt); + } catch (FactoryException ex) { + String msg = ex.getMessage(); + if (msg != null) { + Matcher paramMatcher = UNEXPECTED_PARAM_PATTERN.matcher(msg); + if (paramMatcher.find()) { + currentWkt = stripWktParameter(currentWkt, paramMatcher.group(1)); + continue; + } + } + break; // Different kind of error, give up + } + } + } + } catch (Exception e) { + // proj4sedona could not parse it either + } + + throw new IllegalArgumentException( + "Cannot parse CRS string. Supported formats: EPSG code (e.g. 'EPSG:4326'), " + + "WKT1, WKT2, PROJ string, PROJJSON. Input: " + + crsString); + } + + // Fallback map for proj4sedona projection names that have no equivalent in GeoTools' + // alias database and cannot be resolved via normalized matching. These are proj4sedona-specific + // long-form alias names. Verified via exhaustive testing of all 58 proj4sedona registered names. + private static final Map<String, String> PROJECTION_NAME_FALLBACK; + + static { + Map<String, String> m = new HashMap<>(); + m.put("Lambert_Cylindrical_Equal_Area", "Cylindrical_Equal_Area"); + m.put("Extended_Transverse_Mercator", "Transverse_Mercator"); + m.put("Extended Transverse Mercator", "Transverse_Mercator"); + m.put("Lambert Tangential Conformal Conic Projection", "Lambert_Conformal_Conic"); + m.put("Mercator_Variant_A", "Mercator_1SP"); + m.put("Polar_Stereographic_variant_A", "Polar_Stereographic"); + m.put("Polar_Stereographic_variant_B", "Polar_Stereographic"); + m.put("Universal Transverse Mercator System", "Transverse_Mercator"); + m.put("Universal_Transverse_Mercator", "Transverse_Mercator"); + PROJECTION_NAME_FALLBACK = Collections.unmodifiableMap(m); + } + + // Lazy-initialized caches built once from GeoTools' registered OperationMethod objects. + // aliasCache: exact alias string -> canonical OGC name + // normalizedCache: normalized form (lowercase, no spaces/underscores) -> set of canonical names + private static volatile Map<String, String> aliasCache; + private static volatile Map<String, Set<String>> normalizedCache; + + private static final Pattern PROJECTION_PATTERN = Pattern.compile("PROJECTION\\[\"([^\"]+)\"\\]"); + private static final Pattern UNEXPECTED_PARAM_PATTERN = + Pattern.compile("Parameter \"([^\"]+)\" was not expected"); + + /** + * Strip a named PARAMETER from a WKT1 string. Used to remove parameters that proj4sedona includes + * but GeoTools does not expect (e.g. standard_parallel_1 for Transverse Mercator). + * + * @param wkt The WKT1 string. + * @param paramName The parameter name to strip (e.g. "standard_parallel_1"). + * @return The WKT1 string with the parameter removed. + */ + private static String stripWktParameter(String wkt, String paramName) { + // Remove ,PARAMETER["paramName",value] or PARAMETER["paramName",value], + String escaped = Pattern.quote(paramName); + String result = wkt.replaceAll(",\\s*PARAMETER\\[\"" + escaped + "\",[^\\]]*\\]", ""); + if (result.equals(wkt)) { + result = wkt.replaceAll("PARAMETER\\[\"" + escaped + "\",[^\\]]*\\]\\s*,?", ""); + } + return result; + } + + /** + * Normalize a projection name for loose matching: lowercase, remove spaces and underscores. + * + * @param name The projection name to normalize. + * @return The normalized form (e.g. "Lambert_Conformal_Conic_2SP" → "lambertconformalconic2sp"). + */ + private static String normalizeForMatch(String name) { + return name.toLowerCase().replaceAll("[_ ]", ""); + } + + /** + * Resolve a projection name to its canonical OGC WKT1 name. Uses a three-tier strategy: + * + * <ol> + * <li><b>Exact alias matching</b> — uses all aliases registered in GeoTools' {@link + * OperationMethod} objects from OGC, EPSG, GeoTIFF, ESRI, and PROJ authorities. This is a + * direct case-sensitive lookup into the alias cache. + * <li><b>Normalized matching</b> — strips spaces, underscores, and lowercases both the input + * and all known GeoTools projection names/aliases. If this yields exactly one canonical + * name, it is used. This handles formatting differences (e.g. spaces vs underscores) that + * arise when proj4sedona WKT1 output uses different conventions than GeoTools. Ambiguous + * normalized forms (mapping to multiple canonical names) are skipped to avoid incorrect + * resolution. + * <li><b>Hardcoded fallback</b> — for proj4sedona-specific projection names that have no + * equivalent in GeoTools' alias database (e.g. "Extended_Transverse_Mercator", + * "Lambert_Cylindrical_Equal_Area"). + * </ol> + * + * <p>Verified via exhaustive testing against all 58 proj4sedona registered projection names: 42 + * resolve via exact alias matching, 5 via normalized matching, and 9 via hardcoded fallback. The + * remaining 2 (longlat, identity) are geographic CRS codes that produce no PROJECTION[] element + * in WKT1. + * + * @param projName The projection name to resolve (e.g. "Lambert Conformal Conic"). + * @return The canonical OGC name (e.g. "Lambert_Conformal_Conic"), or the input unchanged. + */ + private static String resolveProjectionName(String projName) { + ensureCachesBuilt(); + + // Tier 1: Exact alias match from GeoTools + String resolved = aliasCache.get(projName); + if (resolved != null) { + return resolved; + } + + // Tier 2: Normalized match (handles space/underscore differences automatically) + String normalized = normalizeForMatch(projName); + Set<String> candidates = normalizedCache.get(normalized); + if (candidates != null && candidates.size() == 1) { + String canonical = candidates.iterator().next(); + aliasCache.put(projName, canonical); + return canonical; + } Review Comment: `resolveProjectionName` writes to `aliasCache` (`aliasCache.put(...)`) after the caches are published. Since `aliasCache` is a plain `HashMap` shared across threads, concurrent Spark execution can hit race conditions / map corruption. Consider making the caches immutable after initialization, or switching to a thread-safe map and avoiding unsynchronized writes. ########## common/src/main/java/org/apache/sedona/common/raster/RasterEditors.java: ########## @@ -102,7 +114,278 @@ public static GridCoverage2D setSrid(GridCoverage2D raster, int srid) { } else { crs = FunctionsGeoTools.sridToCRS(srid); } + return replaceCrs(raster, crs); + } + + /** + * Sets the CRS of a raster using a CRS string. Accepts EPSG codes (e.g. "EPSG:4326"), WKT1, WKT2, + * PROJ strings, and PROJJSON. + * + * @param raster The input raster. + * @param crsString The CRS definition string. + * @return The raster with the new CRS. + */ + public static GridCoverage2D setCrs(GridCoverage2D raster, String crsString) { + CoordinateReferenceSystem crs = parseCrsString(crsString); + return replaceCrs(raster, crs); + } + + /** + * Parse a CRS string in any supported format into a GeoTools CoordinateReferenceSystem. + * + * <p>Parsing priority: + * + * <ol> + * <li>GeoTools CRS.decode — handles authority codes like EPSG:4326 + * <li>GeoTools CRS.parseWKT — handles WKT1 strings + * <li>proj4sedona — handles WKT2, PROJ strings, PROJJSON. If an EPSG authority can be resolved, + * uses CRS.decode for a lossless result. Otherwise falls back to WKT1 conversion. + * </ol> + * + * @param crsString The CRS definition string. + * @return The parsed CoordinateReferenceSystem. + * @throws IllegalArgumentException if the CRS string cannot be parsed. + */ + static CoordinateReferenceSystem parseCrsString(String crsString) { + // Step 1: Try GeoTools CRS.decode (handles EPSG:xxxx, AUTO:xxxx, etc.) + try { + return CRS.decode(crsString, true); + } catch (FactoryException e) { + // Not an authority code, continue + } + + // Step 2: Try GeoTools CRS.parseWKT (handles WKT1) + try { + return CRS.parseWKT(crsString); + } catch (FactoryException e) { + // Not WKT1, continue + } + + // Step 3: Use proj4sedona (handles WKT2, PROJ, PROJJSON) + try { + Proj proj = new Proj(crsString); + + // Try to resolve to an EPSG authority code for a lossless result + String authority = proj.toEpsgCode(); + if (authority != null && !authority.isEmpty()) { + try { + return CRS.decode(authority, true); + } catch (FactoryException ex) { + // Authority code not recognized by GeoTools, fall through to WKT1 + } + } + + // Fallback: convert to WKT1 via proj4sedona and parse with GeoTools. + // proj4sedona may include parameters GeoTools doesn't expect (e.g. standard_parallel_1 + // for projections that don't use it). We handle this by trying several parse strategies: + // 1. Raw WKT1 (proj4sedona's projection names may already be recognized by GeoTools) + // 2. Normalized WKT1 (resolve projection names to canonical OGC names) + // 3. Strip unexpected parameters iteratively + String wkt1 = proj.toWkt1(); + if (wkt1 != null && !wkt1.isEmpty()) { + // Strategy 1: Try raw WKT1 directly + try { + return CRS.parseWKT(wkt1); + } catch (FactoryException ex) { + // Raw WKT1 failed, continue with normalization + } + + // Strategy 2: Try with normalized projection name + String normalizedWkt = normalizeWkt1ProjectionName(wkt1); + // Strategy 3: If parsing fails due to unexpected parameters, strip them iteratively. + // proj4sedona sometimes includes parameters like standard_parallel_1 for projections + // that don't use it. We parse the error message to identify and remove the offending + // parameter, then retry. + String currentWkt = normalizedWkt; + for (int attempt = 0; attempt < 5; attempt++) { + try { + return CRS.parseWKT(currentWkt); + } catch (FactoryException ex) { + String msg = ex.getMessage(); + if (msg != null) { + Matcher paramMatcher = UNEXPECTED_PARAM_PATTERN.matcher(msg); + if (paramMatcher.find()) { + currentWkt = stripWktParameter(currentWkt, paramMatcher.group(1)); + continue; + } + } + break; // Different kind of error, give up + } + } + } + } catch (Exception e) { + // proj4sedona could not parse it either + } Review Comment: The proj4sedona parsing branch catches a broad `Exception` and silently discards it (lines 216-218), then later throws a generic parse error. This makes real failures hard to debug and can hide unexpected runtime problems. Consider catching only expected parse exceptions and/or attaching the original exception as the cause/suppressed exception in the final `IllegalArgumentException`. ########## common/src/main/java/org/apache/sedona/common/raster/RasterEditors.java: ########## @@ -102,7 +114,278 @@ public static GridCoverage2D setSrid(GridCoverage2D raster, int srid) { } else { crs = FunctionsGeoTools.sridToCRS(srid); } + return replaceCrs(raster, crs); + } + + /** + * Sets the CRS of a raster using a CRS string. Accepts EPSG codes (e.g. "EPSG:4326"), WKT1, WKT2, + * PROJ strings, and PROJJSON. + * + * @param raster The input raster. + * @param crsString The CRS definition string. + * @return The raster with the new CRS. + */ + public static GridCoverage2D setCrs(GridCoverage2D raster, String crsString) { + CoordinateReferenceSystem crs = parseCrsString(crsString); + return replaceCrs(raster, crs); + } + + /** + * Parse a CRS string in any supported format into a GeoTools CoordinateReferenceSystem. + * + * <p>Parsing priority: + * + * <ol> + * <li>GeoTools CRS.decode — handles authority codes like EPSG:4326 + * <li>GeoTools CRS.parseWKT — handles WKT1 strings + * <li>proj4sedona — handles WKT2, PROJ strings, PROJJSON. If an EPSG authority can be resolved, + * uses CRS.decode for a lossless result. Otherwise falls back to WKT1 conversion. + * </ol> + * + * @param crsString The CRS definition string. + * @return The parsed CoordinateReferenceSystem. + * @throws IllegalArgumentException if the CRS string cannot be parsed. + */ + static CoordinateReferenceSystem parseCrsString(String crsString) { + // Step 1: Try GeoTools CRS.decode (handles EPSG:xxxx, AUTO:xxxx, etc.) + try { + return CRS.decode(crsString, true); + } catch (FactoryException e) { + // Not an authority code, continue + } + + // Step 2: Try GeoTools CRS.parseWKT (handles WKT1) + try { + return CRS.parseWKT(crsString); + } catch (FactoryException e) { + // Not WKT1, continue + } + + // Step 3: Use proj4sedona (handles WKT2, PROJ, PROJJSON) + try { + Proj proj = new Proj(crsString); + + // Try to resolve to an EPSG authority code for a lossless result + String authority = proj.toEpsgCode(); + if (authority != null && !authority.isEmpty()) { + try { + return CRS.decode(authority, true); + } catch (FactoryException ex) { + // Authority code not recognized by GeoTools, fall through to WKT1 + } + } + + // Fallback: convert to WKT1 via proj4sedona and parse with GeoTools. + // proj4sedona may include parameters GeoTools doesn't expect (e.g. standard_parallel_1 + // for projections that don't use it). We handle this by trying several parse strategies: + // 1. Raw WKT1 (proj4sedona's projection names may already be recognized by GeoTools) + // 2. Normalized WKT1 (resolve projection names to canonical OGC names) + // 3. Strip unexpected parameters iteratively + String wkt1 = proj.toWkt1(); + if (wkt1 != null && !wkt1.isEmpty()) { + // Strategy 1: Try raw WKT1 directly + try { + return CRS.parseWKT(wkt1); + } catch (FactoryException ex) { + // Raw WKT1 failed, continue with normalization + } + + // Strategy 2: Try with normalized projection name + String normalizedWkt = normalizeWkt1ProjectionName(wkt1); + // Strategy 3: If parsing fails due to unexpected parameters, strip them iteratively. + // proj4sedona sometimes includes parameters like standard_parallel_1 for projections + // that don't use it. We parse the error message to identify and remove the offending + // parameter, then retry. + String currentWkt = normalizedWkt; + for (int attempt = 0; attempt < 5; attempt++) { + try { + return CRS.parseWKT(currentWkt); + } catch (FactoryException ex) { + String msg = ex.getMessage(); + if (msg != null) { + Matcher paramMatcher = UNEXPECTED_PARAM_PATTERN.matcher(msg); + if (paramMatcher.find()) { + currentWkt = stripWktParameter(currentWkt, paramMatcher.group(1)); + continue; + } + } + break; // Different kind of error, give up + } + } + } + } catch (Exception e) { + // proj4sedona could not parse it either + } + + throw new IllegalArgumentException( + "Cannot parse CRS string. Supported formats: EPSG code (e.g. 'EPSG:4326'), " + + "WKT1, WKT2, PROJ string, PROJJSON. Input: " + + crsString); + } + + // Fallback map for proj4sedona projection names that have no equivalent in GeoTools' + // alias database and cannot be resolved via normalized matching. These are proj4sedona-specific + // long-form alias names. Verified via exhaustive testing of all 58 proj4sedona registered names. + private static final Map<String, String> PROJECTION_NAME_FALLBACK; + + static { + Map<String, String> m = new HashMap<>(); + m.put("Lambert_Cylindrical_Equal_Area", "Cylindrical_Equal_Area"); + m.put("Extended_Transverse_Mercator", "Transverse_Mercator"); + m.put("Extended Transverse Mercator", "Transverse_Mercator"); + m.put("Lambert Tangential Conformal Conic Projection", "Lambert_Conformal_Conic"); + m.put("Mercator_Variant_A", "Mercator_1SP"); + m.put("Polar_Stereographic_variant_A", "Polar_Stereographic"); + m.put("Polar_Stereographic_variant_B", "Polar_Stereographic"); + m.put("Universal Transverse Mercator System", "Transverse_Mercator"); + m.put("Universal_Transverse_Mercator", "Transverse_Mercator"); + PROJECTION_NAME_FALLBACK = Collections.unmodifiableMap(m); + } + + // Lazy-initialized caches built once from GeoTools' registered OperationMethod objects. + // aliasCache: exact alias string -> canonical OGC name + // normalizedCache: normalized form (lowercase, no spaces/underscores) -> set of canonical names + private static volatile Map<String, String> aliasCache; + private static volatile Map<String, Set<String>> normalizedCache; + + private static final Pattern PROJECTION_PATTERN = Pattern.compile("PROJECTION\\[\"([^\"]+)\"\\]"); + private static final Pattern UNEXPECTED_PARAM_PATTERN = + Pattern.compile("Parameter \"([^\"]+)\" was not expected"); + + /** + * Strip a named PARAMETER from a WKT1 string. Used to remove parameters that proj4sedona includes + * but GeoTools does not expect (e.g. standard_parallel_1 for Transverse Mercator). + * + * @param wkt The WKT1 string. + * @param paramName The parameter name to strip (e.g. "standard_parallel_1"). + * @return The WKT1 string with the parameter removed. + */ + private static String stripWktParameter(String wkt, String paramName) { + // Remove ,PARAMETER["paramName",value] or PARAMETER["paramName",value], + String escaped = Pattern.quote(paramName); + String result = wkt.replaceAll(",\\s*PARAMETER\\[\"" + escaped + "\",[^\\]]*\\]", ""); + if (result.equals(wkt)) { + result = wkt.replaceAll("PARAMETER\\[\"" + escaped + "\",[^\\]]*\\]\\s*,?", ""); + } + return result; + } + + /** + * Normalize a projection name for loose matching: lowercase, remove spaces and underscores. + * + * @param name The projection name to normalize. + * @return The normalized form (e.g. "Lambert_Conformal_Conic_2SP" → "lambertconformalconic2sp"). + */ + private static String normalizeForMatch(String name) { + return name.toLowerCase().replaceAll("[_ ]", ""); + } + + /** + * Resolve a projection name to its canonical OGC WKT1 name. Uses a three-tier strategy: + * + * <ol> + * <li><b>Exact alias matching</b> — uses all aliases registered in GeoTools' {@link + * OperationMethod} objects from OGC, EPSG, GeoTIFF, ESRI, and PROJ authorities. This is a + * direct case-sensitive lookup into the alias cache. + * <li><b>Normalized matching</b> — strips spaces, underscores, and lowercases both the input + * and all known GeoTools projection names/aliases. If this yields exactly one canonical + * name, it is used. This handles formatting differences (e.g. spaces vs underscores) that + * arise when proj4sedona WKT1 output uses different conventions than GeoTools. Ambiguous + * normalized forms (mapping to multiple canonical names) are skipped to avoid incorrect + * resolution. + * <li><b>Hardcoded fallback</b> — for proj4sedona-specific projection names that have no + * equivalent in GeoTools' alias database (e.g. "Extended_Transverse_Mercator", + * "Lambert_Cylindrical_Equal_Area"). + * </ol> + * + * <p>Verified via exhaustive testing against all 58 proj4sedona registered projection names: 42 + * resolve via exact alias matching, 5 via normalized matching, and 9 via hardcoded fallback. The + * remaining 2 (longlat, identity) are geographic CRS codes that produce no PROJECTION[] element + * in WKT1. + * + * @param projName The projection name to resolve (e.g. "Lambert Conformal Conic"). + * @return The canonical OGC name (e.g. "Lambert_Conformal_Conic"), or the input unchanged. + */ + private static String resolveProjectionName(String projName) { + ensureCachesBuilt(); + + // Tier 1: Exact alias match from GeoTools + String resolved = aliasCache.get(projName); + if (resolved != null) { + return resolved; + } + + // Tier 2: Normalized match (handles space/underscore differences automatically) + String normalized = normalizeForMatch(projName); + Set<String> candidates = normalizedCache.get(normalized); + if (candidates != null && candidates.size() == 1) { + String canonical = candidates.iterator().next(); + aliasCache.put(projName, canonical); + return canonical; + } + + // Tier 3: Hardcoded fallback for proj4sedona-specific names not in GeoTools + return PROJECTION_NAME_FALLBACK.getOrDefault(projName, projName); + } + + /** + * Build caches mapping projection aliases and normalized names to canonical OGC names. Scans all + * GeoTools {@link OperationMethod} objects registered for {@link Projection}. Thread-safe via + * double-checked locking. + */ + private static void ensureCachesBuilt() { + if (aliasCache != null) { + return; + } + synchronized (RasterEditors.class) { + if (aliasCache != null) { + return; + } + DefaultMathTransformFactory factory = + (DefaultMathTransformFactory) ReferencingFactoryFinder.getMathTransformFactory(null); + Set<OperationMethod> methods = factory.getAvailableMethods(Projection.class); Review Comment: `ensureCachesBuilt` downcasts the `MathTransformFactory` returned by `ReferencingFactoryFinder.getMathTransformFactory(null)` to `DefaultMathTransformFactory`. That cast is not guaranteed and can throw `ClassCastException` if a different factory implementation is configured. Consider avoiding the downcast (or guarding it with `instanceof` and falling back to a new `DefaultMathTransformFactory`) so cache initialization is robust across GeoTools setups. ########## common/src/main/java/org/apache/sedona/common/raster/RasterEditors.java: ########## @@ -102,7 +114,278 @@ public static GridCoverage2D setSrid(GridCoverage2D raster, int srid) { } else { crs = FunctionsGeoTools.sridToCRS(srid); } + return replaceCrs(raster, crs); + } + + /** + * Sets the CRS of a raster using a CRS string. Accepts EPSG codes (e.g. "EPSG:4326"), WKT1, WKT2, + * PROJ strings, and PROJJSON. + * + * @param raster The input raster. + * @param crsString The CRS definition string. + * @return The raster with the new CRS. + */ + public static GridCoverage2D setCrs(GridCoverage2D raster, String crsString) { + CoordinateReferenceSystem crs = parseCrsString(crsString); + return replaceCrs(raster, crs); + } + + /** + * Parse a CRS string in any supported format into a GeoTools CoordinateReferenceSystem. + * + * <p>Parsing priority: + * + * <ol> + * <li>GeoTools CRS.decode — handles authority codes like EPSG:4326 + * <li>GeoTools CRS.parseWKT — handles WKT1 strings + * <li>proj4sedona — handles WKT2, PROJ strings, PROJJSON. If an EPSG authority can be resolved, + * uses CRS.decode for a lossless result. Otherwise falls back to WKT1 conversion. + * </ol> + * + * @param crsString The CRS definition string. + * @return The parsed CoordinateReferenceSystem. + * @throws IllegalArgumentException if the CRS string cannot be parsed. + */ + static CoordinateReferenceSystem parseCrsString(String crsString) { + // Step 1: Try GeoTools CRS.decode (handles EPSG:xxxx, AUTO:xxxx, etc.) + try { + return CRS.decode(crsString, true); + } catch (FactoryException e) { + // Not an authority code, continue + } + + // Step 2: Try GeoTools CRS.parseWKT (handles WKT1) + try { + return CRS.parseWKT(crsString); + } catch (FactoryException e) { Review Comment: Step 2 uses `CRS.parseWKT(crsString)` without forcing longitude-first axis order, while other CRS decoding in Sedona forces lon/lat (e.g., `FunctionsGeoTools.sridToCRS` / `parseCRSString`). This can yield inconsistent axis ordering depending on the WKT/AUTHORITY, which can break downstream transforms. Consider parsing WKT with GeoTools hints (e.g., `Hints.FORCE_LONGITUDE_FIRST_AXIS_ORDER`) similar to `FunctionsGeoTools.parseCRSString`. ########## common/src/test/java/org/apache/sedona/common/raster/CrsRoundTripComplianceTest.java: ########## @@ -0,0 +1,723 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.raster; + +import static org.junit.Assert.*; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.geotools.api.referencing.FactoryException; +import org.geotools.coverage.grid.GridCoverage2D; +import org.junit.Test; + +/** + * Round-trip compliance tests for RS_SetCRS and RS_CRS across representative EPSG codes. + * + * <p>For each EPSG code and each format (PROJ, PROJJSON, WKT1), this test: Review Comment: The class-level Javadoc says the compliance suite iterates formats "(PROJ, PROJJSON, WKT1)", but the file also contains a full set of WKT2 round-trip tests. Update the description to include WKT2 so the documentation matches the actual coverage. ```suggestion * <p>For each EPSG code and each format (PROJ, PROJJSON, WKT1, WKT2), this test: ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
