This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 5b9d2bfd2f [GH-2652] Add RS_AsCOG SQL function for Cloud Optimized
GeoTiff output (#2669)
5b9d2bfd2f is described below
commit 5b9d2bfd2ff70b0257722d37995ff16562ac097e
Author: Jia Yu <[email protected]>
AuthorDate: Sun Feb 22 02:02:48 2026 -0700
[GH-2652] Add RS_AsCOG SQL function for Cloud Optimized GeoTiff output
(#2669)
---
.../apache/sedona/common/raster/RasterOutputs.java | 56 ++++++++++++
.../sedona/common/raster/cog/CogOptions.java | 53 ++++++++----
.../sedona/common/raster/RasterOutputTest.java | 99 ++++++++++++++++++++++
docs/api/sql/Raster-writer.md | 65 ++++++++++++++
docs/tutorial/raster.md | 8 ++
.../scala/org/apache/sedona/sql/UDF/Catalog.scala | 1 +
.../expressions/raster/RasterOutputs.scala | 13 +++
.../org/apache/sedona/sql/rasteralgebraTest.scala | 81 ++++++++++++++++++
8 files changed, 360 insertions(+), 16 deletions(-)
diff --git
a/common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java
b/common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java
index da49f3201e..1fb834651c 100644
--- a/common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java
+++ b/common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java
@@ -108,6 +108,62 @@ public class RasterOutputs {
}
}
+ // ---------- asCOG overloads (used by RS_AsCOG via InferredExpression)
----------
+
+ public static byte[] asCOG(GridCoverage2D raster) {
+ return asCloudOptimizedGeoTiff(raster, CogOptions.defaults());
+ }
+
+ public static byte[] asCOG(GridCoverage2D raster, String compression) {
+ return asCloudOptimizedGeoTiff(raster,
CogOptions.builder().compression(compression).build());
+ }
+
+ public static byte[] asCOG(GridCoverage2D raster, String compression, int
tileSize) {
+ return asCloudOptimizedGeoTiff(
+ raster,
CogOptions.builder().compression(compression).tileSize(tileSize).build());
+ }
+
+ public static byte[] asCOG(
+ GridCoverage2D raster, String compression, int tileSize, double quality)
{
+ return asCloudOptimizedGeoTiff(
+ raster,
+ CogOptions.builder()
+ .compression(compression)
+ .tileSize(tileSize)
+ .compressionQuality(quality)
+ .build());
+ }
+
+ public static byte[] asCOG(
+ GridCoverage2D raster, String compression, int tileSize, double quality,
String resampling) {
+ return asCloudOptimizedGeoTiff(
+ raster,
+ CogOptions.builder()
+ .compression(compression)
+ .tileSize(tileSize)
+ .compressionQuality(quality)
+ .resampling(resampling)
+ .build());
+ }
+
+ public static byte[] asCOG(
+ GridCoverage2D raster,
+ String compression,
+ int tileSize,
+ double quality,
+ String resampling,
+ int overviewCount) {
+ return asCloudOptimizedGeoTiff(
+ raster,
+ CogOptions.builder()
+ .compression(compression)
+ .tileSize(tileSize)
+ .compressionQuality(quality)
+ .resampling(resampling)
+ .overviewCount(overviewCount)
+ .build());
+ }
+
/**
* Creates a GeoTiff file with the provided raster. Primarily used for
testing.
*
diff --git
a/common/src/main/java/org/apache/sedona/common/raster/cog/CogOptions.java
b/common/src/main/java/org/apache/sedona/common/raster/cog/CogOptions.java
index a5fc1814fc..d0ec21ca30 100644
--- a/common/src/main/java/org/apache/sedona/common/raster/cog/CogOptions.java
+++ b/common/src/main/java/org/apache/sedona/common/raster/cog/CogOptions.java
@@ -20,7 +20,6 @@ package org.apache.sedona.common.raster.cog;
import java.util.Arrays;
import java.util.List;
-import java.util.Locale;
/**
* Options for Cloud Optimized GeoTIFF (COG) generation.
@@ -206,10 +205,19 @@ public final class CogOptions {
if (compression == null || compression.isEmpty()) {
throw new IllegalArgumentException("compression must not be null or
empty");
}
- if (!VALID_COMPRESSION.contains(compression)) {
+ // Preserve the original input for error reporting
+ String originalCompression = compression;
+ // Case-insensitive matching: find the canonical value from the valid
list
+ String normalizedCompression = matchIgnoreCase(VALID_COMPRESSION,
originalCompression);
+ if (normalizedCompression == null) {
throw new IllegalArgumentException(
- "compression must be one of " + VALID_COMPRESSION + ", got: '" +
compression + "'");
+ "compression must be one of "
+ + VALID_COMPRESSION
+ + ", got: '"
+ + originalCompression
+ + "'");
}
+ this.compression = normalizedCompression;
if (compressionQuality < 0 || compressionQuality > 1.0) {
throw new IllegalArgumentException(
"compressionQuality must be between 0.0 and 1.0, got: " +
compressionQuality);
@@ -225,27 +233,40 @@ public final class CogOptions {
"overviewCount must be -1 (auto), 0 (none), or positive, got: " +
overviewCount);
}
- // Normalize resampling to title-case for matching
- String normalized = normalizeResampling(resampling);
- if (!VALID_RESAMPLING.contains(normalized)) {
- throw new IllegalArgumentException(
- "resampling must be one of " + VALID_RESAMPLING + ", got: '" +
resampling + "'");
+ // Case-insensitive matching for resampling; treat null/blank as default
(Nearest)
+ if (resampling == null || resampling.isEmpty()) {
+ this.resampling = "Nearest";
+ } else {
+ String originalResampling = resampling;
+ String normalizedResampling = matchIgnoreCase(VALID_RESAMPLING,
originalResampling);
+ if (normalizedResampling == null) {
+ throw new IllegalArgumentException(
+ "resampling must be one of "
+ + VALID_RESAMPLING
+ + ", got: '"
+ + originalResampling
+ + "'");
+ }
+ this.resampling = normalizedResampling;
}
- this.resampling = normalized;
return new CogOptions(this);
}
/**
- * Normalize the resampling string to title-case (first letter uppercase,
rest lowercase) so
- * callers can pass "nearest", "BILINEAR", etc.
+ * Find the canonical value in a list that matches the input
case-insensitively. Returns null
+ * if no match is found. This allows callers to pass "lzw", "PACKBITS",
"bilinear", etc.
*/
- private static String normalizeResampling(String value) {
- if (value == null || value.isEmpty()) {
- return "Nearest";
+ private static String matchIgnoreCase(List<String> validValues, String
input) {
+ if (input == null) {
+ return null;
+ }
+ for (String valid : validValues) {
+ if (valid.equalsIgnoreCase(input)) {
+ return valid;
+ }
}
- String lower = value.toLowerCase(Locale.ROOT);
- return Character.toUpperCase(lower.charAt(0)) + lower.substring(1);
+ return null;
}
}
}
diff --git
a/common/src/test/java/org/apache/sedona/common/raster/RasterOutputTest.java
b/common/src/test/java/org/apache/sedona/common/raster/RasterOutputTest.java
index 846414ab1b..2d1c6fd76f 100644
--- a/common/src/test/java/org/apache/sedona/common/raster/RasterOutputTest.java
+++ b/common/src/test/java/org/apache/sedona/common/raster/RasterOutputTest.java
@@ -254,4 +254,103 @@ public class RasterOutputTest extends RasterTestBase {
}
}
}
+
+ // ---- RS_AsCOG / asCOG tests ----
+
+ @Test
+ public void testAsCOGDefaults() throws IOException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder +
"raster/test1.tiff");
+ byte[] cogBytes = RasterOutputs.asCOG(raster);
+ assertNotNull(cogBytes);
+ assertTrue(cogBytes.length >= 2);
+ // Verify it is a valid TIFF (starts with II or MM)
+ assertTrue(
+ (cogBytes[0] == 'I' && cogBytes[1] == 'I') || (cogBytes[0] == 'M' &&
cogBytes[1] == 'M'));
+ }
+
+ @Test
+ public void testAsCOGRoundTrip() throws IOException {
+ GridCoverage2D original = rasterFromGeoTiff(resourceFolder +
"raster/test1.tiff");
+ byte[] cogBytes = RasterOutputs.asCOG(original, "LZW", 256);
+ // Read COG bytes back as a raster via GeoTiff reader
+ GridCoverage2D roundTripped = RasterConstructors.fromGeoTiff(cogBytes);
+ assertNotNull(roundTripped);
+ // Verify envelope is preserved
+ assertEquals(original.getEnvelope2D().toString(),
roundTripped.getEnvelope2D().toString());
+ // Verify dimensions are preserved
+ assertEquals(
+ original.getRenderedImage().getWidth(),
roundTripped.getRenderedImage().getWidth());
+ assertEquals(
+ original.getRenderedImage().getHeight(),
roundTripped.getRenderedImage().getHeight());
+ // Verify number of bands is preserved
+ assertEquals(original.getNumSampleDimensions(),
roundTripped.getNumSampleDimensions());
+ }
+
+ @Test
+ public void testAsCOGWithCompression() throws IOException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder +
"raster/test1.tiff");
+ byte[] cogLZW = RasterOutputs.asCOG(raster, "LZW");
+ byte[] cogDeflate = RasterOutputs.asCOG(raster, "Deflate");
+ assertNotNull(cogLZW);
+ assertNotNull(cogDeflate);
+ assertTrue(cogLZW.length > 0);
+ assertTrue(cogDeflate.length > 0);
+ // Different compressions should produce different sizes
+ assertNotEquals(cogLZW.length, cogDeflate.length);
+ }
+
+ @Test
+ public void testAsCOGWithCompressionAndTileSize() throws IOException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder +
"raster/test1.tiff");
+ byte[] cog256 = RasterOutputs.asCOG(raster, "Deflate", 256);
+ byte[] cog512 = RasterOutputs.asCOG(raster, "Deflate", 512);
+ assertNotNull(cog256);
+ assertNotNull(cog512);
+ assertTrue(cog256.length > 0);
+ assertTrue(cog512.length > 0);
+ }
+
+ @Test
+ public void testAsCOGWithCompressionTileSizeAndQuality() throws IOException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder +
"raster/test1.tiff");
+ byte[] cogHighQ = RasterOutputs.asCOG(raster, "Deflate", 256, 1.0);
+ byte[] cogLowQ = RasterOutputs.asCOG(raster, "Deflate", 256, 0.1);
+ assertNotNull(cogHighQ);
+ assertNotNull(cogLowQ);
+ assertTrue(cogHighQ.length > 0);
+ assertTrue(cogLowQ.length > 0);
+ }
+
+ @Test
+ public void testAsCOGWithResampling() throws IOException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder +
"raster/test1.tiff");
+ byte[] cog = RasterOutputs.asCOG(raster, "Deflate", 256, 0.2, "Bilinear");
+ assertNotNull(cog);
+ assertTrue(cog.length > 0);
+ }
+
+ @Test
+ public void testAsCOGAllArgs() throws IOException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder +
"raster/test1.tiff");
+ byte[] cog = RasterOutputs.asCOG(raster, "LZW", 256, 0.5, "Nearest", 2);
+ assertNotNull(cog);
+ assertTrue(cog.length > 0);
+ }
+
+ @Test
+ public void testAsCOGCaseInsensitive() throws IOException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder +
"raster/test1.tiff");
+ // compression and resampling should be case-insensitive
+ byte[] cog = RasterOutputs.asCOG(raster, "lzw", 256, 0.5, "bilinear", 2);
+ assertNotNull(cog);
+ assertTrue(cog.length > 0);
+ // uppercase
+ byte[] cog2 = RasterOutputs.asCOG(raster, "DEFLATE", 256, 0.5, "NEAREST",
2);
+ assertNotNull(cog2);
+ assertTrue(cog2.length > 0);
+ // lowercase, compression-only overload: packbits
+ byte[] cog3 = RasterOutputs.asCOG(raster, "packbits");
+ assertNotNull(cog3);
+ assertTrue(cog3.length > 0);
+ }
}
diff --git a/docs/api/sql/Raster-writer.md b/docs/api/sql/Raster-writer.md
index df709ebc96..a7d682a2e0 100644
--- a/docs/api/sql/Raster-writer.md
+++ b/docs/api/sql/Raster-writer.md
@@ -116,6 +116,71 @@ root
|-- geotiff: binary (nullable = true)
```
+#### RS_AsCOG
+
+Introduction: Returns a binary DataFrame from a Raster DataFrame. Each raster
object in the resulting DataFrame is a [Cloud Optimized
GeoTIFF](https://www.cogeo.org/) (COG) image in binary format. COG is a GeoTIFF
that is internally organized to enable efficient range-read access over HTTP,
making it ideal for cloud-hosted raster data.
+
+Possible values for `compression`: `Deflate` (default), `LZW`, `JPEG`,
`PackBits`. Case-insensitive.
+
+`tileSize` must be a power of 2 (e.g., 128, 256, 512). Default value: `256`
+
+Possible values for `quality`: any decimal number between 0 and 1, where 0 means
maximum compression and 1 means minimum compression. Default value: `0.2`
+
+Possible values for `resampling`: `Nearest` (default), `Bilinear`, `Bicubic`.
Case-insensitive. This controls the resampling algorithm used to build overview
levels.
+
+`overviewCount` controls the number of overview levels. Use `-1` for automatic
(default), `0` for no overviews, or any positive integer for a specific count.
+
+Format:
+
+`RS_AsCOG(raster: Raster)`
+
+`RS_AsCOG(raster: Raster, compression: String)`
+
+`RS_AsCOG(raster: Raster, compression: String, tileSize: Integer)`
+
+`RS_AsCOG(raster: Raster, compression: String, tileSize: Integer, quality:
Double)`
+
+`RS_AsCOG(raster: Raster, compression: String, tileSize: Integer, quality:
Double, resampling: String)`
+
+`RS_AsCOG(raster: Raster, compression: String, tileSize: Integer, quality:
Double, resampling: String, overviewCount: Integer)`
+
+Since: `v1.9.0`
+
+SQL Example
+
+```sql
+SELECT RS_AsCOG(raster) FROM my_raster_table
+```
+
+SQL Example
+
+```sql
+SELECT RS_AsCOG(raster, 'LZW') FROM my_raster_table
+```
+
+SQL Example
+
+```sql
+SELECT RS_AsCOG(raster, 'LZW', 512, 0.75, 'Bilinear', 3) FROM my_raster_table
+```
+
+Output:
+
+```html
++--------------------+
+| cog|
++--------------------+
+|[4D 4D 00 2A 00 0...|
++--------------------+
+```
+
+Output schema:
+
+```sql
+root
+ |-- cog: binary (nullable = true)
+```
+
#### RS_AsPNG
Introduction: Returns a PNG byte array, that can be written to raster files as
PNGs using the [sedona function](#write-a-binary-dataframe-to-raster-files).
This function can only accept pixel data type of unsigned integer. PNG can
accept 1 or 3 bands of data from the raster, refer to
[RS_Band](Raster-operators.md#rs_band) for more details.
diff --git a/docs/tutorial/raster.md b/docs/tutorial/raster.md
index 717b688ab5..d2b21e65c0 100644
--- a/docs/tutorial/raster.md
+++ b/docs/tutorial/raster.md
@@ -527,6 +527,14 @@ Use
[RS_AsGeoTiff](../api/sql/Raster-writer.md#rs_asgeotiff) to get the binary D
SELECT RS_AsGeoTiff(raster)
```
+### As Cloud Optimized GeoTiff
+
+Use [RS_AsCOG](../api/sql/Raster-writer.md#rs_ascog) to get the binary
Dataframe of the raster in [Cloud Optimized GeoTiff](https://www.cogeo.org/)
(COG) format. COG is ideal for cloud-hosted raster data because it supports
efficient range-read access over HTTP.
+
+```sql
+SELECT RS_AsCOG(raster)
+```
+
### As PNG
Use [RS_AsPNG](../api/sql/Raster-writer.md#rs_aspng) to get the binary
Dataframe of the raster in PNG format.
diff --git
a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index 1c20dc5577..3f8ebf193e 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -314,6 +314,7 @@ object Catalog extends AbstractCatalog with Logging {
function[RS_Intersects](),
function[RS_Interpolate](),
function[RS_AsGeoTiff](),
+ function[RS_AsCOG](),
function[RS_AsRaster](),
function[RS_AsArcGrid](),
function[RS_AsBase64](),
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterOutputs.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterOutputs.scala
index f4799c9098..c333104579 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterOutputs.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterOutputs.scala
@@ -78,3 +78,16 @@ private[apache] case class RS_AsImage(inputExpressions:
Seq[Expression])
copy(inputExpressions = newChildren)
}
}
+
+private[apache] case class RS_AsCOG(inputExpressions: Seq[Expression])
+ extends InferredExpression(
+ inferrableFunction6(RasterOutputs.asCOG),
+ inferrableFunction5(RasterOutputs.asCOG),
+ inferrableFunction4(RasterOutputs.asCOG),
+ inferrableFunction3(RasterOutputs.asCOG),
+ inferrableFunction2(RasterOutputs.asCOG),
+ inferrableFunction1(RasterOutputs.asCOG)) {
+ protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) =
{
+ copy(inputExpressions = newChildren)
+ }
+}
diff --git
a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
index 1b6490c234..fb5db1993a 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
@@ -1089,6 +1089,87 @@ class rasteralgebraTest extends TestBaseScala with
BeforeAndAfter with GivenWhen
"iVBORw0KGgoAAAANSUhEUgAABaAAAALQCAMAAABR+ye1AAADAFBMVEXE9/W48vOq7PGa5u6L3"))
}
+ it("Passed RS_AsCOG with defaults") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder +
"raster/test1.tiff")
+ val rasterDf = df.selectExpr("RS_FromGeoTiff(content) as raster")
+ val original = rasterDf.first().getAs[GridCoverage2D]("raster")
+
+ // Round-trip: GeoTiff -> raster -> COG bytes -> raster
+ val roundTripped = rasterDf
+ .selectExpr("RS_AsCOG(raster) as cog")
+ .selectExpr("RS_FromGeoTiff(cog) as raster_new")
+ .first()
+ .getAs[GridCoverage2D]("raster_new")
+ assert(roundTripped != null)
+ assertEquals(original.getEnvelope.toString,
roundTripped.getEnvelope.toString)
+ assertEquals(original.getRenderedImage.getWidth,
roundTripped.getRenderedImage.getWidth)
+ assertEquals(original.getRenderedImage.getHeight,
roundTripped.getRenderedImage.getHeight)
+ }
+
+ it("Passed RS_AsCOG round-trip with compression") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder +
"raster/test1.tiff")
+ val rasterDf = df.selectExpr("RS_FromGeoTiff(content) as raster")
+ val original = rasterDf.first().getAs[GridCoverage2D]("raster")
+
+ val roundTripped = rasterDf
+ .selectExpr("RS_AsCOG(raster, 'LZW', 256) as cog")
+ .selectExpr("RS_FromGeoTiff(cog) as raster_new")
+ .first()
+ .getAs[GridCoverage2D]("raster_new")
+ assert(roundTripped != null)
+ assertEquals(original.getEnvelope.toString,
roundTripped.getEnvelope.toString)
+ assertEquals(original.getRenderedImage.getWidth,
roundTripped.getRenderedImage.getWidth)
+ assertEquals(original.getRenderedImage.getHeight,
roundTripped.getRenderedImage.getHeight)
+ }
+
+ it("Passed RS_AsCOG with compression") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder +
"raster/test1.tiff")
+ val resultDf = df
+ .selectExpr("RS_FromGeoTiff(content) as raster")
+ .selectExpr(
+ "RS_AsCOG(raster, 'LZW') as cog_lzw",
+ "RS_AsCOG(raster, 'Deflate') as cog_deflate")
+ val row = resultDf.first()
+ val cogLzw = row.getAs[Array[Byte]]("cog_lzw")
+ val cogDeflate = row.getAs[Array[Byte]]("cog_deflate")
+ assert(cogLzw.length > 0)
+ assert(cogDeflate.length > 0)
+ assert(cogLzw.length != cogDeflate.length)
+ }
+
+ it("Passed RS_AsCOG with compression and tileSize") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder +
"raster/test1.tiff")
+ val cogBytes = df
+ .selectExpr("RS_FromGeoTiff(content) as raster")
+ .selectExpr("RS_AsCOG(raster, 'Deflate', 512) as cog")
+ .first()
+ .getAs[Array[Byte]]("cog")
+ assert(cogBytes != null)
+ assert(cogBytes.length > 0)
+ }
+
+ it("Passed RS_AsCOG with all arguments") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder +
"raster/test1.tiff")
+ val cogBytes = df
+ .selectExpr("RS_FromGeoTiff(content) as raster")
+ .selectExpr("RS_AsCOG(raster, 'LZW', 256, 0.5, 'Nearest', 2) as cog")
+ .first()
+ .getAs[Array[Byte]]("cog")
+ assert(cogBytes != null)
+ assert(cogBytes.length > 0)
+ }
+
+ it("Passed RS_AsCOG case-insensitive args") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder +
"raster/test1.tiff")
+ val cogBytes = df
+ .selectExpr("RS_FromGeoTiff(content) as raster")
+ .selectExpr("RS_AsCOG(raster, 'lzw', 256, 0.5, 'BILINEAR', 2) as cog")
+ .first()
+ .getAs[Array[Byte]]("cog")
+ assert(cogBytes != null)
+ assert(cogBytes.length > 0)
+ }
+
it("Passed RS_AsArcGrid") {
val df = sparkSession.read.format("binaryFile").load(resourceFolder +
"raster_asc/*")
val resultRaw = df.selectExpr("RS_FromArcInfoAsciiGrid(content) as
raster").first().get(0)