This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch feature/cog-writer in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 866b402567b825fa939034300e88a69070690c50 Author: Jia Yu <[email protected]> AuthorDate: Thu Feb 19 00:00:36 2026 -0800 Add Cloud Optimized GeoTIFF (COG) writer - TiffIfdParser: extracts IFD structure from TIFF byte arrays - CogAssembler: reassembles parsed IFDs into COG byte order (ported from GeoTrellis) - CogWriter: orchestrates decimation, overview generation, tiled writing, and COG assembly - RasterOutputs: adds asCloudOptimizedGeoTiff() public API - CogWriterTest: 8 unit tests covering decimation, overview, round-trip, multiband, compression --- .../apache/sedona/common/raster/RasterOutputs.java | 36 +++ .../sedona/common/raster/cog/CogAssembler.java | 258 +++++++++++++++++ .../apache/sedona/common/raster/cog/CogWriter.java | 268 ++++++++++++++++++ .../sedona/common/raster/cog/TiffIfdParser.java | 315 +++++++++++++++++++++ .../sedona/common/raster/cog/CogWriterTest.java | 245 ++++++++++++++++ 5 files changed, 1122 insertions(+) diff --git a/common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java b/common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java index c2dc22782f..791e2d27e7 100644 --- a/common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java +++ b/common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java @@ -36,6 +36,7 @@ import javax.imageio.ImageWriteParam; import javax.media.jai.InterpolationNearest; import javax.media.jai.JAI; import javax.media.jai.RenderedOp; +import org.apache.sedona.common.raster.cog.CogWriter; import org.apache.sedona.common.utils.RasterUtils; import org.geotools.api.coverage.grid.GridCoverageWriter; import org.geotools.api.metadata.spatial.PixelOrientation; @@ -87,6 +88,41 @@ public class RasterOutputs { return asGeoTiff(raster, null, -1); } + /** + * Creates a Cloud Optimized GeoTIFF (COG) byte array from the given raster. 
The COG format + * arranges tiles and overviews in an order optimized for HTTP range-request based access, + * enabling efficient partial reads from cloud storage. + * + * @param raster The input raster + * @param compressionType Compression type: "Deflate", "LZW", "JPEG", "PackBits", or null for + * default (Deflate) + * @param compressionQuality Quality 0.0 (max compression) to 1.0 (no compression) + * @return COG file as byte array + */ + public static byte[] asCloudOptimizedGeoTiff( + GridCoverage2D raster, String compressionType, double compressionQuality) { + try { + return CogWriter.write(raster, compressionType, compressionQuality); + } catch (IOException e) { + throw new RuntimeException("Failed to write Cloud Optimized GeoTIFF", e); + } + } + + /** + * Creates a Cloud Optimized GeoTIFF (COG) byte array with default settings (Deflate compression, + * 256x256 tiles). + * + * @param raster The input raster + * @return COG file as byte array + */ + public static byte[] asCloudOptimizedGeoTiff(GridCoverage2D raster) { + try { + return CogWriter.write(raster); + } catch (IOException e) { + throw new RuntimeException("Failed to write Cloud Optimized GeoTIFF", e); + } + } + /** * Creates a GeoTiff file with the provided raster. Primarily used for testing. * diff --git a/common/src/main/java/org/apache/sedona/common/raster/cog/CogAssembler.java b/common/src/main/java/org/apache/sedona/common/raster/cog/CogAssembler.java new file mode 100644 index 0000000000..579f9be9d4 --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/raster/cog/CogAssembler.java @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.raster.cog; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.List; + +/** + * Assembles multiple parsed TIFF IFDs into Cloud Optimized GeoTIFF (COG) byte order. + * + * <p>COG layout (per the spec): + * + * <pre> + * [TIFF header - 8 bytes] + * [IFD 0: full-res tags + overflow data] + * [IFD 1: overview 2x tags + overflow data] + * ... + * [IFD N: smallest overview tags + overflow data] + * [smallest overview image data] + * ... + * [overview 2x image data] + * [full-res image data] + * </pre> + * + * <p>Key COG requirements: + * + * <ul> + * <li>All IFDs are contiguous at the start of the file + * <li>Image data follows all IFDs, ordered smallest overview first, full-res last + * <li>TileOffsets/StripOffsets point forward to where image data will be located + * <li>Overviews have NewSubfileType = 1 (ReducedImage) + * </ul> + * + * <p>Ported from GeoTrellis's {@code GeoTiffWriter.appendCloudOptimized()}. + */ +public class CogAssembler { + + /** NewSubfileType value for reduced-resolution (overview) images */ + private static final int REDUCED_IMAGE = 1; + + /** + * Assemble parsed TIFF IFDs into COG byte order. + * + * @param parsedTiffs List of parsed TIFFs, ordered: [full-res, overview-2x, overview-4x, ... + * smallest]. The first element is the full resolution image, subsequent elements are + * progressively smaller overviews. 
+ * @return A byte array containing the complete COG file + * @throws IOException if writing fails + */ + public static byte[] assemble(List<TiffIfdParser.ParsedTiff> parsedTiffs) throws IOException { + if (parsedTiffs.isEmpty()) { + throw new IllegalArgumentException("No TIFFs to assemble"); + } + + ByteOrder byteOrder = parsedTiffs.get(0).byteOrder; + int ifdCount = parsedTiffs.size(); + + // Phase 1: Compute sizes of all IFD regions (IFD entries + overflow data) + int[] ifdRegionSizes = new int[ifdCount]; + for (int i = 0; i < ifdCount; i++) { + TiffIfdParser.ParsedTiff pt = parsedTiffs.get(i); + ifdRegionSizes[i] = pt.getIfdAndOverflowSize(); + } + + // Phase 2: Compute absolute offsets for each IFD and its image data. + // Layout: [header=8] [IFD0+overflow] [IFD1+overflow] ... [IFDN+overflow] + // [imageN] ... [image1] [image0] + int[] ifdAbsoluteOffsets = new int[ifdCount]; + int cursor = 8; // After TIFF header + for (int i = 0; i < ifdCount; i++) { + ifdAbsoluteOffsets[i] = cursor; + cursor += ifdRegionSizes[i]; + } + int imageDataRegionStart = cursor; + + // Image data is written in reverse order (smallest overview first, full-res last) + // Compute absolute offset of each IFD's image data + int[] imageDataAbsoluteOffsets = new int[ifdCount]; + int imageDataCursor = imageDataRegionStart; + for (int i = ifdCount - 1; i >= 0; i--) { + imageDataAbsoluteOffsets[i] = imageDataCursor; + imageDataCursor += parsedTiffs.get(i).imageData.length; + } + int totalSize = imageDataCursor; + + // Phase 3: Write the COG + ByteArrayOutputStream bos = new ByteArrayOutputStream(totalSize); + DataOutputStream dos = new DataOutputStream(bos); + + // Write TIFF header + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + dos.writeByte('I'); + dos.writeByte('I'); + } else { + dos.writeByte('M'); + dos.writeByte('M'); + } + writeShort(dos, byteOrder, 42); // TIFF magic + writeInt(dos, byteOrder, ifdAbsoluteOffsets[0]); // Offset to first IFD + + // Write each IFD + its overflow data + 
for (int i = 0; i < ifdCount; i++) { + TiffIfdParser.ParsedTiff pt = parsedTiffs.get(i); + int ifdStart = ifdAbsoluteOffsets[i]; + int nextIfdOffset = (i + 1 < ifdCount) ? ifdAbsoluteOffsets[i + 1] : 0; + + // Compute where this IFD's overflow data will be in the output + int overflowStartInOutput = ifdStart + pt.getIfdSize(); + + // Patch the IFD entries: + // - Rebase overflow pointers from original file offsets to new output offsets + // - Rewrite TileOffsets/StripOffsets to point to the new image data location + byte[] patchedEntries = + patchIfdEntries(pt, overflowStartInOutput, imageDataAbsoluteOffsets[i], i > 0, byteOrder); + + // Write: tag count (2 bytes) + entries (tagCount*12) + next IFD offset (4 bytes) + writeShort(dos, byteOrder, pt.tagCount); + dos.write(patchedEntries); + writeInt(dos, byteOrder, nextIfdOffset); + + // Write overflow data + dos.write(pt.overflowData); + } + + // Write image data in reverse order (smallest overview first) + for (int i = ifdCount - 1; i >= 0; i--) { + dos.write(parsedTiffs.get(i).imageData); + } + + dos.flush(); + return bos.toByteArray(); + } + + /** + * Patch IFD entries to update: + * + * <ol> + * <li>Overflow data pointers (rebase from original file offset to new output offset) + * <li>TileOffsets/StripOffsets values (point to new image data location) + * <li>Inject NewSubfileType=1 for overview IFDs (if not already present) + * </ol> + */ + private static byte[] patchIfdEntries( + TiffIfdParser.ParsedTiff pt, + int newOverflowStart, + int newImageDataStart, + boolean isOverview, + ByteOrder byteOrder) { + + byte[] entries = pt.ifdEntries.clone(); + ByteBuffer buf = ByteBuffer.wrap(entries).order(byteOrder); + + int overflowDelta = newOverflowStart - pt.overflowDataStart; + + for (int i = 0; i < pt.tagCount; i++) { + int offset = i * 12; + int tag = buf.getShort(offset) & 0xFFFF; + int fieldType = buf.getShort(offset + 2) & 0xFFFF; + int count = buf.getInt(offset + 4); + int valueSize = count * 
getFieldTypeSize(fieldType); + + // Handle NewSubfileType tag for overviews + if (tag == TiffIfdParser.TAG_NEW_SUBFILE_TYPE && isOverview) { + buf.putInt(offset + 8, REDUCED_IMAGE); + continue; + } + + // Handle TileOffsets/StripOffsets — rewrite to point to new image data location + if (tag == TiffIfdParser.TAG_TILE_OFFSETS || tag == TiffIfdParser.TAG_STRIP_OFFSETS) { + if (count == 1 && valueSize <= 4) { + // Single segment: offset stored inline + buf.putInt(offset + 8, newImageDataStart + pt.segmentOffsets[0]); + } else { + // Multiple segments: the entry points to an overflow array. + // We need to rewrite the overflow array with new absolute offsets. + // First, rebase the pointer to the overflow data. + int origPointer = buf.getInt(offset + 8); + int newPointer = origPointer + overflowDelta; + buf.putInt(offset + 8, newPointer); + + // Now patch the overflow data array with new image data offsets + int overflowArrayOffset = origPointer - pt.overflowDataStart; + ByteBuffer overflowBuf = ByteBuffer.wrap(pt.overflowData).order(byteOrder); + for (int j = 0; j < count; j++) { + int newSegmentOffset = newImageDataStart + pt.segmentOffsets[j]; + overflowBuf.putInt(overflowArrayOffset + j * 4, newSegmentOffset); + } + } + continue; + } + + // For all other tags with overflow data (value > 4 bytes), rebase the pointer + if (valueSize > 4) { + int origPointer = buf.getInt(offset + 8); + buf.putInt(offset + 8, origPointer + overflowDelta); + } + } + + return entries; + } + + /** Write a 16-bit value respecting byte order */ + private static void writeShort(DataOutputStream dos, ByteOrder order, int value) + throws IOException { + if (order == ByteOrder.LITTLE_ENDIAN) { + dos.writeByte(value & 0xFF); + dos.writeByte((value >>> 8) & 0xFF); + } else { + dos.writeByte((value >>> 8) & 0xFF); + dos.writeByte(value & 0xFF); + } + } + + /** Write a 32-bit value respecting byte order */ + private static void writeInt(DataOutputStream dos, ByteOrder order, int value) + throws 
IOException { + if (order == ByteOrder.LITTLE_ENDIAN) { + dos.writeByte(value & 0xFF); + dos.writeByte((value >>> 8) & 0xFF); + dos.writeByte((value >>> 16) & 0xFF); + dos.writeByte((value >>> 24) & 0xFF); + } else { + dos.writeByte((value >>> 24) & 0xFF); + dos.writeByte((value >>> 16) & 0xFF); + dos.writeByte((value >>> 8) & 0xFF); + dos.writeByte(value & 0xFF); + } + } + + /** Get the byte size of a TIFF field type */ + private static int getFieldTypeSize(int fieldType) { + int[] sizes = {0, 1, 1, 2, 4, 8, 1, 1, 2, 4, 8, 4, 8}; + if (fieldType >= 1 && fieldType < sizes.length) { + return sizes[fieldType]; + } + return 1; + } +} diff --git a/common/src/main/java/org/apache/sedona/common/raster/cog/CogWriter.java b/common/src/main/java/org/apache/sedona/common/raster/cog/CogWriter.java new file mode 100644 index 0000000000..5a0bad8c26 --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/raster/cog/CogWriter.java @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.common.raster.cog; + +import java.awt.image.RenderedImage; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import javax.imageio.ImageWriteParam; +import javax.media.jai.Interpolation; +import javax.media.jai.InterpolationNearest; +import org.geotools.api.coverage.grid.GridCoverageWriter; +import org.geotools.api.parameter.GeneralParameterValue; +import org.geotools.api.parameter.ParameterValueGroup; +import org.geotools.api.referencing.crs.CoordinateReferenceSystem; +import org.geotools.api.referencing.datum.PixelInCell; +import org.geotools.coverage.grid.GridCoverage2D; +import org.geotools.coverage.grid.GridEnvelope2D; +import org.geotools.coverage.grid.GridGeometry2D; +import org.geotools.coverage.grid.io.AbstractGridFormat; +import org.geotools.coverage.processing.Operations; +import org.geotools.gce.geotiff.GeoTiffWriteParams; +import org.geotools.gce.geotiff.GeoTiffWriter; +import org.geotools.geometry.jts.ReferencedEnvelope; +import org.geotools.referencing.operation.transform.AffineTransform2D; + +/** + * Creates Cloud Optimized GeoTIFF (COG) files from GeoTools GridCoverage2D rasters. + * + * <p>The COG generation process: + * + * <ol> + * <li>Compute overview decimation factors (power of 2: 2, 4, 8, ...) + * <li>Generate overview images by downsampling + * <li>Write each (full-res + overviews) as a separate tiled GeoTIFF via GeoTools + * <li>Parse each TIFF's IFD structure + * <li>Reassemble into COG byte order using {@link CogAssembler} + * </ol> + * + * <p>Overview decimation algorithm ported from GeoTrellis's {@code + * GeoTiff.defaultOverviewDecimations}. 
+ */ +public class CogWriter { + + /** Default tile size for COG output, matching GDAL's default */ + public static final int DEFAULT_TILE_SIZE = 256; + + /** Minimum image dimension to create an overview for */ + private static final int MIN_OVERVIEW_SIZE = 2; + + /** + * Write a GridCoverage2D as a Cloud Optimized GeoTIFF byte array. + * + * @param raster The input raster + * @param compressionType Compression type: "Deflate", "LZW", "JPEG", "PackBits", or null for + * default (Deflate) + * @param compressionQuality Quality 0.0 (max compression) to 1.0 (no compression), or -1 for + * default + * @param tileSize Tile width and height in pixels + * @return COG file as byte array + * @throws IOException if writing fails + */ + public static byte[] write( + GridCoverage2D raster, String compressionType, double compressionQuality, int tileSize) + throws IOException { + + if (compressionType == null) { + compressionType = "Deflate"; + } + if (compressionQuality < 0) { + compressionQuality = 0.2; + } + + RenderedImage image = raster.getRenderedImage(); + int cols = image.getWidth(); + int rows = image.getHeight(); + + // Step 1: Compute overview decimation factors + List<Integer> decimations = computeOverviewDecimations(cols, rows, tileSize); + + // Step 2: Generate overview coverages + List<GridCoverage2D> overviews = new ArrayList<>(); + for (int decimation : decimations) { + GridCoverage2D overview = generateOverview(raster, decimation); + overviews.add(overview); + } + + // Step 3: Write each as a tiled GeoTIFF byte array + List<byte[]> tiffBytes = new ArrayList<>(); + tiffBytes.add( + writeAsTiledGeoTiff(raster, compressionType, compressionQuality, tileSize, false)); + for (GridCoverage2D overview : overviews) { + tiffBytes.add( + writeAsTiledGeoTiff(overview, compressionType, compressionQuality, tileSize, true)); + } + + // Step 4: Parse each TIFF's IFD structure + List<TiffIfdParser.ParsedTiff> parsedTiffs = new ArrayList<>(); + for (byte[] bytes : tiffBytes) { + 
parsedTiffs.add(TiffIfdParser.parse(bytes)); + } + + // Step 5: Reassemble into COG byte order + return CogAssembler.assemble(parsedTiffs); + } + + /** + * Write a GridCoverage2D as COG with default settings (Deflate compression, 256x256 tiles). + * + * @param raster The input raster + * @return COG file as byte array + * @throws IOException if writing fails + */ + public static byte[] write(GridCoverage2D raster) throws IOException { + return write(raster, "Deflate", 0.2, DEFAULT_TILE_SIZE); + } + + /** + * Write a GridCoverage2D as COG with specified compression. + * + * @param raster The input raster + * @param compressionType Compression type + * @param compressionQuality Quality 0.0 to 1.0 + * @return COG file as byte array + * @throws IOException if writing fails + */ + public static byte[] write( + GridCoverage2D raster, String compressionType, double compressionQuality) throws IOException { + return write(raster, compressionType, compressionQuality, DEFAULT_TILE_SIZE); + } + + /** + * Compute overview decimation factors. Each level is a power of 2. + * + * <p>Ported from GeoTrellis: {@code GeoTiff.defaultOverviewDecimations()} + * + * @param cols Image width in pixels + * @param rows Image height in pixels + * @param blockSize Tile size for the overview + * @return List of decimation factors [2, 4, 8, ...] 
or empty if image is too small + */ + static List<Integer> computeOverviewDecimations(int cols, int rows, int blockSize) { + List<Integer> decimations = new ArrayList<>(); + double pixels = Math.max(cols, rows); + double blocks = pixels / blockSize; + int overviewLevels = (int) Math.ceil(Math.log(blocks) / Math.log(2)); + + for (int level = 0; level < overviewLevels; level++) { + int decimation = (int) Math.pow(2, level + 1); + int overviewCols = (int) Math.ceil((double) cols / decimation); + int overviewRows = (int) Math.ceil((double) rows / decimation); + if (overviewCols < MIN_OVERVIEW_SIZE || overviewRows < MIN_OVERVIEW_SIZE) { + break; + } + decimations.add(decimation); + } + return decimations; + } + + /** + * Generate an overview (reduced resolution) coverage by downsampling. + * + * @param raster The full resolution raster + * @param decimationFactor Factor to reduce by (2 = half size, 4 = quarter, etc.) + * @return A new GridCoverage2D at reduced resolution + */ + static GridCoverage2D generateOverview(GridCoverage2D raster, int decimationFactor) { + RenderedImage image = raster.getRenderedImage(); + int newWidth = (int) Math.ceil((double) image.getWidth() / decimationFactor); + int newHeight = (int) Math.ceil((double) image.getHeight() / decimationFactor); + + // Use GeoTools Operations.DEFAULT.resample to downsample + ReferencedEnvelope envelope = raster.getEnvelope2D(); + CoordinateReferenceSystem crs = raster.getCoordinateReferenceSystem2D(); + + AffineTransform2D originalTransform = + (AffineTransform2D) raster.getGridGeometry().getGridToCRS2D(); + double newScaleX = originalTransform.getScaleX() * decimationFactor; + double newScaleY = originalTransform.getScaleY() * decimationFactor; + + AffineTransform2D newTransform = + new AffineTransform2D( + newScaleX, + originalTransform.getShearY(), + originalTransform.getShearX(), + newScaleY, + originalTransform.getTranslateX(), + originalTransform.getTranslateY()); + + GridGeometry2D gridGeometry = + new 
GridGeometry2D( + new GridEnvelope2D(0, 0, newWidth, newHeight), + PixelInCell.CELL_CORNER, + newTransform, + crs, + null); + + Interpolation interpolation = new InterpolationNearest(); + return (GridCoverage2D) Operations.DEFAULT.resample(raster, null, gridGeometry, interpolation); + } + + /** + * Write a GridCoverage2D as a tiled GeoTIFF byte array using GeoTools. + * + * @param raster The input raster + * @param compressionType Compression type + * @param compressionQuality Quality 0.0 to 1.0 + * @param tileSize Tile dimensions in pixels + * @param isOverview If true, sets NewSubfileType=1 (ReducedImage) — note: GeoTools may not + * support this directly, in which case the CogAssembler handles it during assembly. + * @return Tiled GeoTIFF as byte array + * @throws IOException if writing fails + */ + private static byte[] writeAsTiledGeoTiff( + GridCoverage2D raster, + String compressionType, + double compressionQuality, + int tileSize, + boolean isOverview) + throws IOException { + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + GridCoverageWriter writer = new GeoTiffWriter(out); + + ParameterValueGroup defaultParams = writer.getFormat().getWriteParameters(); + GeoTiffWriteParams params = new GeoTiffWriteParams(); + + // Set tiling — must use the 2-arg overload from GeoToolsWriteParams + // which delegates to the inner write param. The 4-arg ImageWriteParam.setTiling() + // writes to the wrong fields (parent vs inner param). 
+ params.setTilingMode(ImageWriteParam.MODE_EXPLICIT); + params.setTiling(tileSize, tileSize); + + // Set compression + params.setCompressionMode(ImageWriteParam.MODE_EXPLICIT); + params.setCompressionType(compressionType); + params.setCompressionQuality((float) compressionQuality); + + defaultParams + .parameter(AbstractGridFormat.GEOTOOLS_WRITE_PARAMS.getName().toString()) + .setValue(params); + + GeneralParameterValue[] wps = defaultParams.values().toArray(new GeneralParameterValue[0]); + + writer.write(raster, wps); + writer.dispose(); + out.close(); + + return out.toByteArray(); + } +} diff --git a/common/src/main/java/org/apache/sedona/common/raster/cog/TiffIfdParser.java b/common/src/main/java/org/apache/sedona/common/raster/cog/TiffIfdParser.java new file mode 100644 index 0000000000..7c0d31cd1e --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/raster/cog/TiffIfdParser.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.raster.cog; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Parses the IFD (Image File Directory) structure from a TIFF byte array. 
This is used to extract + * the structural components needed for COG assembly: the IFD entries, overflow tag data, and image + * data regions. + * + * <p>Reference: TIFF 6.0 Specification, Section 2 (TIFF Structure). + */ +public class TiffIfdParser { + + /** Tag code for TileOffsets (0x0144 = 324) */ + public static final int TAG_TILE_OFFSETS = 324; + + /** Tag code for StripOffsets (0x0111 = 273) */ + public static final int TAG_STRIP_OFFSETS = 273; + + /** Tag code for TileByteCounts (0x0145 = 325) */ + public static final int TAG_TILE_BYTE_COUNTS = 325; + + /** Tag code for StripByteCounts (0x0117 = 279) */ + public static final int TAG_STRIP_BYTE_COUNTS = 279; + + /** Tag code for NewSubfileType (0x00FE = 254) */ + public static final int TAG_NEW_SUBFILE_TYPE = 254; + + /** TIFF field type sizes in bytes */ + private static final int[] FIELD_TYPE_SIZES = { + 0, // 0: unused + 1, // 1: BYTE + 1, // 2: ASCII + 2, // 3: SHORT + 4, // 4: LONG + 8, // 5: RATIONAL + 1, // 6: SBYTE + 1, // 7: UNDEFINED + 2, // 8: SSHORT + 4, // 9: SLONG + 8, // 10: SRATIONAL + 4, // 11: FLOAT + 8 // 12: DOUBLE + }; + + /** + * Result of parsing a TIFF file. Contains the byte order and the parsed IFD data for the first + * IFD only (we write each overview as a separate TIFF, so there's always exactly one IFD). + */ + public static class ParsedTiff { + /** Byte order of the TIFF file */ + public final ByteOrder byteOrder; + + /** Offset where the first IFD starts (always 8 for standard TIFF) */ + public final int ifdOffset; + + /** Number of tag entries in the IFD */ + public final int tagCount; + + /** + * Raw bytes of all IFD tag entries (tagCount * 12 bytes). This includes the 12-byte entries but + * NOT the 2-byte tag count or the 4-byte next-IFD pointer. + */ + public final byte[] ifdEntries; + + /** + * Overflow tag data — values that exceed 4 bytes and are stored outside the IFD entries. This + * is collected in the order the tags reference them. 
+ */ + public final byte[] overflowData; + + /** + * The start offset of the overflow data region in the original TIFF file. Used to rebase + * overflow pointers when reassembling. + */ + public final int overflowDataStart; + + /** The raw image data (all tiles/strips concatenated) */ + public final byte[] imageData; + + /** Byte offsets of each tile/strip within imageData (relative to imageData start) */ + public final int[] segmentOffsets; + + /** Byte counts of each tile/strip */ + public final int[] segmentByteCounts; + + /** The total size of the IFD region: 2 (count) + tagCount*12 + 4 (next pointer) */ + public int getIfdSize() { + return 2 + tagCount * 12 + 4; + } + + /** The total size of IFD + overflow data (everything before image data) */ + public int getIfdAndOverflowSize() { + return getIfdSize() + overflowData.length; + } + + ParsedTiff( + ByteOrder byteOrder, + int ifdOffset, + int tagCount, + byte[] ifdEntries, + byte[] overflowData, + int overflowDataStart, + byte[] imageData, + int[] segmentOffsets, + int[] segmentByteCounts) { + this.byteOrder = byteOrder; + this.ifdOffset = ifdOffset; + this.tagCount = tagCount; + this.ifdEntries = ifdEntries; + this.overflowData = overflowData; + this.overflowDataStart = overflowDataStart; + this.imageData = imageData; + this.segmentOffsets = segmentOffsets; + this.segmentByteCounts = segmentByteCounts; + } + } + + /** + * Parse a standard TIFF byte array and extract its first IFD structure. 
+ * + * @param tiffBytes The complete TIFF file as a byte array + * @return ParsedTiff with all structural components extracted + * @throws IllegalArgumentException if the TIFF header is invalid + */ + public static ParsedTiff parse(byte[] tiffBytes) { + if (tiffBytes.length < 8) { + throw new IllegalArgumentException("TIFF data too short: " + tiffBytes.length + " bytes"); + } + + // Read byte order from first 2 bytes + ByteOrder byteOrder; + if (tiffBytes[0] == 'I' && tiffBytes[1] == 'I') { + byteOrder = ByteOrder.LITTLE_ENDIAN; + } else if (tiffBytes[0] == 'M' && tiffBytes[1] == 'M') { + byteOrder = ByteOrder.BIG_ENDIAN; + } else { + throw new IllegalArgumentException( + "Invalid TIFF byte order marker: " + tiffBytes[0] + ", " + tiffBytes[1]); + } + + ByteBuffer buf = ByteBuffer.wrap(tiffBytes).order(byteOrder); + + // Verify TIFF magic number (42) + int magic = buf.getShort(2) & 0xFFFF; + if (magic != 42) { + throw new IllegalArgumentException("Not a standard TIFF file (magic=" + magic + ")"); + } + + // Read first IFD offset + int ifdOffset = buf.getInt(4); + + // Read number of directory entries + int tagCount = buf.getShort(ifdOffset) & 0xFFFF; + + // Read all IFD entries (12 bytes each) + int entriesStart = ifdOffset + 2; + int entriesLen = tagCount * 12; + byte[] ifdEntries = new byte[entriesLen]; + System.arraycopy(tiffBytes, entriesStart, ifdEntries, 0, entriesLen); + + // Find the offsets tag and bytecounts tag to locate image data + int offsetsTag = -1; + int byteCountsTag = -1; + int segmentCount = 0; + + // Also track the overflow data region + int overflowStart = Integer.MAX_VALUE; + int overflowEnd = 0; + + // First pass: find offset/bytecount tags and overflow region + for (int i = 0; i < tagCount; i++) { + int entryOffset = entriesStart + i * 12; + int tag = buf.getShort(entryOffset) & 0xFFFF; + int fieldType = buf.getShort(entryOffset + 2) & 0xFFFF; + int count = buf.getInt(entryOffset + 4); + int valueSize = count * getFieldTypeSize(fieldType); 
+ + if (tag == TAG_TILE_OFFSETS || tag == TAG_STRIP_OFFSETS) { + offsetsTag = tag; + segmentCount = count; + } else if (tag == TAG_TILE_BYTE_COUNTS || tag == TAG_STRIP_BYTE_COUNTS) { + byteCountsTag = tag; + } + + // Track overflow data region (values > 4 bytes stored outside IFD entries) + if (valueSize > 4) { + int valOffset = buf.getInt(entryOffset + 8); + overflowStart = Math.min(overflowStart, valOffset); + overflowEnd = Math.max(overflowEnd, valOffset + valueSize); + } + } + + if (offsetsTag < 0 || byteCountsTag < 0) { + throw new IllegalArgumentException( + "TIFF missing TileOffsets/StripOffsets or TileByteCounts/StripByteCounts tags"); + } + + // Read segment offsets and byte counts + int[] segmentOffsets = readIntArray(buf, tiffBytes, entriesStart, tagCount, offsetsTag); + int[] segmentByteCounts = readIntArray(buf, tiffBytes, entriesStart, tagCount, byteCountsTag); + + // Extract overflow data + byte[] overflowData; + int overflowDataStart; + if (overflowStart < Integer.MAX_VALUE) { + overflowDataStart = overflowStart; + overflowData = new byte[overflowEnd - overflowStart]; + System.arraycopy(tiffBytes, overflowStart, overflowData, 0, overflowData.length); + } else { + overflowDataStart = 0; + overflowData = new byte[0]; + } + + // Find image data bounds + int imageDataStart = Integer.MAX_VALUE; + int imageDataEnd = 0; + for (int i = 0; i < segmentCount; i++) { + imageDataStart = Math.min(imageDataStart, segmentOffsets[i]); + imageDataEnd = Math.max(imageDataEnd, segmentOffsets[i] + segmentByteCounts[i]); + } + + // Extract image data + byte[] imageData = new byte[imageDataEnd - imageDataStart]; + System.arraycopy(tiffBytes, imageDataStart, imageData, 0, imageData.length); + + // Make segment offsets relative to imageData start + int[] relativeOffsets = new int[segmentCount]; + for (int i = 0; i < segmentCount; i++) { + relativeOffsets[i] = segmentOffsets[i] - imageDataStart; + } + + return new ParsedTiff( + byteOrder, + ifdOffset, + tagCount, + 
ifdEntries, + overflowData, + overflowDataStart, + imageData, + relativeOffsets, + segmentByteCounts); + } + + /** + * Read an array of int values from an IFD tag entry. Handles both inline (count=1, value in + * entry) and overflow (count>1, pointer in entry) cases. + */ + private static int[] readIntArray( + ByteBuffer buf, byte[] tiffBytes, int entriesStart, int tagCount, int targetTag) { + for (int i = 0; i < tagCount; i++) { + int entryOffset = entriesStart + i * 12; + int tag = buf.getShort(entryOffset) & 0xFFFF; + if (tag != targetTag) continue; + + int fieldType = buf.getShort(entryOffset + 2) & 0xFFFF; + int count = buf.getInt(entryOffset + 4); + + int valueSize = count * getFieldTypeSize(fieldType); + int[] result = new int[count]; + + if (valueSize <= 4) { + // Value stored inline in the entry + if (fieldType == 3) { // SHORT + for (int j = 0; j < count; j++) { + result[j] = buf.getShort(entryOffset + 8 + j * 2) & 0xFFFF; + } + } else { // LONG + result[0] = buf.getInt(entryOffset + 8); + } + } else { + // Value stored at offset + int valOffset = buf.getInt(entryOffset + 8); + if (fieldType == 3) { // SHORT + for (int j = 0; j < count; j++) { + result[j] = buf.getShort(valOffset + j * 2) & 0xFFFF; + } + } else { // LONG + for (int j = 0; j < count; j++) { + result[j] = buf.getInt(valOffset + j * 4); + } + } + } + return result; + } + throw new IllegalArgumentException("Tag " + targetTag + " not found in IFD"); + } + + /** Get the byte size of a TIFF field type. 
*/ + private static int getFieldTypeSize(int fieldType) { + if (fieldType >= 1 && fieldType < FIELD_TYPE_SIZES.length) { + return FIELD_TYPE_SIZES[fieldType]; + } + return 1; // fallback: type codes below 1 or past the FIELD_TYPE_SIZES table count as 1 byte + } +} diff --git a/common/src/test/java/org/apache/sedona/common/raster/cog/CogWriterTest.java b/common/src/test/java/org/apache/sedona/common/raster/cog/CogWriterTest.java new file mode 100644 index 0000000000..45c7be3006 --- /dev/null +++ b/common/src/test/java/org/apache/sedona/common/raster/cog/CogWriterTest.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.common.raster.cog; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; +import org.apache.sedona.common.raster.MapAlgebra; +import org.apache.sedona.common.raster.RasterConstructors; +import org.apache.sedona.common.raster.RasterOutputs; +import org.geotools.coverage.grid.GridCoverage2D; +import org.junit.Test; + +public class CogWriterTest { + + private static final String resourceFolder = + System.getProperty("user.dir") + "/../spark/common/src/test/resources/"; + + private GridCoverage2D rasterFromGeoTiff(String filePath) throws IOException { + byte[] bytes = Files.readAllBytes(Paths.get(filePath)); + return RasterConstructors.fromGeoTiff(bytes); + } + + @Test + public void testComputeOverviewDecimations() { + // 1000x1000 with blockSize=256: ceil(log2(1000/256)) = ceil(1.97) = 2 levels -> [2, 4] + List<Integer> decimations = CogWriter.computeOverviewDecimations(1000, 1000, 256); + assertEquals(2, decimations.size()); + assertEquals(Integer.valueOf(2), decimations.get(0)); + assertEquals(Integer.valueOf(4), decimations.get(1)); + + // 10000x10000, blockSize=256: ceil(log2(10000/256)) = ceil(5.29) = 6 levels -> [2, 4, ..., 64] + decimations = CogWriter.computeOverviewDecimations(10000, 10000, 256); + assertEquals(6, decimations.size()); + assertEquals(Integer.valueOf(2), decimations.get(0)); + assertEquals(Integer.valueOf(4), decimations.get(1)); + assertEquals(Integer.valueOf(8), decimations.get(2)); + assertEquals(Integer.valueOf(16), decimations.get(3)); + assertEquals(Integer.valueOf(32), decimations.get(4)); + assertEquals(Integer.valueOf(64), decimations.get(5)); + + // Very small image: 50x50 with blockSize=256 -> no overviews + decimations = CogWriter.computeOverviewDecimations(50, 50, 256); + assertEquals(0, decimations.size()); + + // Exactly one tile: 256x256 with blockSize=256 -> no 
overviews + decimations = CogWriter.computeOverviewDecimations(256, 256, 256); + assertEquals(0, decimations.size()); + } + + @Test + public void testGenerateOverview() { + // Create a 100x100 single-band raster + double[] bandValues = new double[100 * 100]; + for (int i = 0; i < bandValues.length; i++) { + bandValues[i] = i % 256; + } + GridCoverage2D raster = + RasterConstructors.makeNonEmptyRaster( + 1, "d", 100, 100, 0, 0, 1, -1, 0, 0, 4326, new double[][] {bandValues}); + + // Downsample by factor of 2: 100x100 -> 50x50 + GridCoverage2D overview = CogWriter.generateOverview(raster, 2); + assertNotNull(overview); + assertEquals(50, overview.getRenderedImage().getWidth()); + assertEquals(50, overview.getRenderedImage().getHeight()); + } + + @Test + public void testWriteSmallRasterAsCog() throws IOException { + // Create a small raster (no overviews expected due to small size) + double[] bandValues = new double[50 * 50]; + for (int i = 0; i < bandValues.length; i++) { + bandValues[i] = i % 256; + } + GridCoverage2D raster = + RasterConstructors.makeNonEmptyRaster( + 1, "d", 50, 50, 0, 0, 1, -1, 0, 0, 4326, new double[][] {bandValues}); + + byte[] cogBytes = RasterOutputs.asCloudOptimizedGeoTiff(raster); + assertNotNull(cogBytes); + assertTrue(cogBytes.length > 0); + + // Verify the TIFF byte-order mark: "II" or "MM" in the first two bytes + assertTrue( + (cogBytes[0] == 'I' && cogBytes[1] == 'I') || (cogBytes[0] == 'M' && cogBytes[1] == 'M')); + + // Verify it can be read back + GridCoverage2D readBack = RasterConstructors.fromGeoTiff(cogBytes); + assertNotNull(readBack); + assertEquals(50, readBack.getRenderedImage().getWidth()); + assertEquals(50, readBack.getRenderedImage().getHeight()); + } + + @Test + public void testWriteMediumRasterAsCog() throws IOException { + // Create a 512x512 raster (should produce overviews with 256 tile size) + double[] bandValues = new double[512 * 512]; + for (int i = 0; i < bandValues.length; i++) { + bandValues[i] = (i * 7) % 256; + } + GridCoverage2D raster = + 
RasterConstructors.makeNonEmptyRaster( + 1, "d", 512, 512, 0, 0, 1, -1, 0, 0, 4326, new double[][] {bandValues}); + + byte[] cogBytes = RasterOutputs.asCloudOptimizedGeoTiff(raster, "Deflate", 0.5); + assertNotNull(cogBytes); + assertTrue(cogBytes.length > 0); + + // Verify COG structure: IFDs should be at the beginning of the file + ByteOrder byteOrder = (cogBytes[0] == 'I') ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN; + ByteBuffer buf = ByteBuffer.wrap(cogBytes).order(byteOrder); + + // First IFD should be at offset 8 (right after header) + int firstIfdOffset = buf.getInt(4); + assertEquals(8, firstIfdOffset); + + // Read first IFD tag count + int tagCount = buf.getShort(firstIfdOffset) & 0xFFFF; + assertTrue("First IFD should have tags", tagCount > 0); + + // The next-IFD pointer follows the 2-byte tag count and the 12-byte entries of the first IFD + int nextIfdPointerPos = firstIfdOffset + 2 + tagCount * 12; + int nextIfdOffset = buf.getInt(nextIfdPointerPos); + // For a 512x512 image with 256 tile size, we expect at least one overview + assertTrue("Should have at least one overview IFD", nextIfdOffset > 0); + // Proxy for "IFDs precede image data": the next IFD must start in the first half of the file + assertTrue( + "Overview IFD should immediately follow first IFD region", + nextIfdOffset < cogBytes.length / 2); + + // Verify it can be read back by GeoTools + GridCoverage2D readBack = RasterConstructors.fromGeoTiff(cogBytes); + assertNotNull(readBack); + assertEquals(512, readBack.getRenderedImage().getWidth()); + assertEquals(512, readBack.getRenderedImage().getHeight()); + + // Verify pixel values are preserved + double[] originalValues = MapAlgebra.bandAsArray(raster, 1); + double[] readBackValues = MapAlgebra.bandAsArray(readBack, 1); + assertArrayEquals(originalValues, readBackValues, 0.01); + } + + @Test + public void testWriteMultibandRasterAsCog() throws IOException { + // Create a 3-band 256x256 raster + int width = 256; + int height = 256; + int numBands = 3; + double[][] bandData 
= new double[numBands][width * height]; + for (int b = 0; b < numBands; b++) { + for (int i = 0; i < width * height; i++) { + bandData[b][i] = (i * (b + 1)) % 256; + } + } + + GridCoverage2D raster = + RasterConstructors.makeNonEmptyRaster( + numBands, "b", width, height, 0, 0, 1, -1, 0, 0, 4326, bandData); + + byte[] cogBytes = RasterOutputs.asCloudOptimizedGeoTiff(raster); + assertNotNull(cogBytes); + + // Verify it can be read back (dimensions only; band count and values are not checked) + GridCoverage2D readBack = RasterConstructors.fromGeoTiff(cogBytes); + assertNotNull(readBack); + assertEquals(width, readBack.getRenderedImage().getWidth()); + assertEquals(height, readBack.getRenderedImage().getHeight()); + } + + @Test + public void testWriteWithLZWCompression() throws IOException { + double[] bandValues = new double[100 * 100]; + for (int i = 0; i < bandValues.length; i++) { + bandValues[i] = i % 10; // Highly compressible + } + GridCoverage2D raster = + RasterConstructors.makeNonEmptyRaster( + 1, "d", 100, 100, 0, 0, 1, -1, 0, 0, 4326, new double[][] {bandValues}); + + byte[] cogBytes = RasterOutputs.asCloudOptimizedGeoTiff(raster, "LZW", 0.5); + assertNotNull(cogBytes); + assertTrue(cogBytes.length > 0); + + GridCoverage2D readBack = RasterConstructors.fromGeoTiff(cogBytes); + assertNotNull(readBack); + } + + @Test + public void testCogFromExistingGeoTiff() throws IOException { + // Test with a real GeoTIFF file from test resources + GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff"); + + byte[] cogBytes = RasterOutputs.asCloudOptimizedGeoTiff(raster); + assertNotNull(cogBytes); + assertTrue(cogBytes.length > 0); + + // Verify it can be read back + GridCoverage2D readBack = RasterConstructors.fromGeoTiff(cogBytes); + assertNotNull(readBack); + assertEquals(raster.getRenderedImage().getWidth(), readBack.getRenderedImage().getWidth()); + assertEquals(raster.getRenderedImage().getHeight(), readBack.getRenderedImage().getHeight()); + } + + @Test + public void testTiffIfdParser() throws 
IOException { + // Write a Deflate-compressed GeoTIFF via asGeoTiff and parse it + double[] bandValues = new double[256 * 256]; + for (int i = 0; i < bandValues.length; i++) { + bandValues[i] = i % 256; + } + GridCoverage2D raster = + RasterConstructors.makeNonEmptyRaster( + 1, "d", 256, 256, 0, 0, 1, -1, 0, 0, 4326, new double[][] {bandValues}); + + byte[] tiffBytes = RasterOutputs.asGeoTiff(raster, "Deflate", 0.5); + + TiffIfdParser.ParsedTiff parsed = TiffIfdParser.parse(tiffBytes); + assertNotNull(parsed); + assertTrue(parsed.tagCount > 0); + assertTrue(parsed.imageData.length > 0); + assertTrue(parsed.ifdEntries.length == parsed.tagCount * 12); // 12 bytes per IFD entry + } +}
