This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch feature/cog-writer in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 6889c286c2fe994647df14ea73fffd35476cb033 Author: Jia Yu <[email protected]> AuthorDate: Thu Feb 19 00:50:30 2026 -0800 Address COG writer code review findings - Remove unused ReferencedEnvelope variable (Finding 5) - Remove unused isOverview parameter from writeAsTiledGeoTiff (Finding 2) - Add input validation: compressionQuality range, tileSize positive & power-of-2 (Finding 3) - Add parser bounds checks: IFD offset, entries length, overflow/image data ranges (Finding 4) - Inject NewSubfileType tag (254=1) into overview IFDs when missing (Finding 1) - Add 4 new tests: NewSubfileType presence, invalid inputs, malformed TIFF rejection, forward-pointing tile offsets --- .../sedona/common/raster/cog/CogAssembler.java | 88 +++++++++- .../apache/sedona/common/raster/cog/CogWriter.java | 26 +-- .../sedona/common/raster/cog/TiffIfdParser.java | 47 +++++- .../sedona/common/raster/cog/CogWriterTest.java | 177 +++++++++++++++++++++ 4 files changed, 316 insertions(+), 22 deletions(-) diff --git a/common/src/main/java/org/apache/sedona/common/raster/cog/CogAssembler.java b/common/src/main/java/org/apache/sedona/common/raster/cog/CogAssembler.java index 579f9be9d4..f6879ae9ea 100644 --- a/common/src/main/java/org/apache/sedona/common/raster/cog/CogAssembler.java +++ b/common/src/main/java/org/apache/sedona/common/raster/cog/CogAssembler.java @@ -75,11 +75,21 @@ public class CogAssembler { ByteOrder byteOrder = parsedTiffs.get(0).byteOrder; int ifdCount = parsedTiffs.size(); + // Determine which overview IFDs need NewSubfileType injection + boolean[] needsNewSubfileType = new boolean[ifdCount]; + for (int i = 1; i < ifdCount; i++) { + needsNewSubfileType[i] = !parsedTiffs.get(i).hasNewSubfileType; + } + // Phase 1: Compute sizes of all IFD regions (IFD entries + overflow data) + // If we need to inject NewSubfileType, the IFD grows by 12 bytes (one tag entry) int[] ifdRegionSizes = new int[ifdCount]; + int[] effectiveTagCounts = new int[ifdCount]; for (int i = 0; i < ifdCount; i++) { TiffIfdParser.ParsedTiff pt = parsedTiffs.get(i); - ifdRegionSizes[i] = pt.getIfdAndOverflowSize(); + int extraBytes = needsNewSubfileType[i] ? 12 : 0; + effectiveTagCounts[i] = pt.tagCount + (needsNewSubfileType[i] ? 1 : 0); + ifdRegionSizes[i] = pt.getIfdAndOverflowSize() + extraBytes; } // Phase 2: Compute absolute offsets for each IFD and its image data. @@ -121,20 +131,30 @@ public class CogAssembler { // Write each IFD + its overflow data for (int i = 0; i < ifdCount; i++) { TiffIfdParser.ParsedTiff pt = parsedTiffs.get(i); + boolean isOverview = i > 0; int ifdStart = ifdAbsoluteOffsets[i]; int nextIfdOffset = (i + 1 < ifdCount) ? ifdAbsoluteOffsets[i + 1] : 0; + int tagCountForIfd = effectiveTagCounts[i]; // Compute where this IFD's overflow data will be in the output - int overflowStartInOutput = ifdStart + pt.getIfdSize(); + // Account for possible extra 12 bytes from injected tag + int overflowStartInOutput = ifdStart + 2 + tagCountForIfd * 12 + 4; // Patch the IFD entries: // - Rebase overflow pointers from original file offsets to new output offsets // - Rewrite TileOffsets/StripOffsets to point to the new image data location + // - Inject NewSubfileType=1 for overview IFDs if missing byte[] patchedEntries = - patchIfdEntries(pt, overflowStartInOutput, imageDataAbsoluteOffsets[i], i > 0, byteOrder); + patchIfdEntries( + pt, + overflowStartInOutput, + imageDataAbsoluteOffsets[i], + isOverview, + needsNewSubfileType[i], + byteOrder); - // Write: tag count (2 bytes) + entries (tagCount*12) + next IFD offset (4 bytes) - writeShort(dos, byteOrder, pt.tagCount); + // Write: tag count (2 bytes) + entries (tagCountForIfd*12) + next IFD offset (4 bytes) + writeShort(dos, byteOrder, tagCountForIfd); dos.write(patchedEntries); writeInt(dos, byteOrder, nextIfdOffset); @@ -157,7 +177,7 @@ public class CogAssembler { * <ol> * <li>Overflow data pointers (rebase from original file offset to new output offset) * <li>TileOffsets/StripOffsets values (point to new image data location) - * <li>Inject NewSubfileType=1 for overview IFDs (if not already present) + * <li>Set or inject NewSubfileType=1 for overview IFDs * </ol> */ private static byte[] patchIfdEntries( @@ -165,6 +185,7 @@ public class CogAssembler { int newOverflowStart, int newImageDataStart, boolean isOverview, + boolean injectNewSubfileType, ByteOrder byteOrder) { byte[] entries = pt.ifdEntries.clone(); @@ -179,7 +200,7 @@ public class CogAssembler { int count = buf.getInt(offset + 4); int valueSize = count * getFieldTypeSize(fieldType); - // Handle NewSubfileType tag for overviews + // Handle NewSubfileType tag for overviews (when already present) if (tag == TiffIfdParser.TAG_NEW_SUBFILE_TYPE && isOverview) { buf.putInt(offset + 8, REDUCED_IMAGE); continue; @@ -216,9 +237,62 @@ public class CogAssembler { } } + // If we need to inject NewSubfileType, insert a 12-byte entry in sorted tag order + if (injectNewSubfileType) { + return insertNewSubfileTypeTag(entries, pt.tagCount, byteOrder); + } + return entries; } + /** + * Insert a NewSubfileType tag entry (tag 254) into a sorted IFD entry array. The new entry is + * placed at the correct position to maintain tag sort order, and existing entries after it are + * shifted right by 12 bytes. + * + * @param entries The original IFD entries (tagCount * 12 bytes) + * @param tagCount The original number of tags + * @param byteOrder The byte order for writing the new entry + * @return A new byte array that is 12 bytes longer, with the NewSubfileType entry inserted + */ + private static byte[] insertNewSubfileTypeTag(byte[] entries, int tagCount, ByteOrder byteOrder) { + // NewSubfileType = tag 254, very low, typically goes near the start + byte[] newEntries = new byte[entries.length + 12]; + ByteBuffer srcBuf = ByteBuffer.wrap(entries).order(byteOrder); + + // Find insertion point: first tag with code > 254 + int insertIdx = tagCount; // default: append at end + for (int i = 0; i < tagCount; i++) { + int tag = srcBuf.getShort(i * 12) & 0xFFFF; + if (tag > TiffIfdParser.TAG_NEW_SUBFILE_TYPE) { + insertIdx = i; + break; + } + } + + // Copy entries before insertion point + int beforeBytes = insertIdx * 12; + if (beforeBytes > 0) { + System.arraycopy(entries, 0, newEntries, 0, beforeBytes); + } + + // Write the NewSubfileType entry at insertion point + ByteBuffer newBuf = ByteBuffer.wrap(newEntries).order(byteOrder); + int insertOffset = insertIdx * 12; + newBuf.putShort(insertOffset, (short) TiffIfdParser.TAG_NEW_SUBFILE_TYPE); // tag = 254 + newBuf.putShort(insertOffset + 2, (short) 4); // type = LONG + newBuf.putInt(insertOffset + 4, 1); // count = 1 + newBuf.putInt(insertOffset + 8, REDUCED_IMAGE); // value = 1 + + // Copy entries after insertion point + int afterBytes = entries.length - beforeBytes; + if (afterBytes > 0) { + System.arraycopy(entries, beforeBytes, newEntries, beforeBytes + 12, afterBytes); + } + + return newEntries; + } + /** Write a 16-bit value respecting byte order */ private static void writeShort(DataOutputStream dos, ByteOrder order, int value) throws IOException { diff --git a/common/src/main/java/org/apache/sedona/common/raster/cog/CogWriter.java b/common/src/main/java/org/apache/sedona/common/raster/cog/CogWriter.java index 5a0bad8c26..4adadedd43 100644 --- a/common/src/main/java/org/apache/sedona/common/raster/cog/CogWriter.java +++ b/common/src/main/java/org/apache/sedona/common/raster/cog/CogWriter.java @@ -38,7 +38,6 @@ import org.geotools.coverage.grid.io.AbstractGridFormat; import org.geotools.coverage.processing.Operations; import org.geotools.gce.geotiff.GeoTiffWriteParams; import org.geotools.gce.geotiff.GeoTiffWriter; -import org.geotools.geometry.jts.ReferencedEnvelope; import org.geotools.referencing.operation.transform.AffineTransform2D; /** @@ -87,6 +86,16 @@ public class CogWriter { if (compressionQuality < 0) { compressionQuality = 0.2; } + if (compressionQuality > 1.0) { + throw new IllegalArgumentException( + "compressionQuality must be between 0.0 and 1.0, got: " + compressionQuality); + } + if (tileSize <= 0) { + throw new IllegalArgumentException("tileSize must be positive, got: " + tileSize); + } + if ((tileSize & (tileSize - 1)) != 0) { + throw new IllegalArgumentException("tileSize must be a power of 2, got: " + tileSize); + } RenderedImage image = raster.getRenderedImage(); int cols = image.getWidth(); @@ -104,11 +113,9 @@ public class CogWriter { // Step 3: Write each as a tiled GeoTIFF byte array List<byte[]> tiffBytes = new ArrayList<>(); - tiffBytes.add( - writeAsTiledGeoTiff(raster, compressionType, compressionQuality, tileSize, false)); + tiffBytes.add(writeAsTiledGeoTiff(raster, compressionType, compressionQuality, tileSize)); for (GridCoverage2D overview : overviews) { - tiffBytes.add( - writeAsTiledGeoTiff(overview, compressionType, compressionQuality, tileSize, true)); + tiffBytes.add(writeAsTiledGeoTiff(overview, compressionType, compressionQuality, tileSize)); } // Step 4: Parse each TIFF's IFD structure @@ -187,7 +194,6 @@ public class CogWriter { int newHeight = (int) Math.ceil((double) image.getHeight() / decimationFactor); // Use GeoTools Operations.DEFAULT.resample to downsample - ReferencedEnvelope envelope = raster.getEnvelope2D(); CoordinateReferenceSystem crs = raster.getCoordinateReferenceSystem2D(); AffineTransform2D originalTransform = @@ -223,17 +229,11 @@ public class CogWriter { * @param compressionType Compression type * @param compressionQuality Quality 0.0 to 1.0 * @param tileSize Tile dimensions in pixels - * @param isOverview If true, sets NewSubfileType=1 (ReducedImage) — note: GeoTools may not - * support this directly, in which case the CogAssembler handles it during assembly. * @return Tiled GeoTIFF as byte array * @throws IOException if writing fails */ private static byte[] writeAsTiledGeoTiff( - GridCoverage2D raster, - String compressionType, - double compressionQuality, - int tileSize, - boolean isOverview) + GridCoverage2D raster, String compressionType, double compressionQuality, int tileSize) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); diff --git a/common/src/main/java/org/apache/sedona/common/raster/cog/TiffIfdParser.java b/common/src/main/java/org/apache/sedona/common/raster/cog/TiffIfdParser.java index 7c0d31cd1e..60e48e0051 100644 --- a/common/src/main/java/org/apache/sedona/common/raster/cog/TiffIfdParser.java +++ b/common/src/main/java/org/apache/sedona/common/raster/cog/TiffIfdParser.java @@ -103,6 +103,9 @@ public class TiffIfdParser { /** Byte counts of each tile/strip */ public final int[] segmentByteCounts; + /** Whether the IFD contains a NewSubfileType tag */ + public final boolean hasNewSubfileType; + /** The total size of the IFD region: 2 (count) + tagCount*12 + 4 (next pointer) */ public int getIfdSize() { return 2 + tagCount * 12 + 4; @@ -122,7 +125,8 @@ public class TiffIfdParser { int overflowDataStart, byte[] imageData, int[] segmentOffsets, - int[] segmentByteCounts) { + int[] segmentByteCounts, + boolean hasNewSubfileType) { this.byteOrder = byteOrder; this.ifdOffset = ifdOffset; this.tagCount = tagCount; @@ -132,6 +136,7 @@ public class TiffIfdParser { this.imageData = imageData; this.segmentOffsets = segmentOffsets; this.segmentByteCounts = segmentByteCounts; + this.hasNewSubfileType = hasNewSubfileType; } } @@ -168,6 +173,10 @@ public class TiffIfdParser { // Read first IFD offset int ifdOffset = buf.getInt(4); + if (ifdOffset < 8 || ifdOffset >= tiffBytes.length - 2) { + throw new IllegalArgumentException( + "IFD offset out of range: " + ifdOffset + " (file size: " + tiffBytes.length + ")"); + } // Read number of directory entries int tagCount = buf.getShort(ifdOffset) & 0xFFFF; @@ -175,6 +184,15 @@ public class TiffIfdParser { // Read all IFD entries (12 bytes each) int entriesStart = ifdOffset + 2; int entriesLen = tagCount * 12; + if (entriesStart + entriesLen > tiffBytes.length) { + throw new IllegalArgumentException( + "IFD entries extend beyond file: entriesStart=" + + entriesStart + + " entriesLen=" + + entriesLen + + " fileSize=" + + tiffBytes.length); + } byte[] ifdEntries = new byte[entriesLen]; System.arraycopy(tiffBytes, entriesStart, ifdEntries, 0, entriesLen); @@ -182,6 +200,7 @@ public class TiffIfdParser { int offsetsTag = -1; int byteCountsTag = -1; int segmentCount = 0; + boolean hasNewSubfileType = false; // Also track the overflow data region int overflowStart = Integer.MAX_VALUE; @@ -200,11 +219,24 @@ public class TiffIfdParser { segmentCount = count; } else if (tag == TAG_TILE_BYTE_COUNTS || tag == TAG_STRIP_BYTE_COUNTS) { byteCountsTag = tag; + } else if (tag == TAG_NEW_SUBFILE_TYPE) { + hasNewSubfileType = true; } // Track overflow data region (values > 4 bytes stored outside IFD entries) if (valueSize > 4) { int valOffset = buf.getInt(entryOffset + 8); + if (valOffset < 0 || valOffset + valueSize > tiffBytes.length) { + throw new IllegalArgumentException( + "Overflow data for tag " + + tag + + " out of range: offset=" + + valOffset + + " size=" + + valueSize + + " fileSize=" + + tiffBytes.length); + } overflowStart = Math.min(overflowStart, valOffset); overflowEnd = Math.max(overflowEnd, valOffset + valueSize); } @@ -239,6 +271,16 @@ public class TiffIfdParser { imageDataEnd = Math.max(imageDataEnd, segmentOffsets[i] + segmentByteCounts[i]); } + if (imageDataStart < 0 || imageDataEnd > tiffBytes.length) { + throw new IllegalArgumentException( + "Image data region out of range: start=" + + imageDataStart + + " end=" + + imageDataEnd + + " fileSize=" + + tiffBytes.length); + } + // Extract image data byte[] imageData = new byte[imageDataEnd - imageDataStart]; System.arraycopy(tiffBytes, imageDataStart, imageData, 0, imageData.length); @@ -258,7 +300,8 @@ public class TiffIfdParser { overflowDataStart, imageData, relativeOffsets, - segmentByteCounts); + segmentByteCounts, + hasNewSubfileType); } /** diff --git a/common/src/test/java/org/apache/sedona/common/raster/cog/CogWriterTest.java b/common/src/test/java/org/apache/sedona/common/raster/cog/CogWriterTest.java index 45c7be3006..dfbbd07d0e 100644 --- a/common/src/test/java/org/apache/sedona/common/raster/cog/CogWriterTest.java +++ b/common/src/test/java/org/apache/sedona/common/raster/cog/CogWriterTest.java @@ -242,4 +242,181 @@ public class CogWriterTest { assertTrue(parsed.imageData.length > 0); assertTrue(parsed.ifdEntries.length == parsed.tagCount * 12); } + + @Test + public void testOverviewIfdHasNewSubfileType() throws IOException { + // Create a 512x512 raster that will have at least one overview + double[] bandValues = new double[512 * 512]; + for (int i = 0; i < bandValues.length; i++) { + bandValues[i] = (i * 3) % 256; + } + GridCoverage2D raster = + RasterConstructors.makeNonEmptyRaster( + 1, "d", 512, 512, 0, 0, 1, -1, 0, 0, 4326, new double[][] {bandValues}); + + byte[] cogBytes = RasterOutputs.asCloudOptimizedGeoTiff(raster); + ByteOrder byteOrder = (cogBytes[0] == 'I') ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN; + ByteBuffer buf = ByteBuffer.wrap(cogBytes).order(byteOrder); + + // Navigate to second IFD (first overview) + int firstIfdOffset = buf.getInt(4); + int tagCount0 = buf.getShort(firstIfdOffset) & 0xFFFF; + int nextIfdPointerPos = firstIfdOffset + 2 + tagCount0 * 12; + int secondIfdOffset = buf.getInt(nextIfdPointerPos); + assertTrue("Should have at least one overview IFD", secondIfdOffset > 0); + + // Scan second IFD for NewSubfileType tag (254) + int tagCount1 = buf.getShort(secondIfdOffset) & 0xFFFF; + boolean foundNewSubfileType = false; + int newSubfileTypeValue = -1; + for (int i = 0; i < tagCount1; i++) { + int entryOffset = secondIfdOffset + 2 + i * 12; + int tag = buf.getShort(entryOffset) & 0xFFFF; + if (tag == 254) { + foundNewSubfileType = true; + newSubfileTypeValue = buf.getInt(entryOffset + 8); + break; + } + } + assertTrue("Overview IFD must contain NewSubfileType tag (254)", foundNewSubfileType); + assertEquals("NewSubfileType must be 1 (ReducedImage)", 1, newSubfileTypeValue); + } + + @Test + public void testInvalidInputParameters() { + double[] bandValues = new double[50 * 50]; + GridCoverage2D raster = + RasterConstructors.makeNonEmptyRaster( + 1, "d", 50, 50, 0, 0, 1, -1, 0, 0, 4326, new double[][] {bandValues}); + + // compressionQuality > 1 + try { + CogWriter.write(raster, "Deflate", 1.5, 256); + fail("Expected IllegalArgumentException for compressionQuality > 1"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("compressionQuality")); + } catch (IOException e) { + fail("Expected IllegalArgumentException, not IOException"); + } + + // tileSize <= 0 + try { + CogWriter.write(raster, "Deflate", 0.5, 0); + fail("Expected IllegalArgumentException for tileSize <= 0"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("tileSize")); + } catch (IOException e) { + fail("Expected IllegalArgumentException, not IOException"); + } + + // tileSize not power of 2 + try { + CogWriter.write(raster, "Deflate", 0.5, 100); + fail("Expected IllegalArgumentException for non-power-of-2 tileSize"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("power of 2")); + } catch (IOException e) { + fail("Expected IllegalArgumentException, not IOException"); + } + } + + @Test + public void testParserRejectsMalformedTiff() { + // Too short + try { + TiffIfdParser.parse(new byte[] {0, 0, 0}); + fail("Expected IllegalArgumentException for short input"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("too short")); + } + + // Invalid byte order marker + try { + TiffIfdParser.parse(new byte[] {'X', 'X', 0, 42, 0, 0, 0, 8}); + fail("Expected IllegalArgumentException for invalid byte order"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("byte order")); + } + + // Valid header but IFD offset points beyond file + byte[] badOffset = new byte[16]; + badOffset[0] = 'I'; + badOffset[1] = 'I'; + badOffset[2] = 42; + badOffset[3] = 0; + // IFD offset = 9999 (way beyond file) + ByteBuffer b = ByteBuffer.wrap(badOffset).order(ByteOrder.LITTLE_ENDIAN); + b.putInt(4, 9999); + try { + TiffIfdParser.parse(badOffset); + fail("Expected IllegalArgumentException for out-of-range IFD offset"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("out of range")); + } + } + + @Test + public void testCogTileOffsetsAreForwardPointing() throws IOException { + // Create a raster with overviews + double[] bandValues = new double[512 * 512]; + for (int i = 0; i < bandValues.length; i++) { + bandValues[i] = (i * 11) % 256; + } + GridCoverage2D raster = + RasterConstructors.makeNonEmptyRaster( + 1, "d", 512, 512, 0, 0, 1, -1, 0, 0, 4326, new double[][] {bandValues}); + + byte[] cogBytes = RasterOutputs.asCloudOptimizedGeoTiff(raster); + ByteOrder byteOrder = (cogBytes[0] == 'I') ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN; + ByteBuffer buf = ByteBuffer.wrap(cogBytes).order(byteOrder); + + // Walk all IFDs and verify TileOffsets/StripOffsets point within file bounds + int ifdOffset = buf.getInt(4); + int ifdIndex = 0; + int lastIfdEnd = 0; + + while (ifdOffset != 0) { + int tagCount = buf.getShort(ifdOffset) & 0xFFFF; + int ifdEnd = ifdOffset + 2 + tagCount * 12 + 4; + lastIfdEnd = Math.max(lastIfdEnd, ifdEnd); + + for (int i = 0; i < tagCount; i++) { + int entryOffset = ifdOffset + 2 + i * 12; + int tag = buf.getShort(entryOffset) & 0xFFFF; + int fieldType = buf.getShort(entryOffset + 2) & 0xFFFF; + int count = buf.getInt(entryOffset + 4); + + // Check TileOffsets (324) or StripOffsets (273) + if (tag == 324 || tag == 273) { + if (count == 1) { + int offset = buf.getInt(entryOffset + 8); + assertTrue( + "IFD " + ifdIndex + ": TileOffset " + offset + " must be within file", + offset >= 0 && offset < cogBytes.length); + } else { + // Offsets stored in overflow area + int arrayOffset = buf.getInt(entryOffset + 8); + for (int j = 0; j < count; j++) { + int tileOffset = buf.getInt(arrayOffset + j * 4); + assertTrue( + "IFD " + ifdIndex + " tile " + j + ": offset " + tileOffset + " out of range", + tileOffset >= 0 && tileOffset < cogBytes.length); + } + } + } + } + + // Read next IFD offset + int nextIfdPointerPos = ifdOffset + 2 + tagCount * 12; + ifdOffset = buf.getInt(nextIfdPointerPos); + ifdIndex++; + } + + // Verify we found at least 2 IFDs (full-res + overview) + assertTrue("Expected at least 2 IFDs, found " + ifdIndex, ifdIndex >= 2); + + // Verify all IFDs are before image data (forward-pointing) + // The last IFD end should be well before the end of the file + assertTrue("IFD region should be at start of file", lastIfdEnd < cogBytes.length / 2); + } }
