This is an automated email from the ASF dual-hosted git repository.

bchapuis pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git


The following commit(s) were added to refs/heads/main by this push:
     new b56b3257 Enable internal compression in pmtiles (#811)
b56b3257 is described below

commit b56b3257a5d317fa7258007c17954e5bc25d3812
Author: Bertil Chapuis <[email protected]>
AuthorDate: Sat Dec 9 12:54:20 2023 +0100

    Enable internal compression in pmtiles (#811)
    
    * Enable internal compression in pmtiles
    
    * Use buffer instead of little endian stream when deserializing headers
---
 .../apache/baremaps/tilestore/pmtiles/PMTiles.java | 227 +++++++++++----------
 .../baremaps/tilestore/pmtiles/PMTilesWriter.java  |  23 ++-
 .../baremaps/tilestore/pmtiles/PMTilesTest.java    |  17 +-
 3 files changed, 143 insertions(+), 124 deletions(-)

diff --git a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
index e8bf2f30..f44a9138 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
@@ -17,11 +17,13 @@
 
 package org.apache.baremaps.tilestore.pmtiles;
 
-import com.google.common.io.LittleEndianDataInputStream;
-import com.google.common.io.LittleEndianDataOutputStream;
 import com.google.common.math.LongMath;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -31,35 +33,35 @@ public class PMTiles {
     return high * 0x100000000L + low;
   }
 
-  public static long readVarIntRemainder(LittleEndianDataInputStream input, 
long l)
+  public static long readVarIntRemainder(InputStream input, long l)
       throws IOException {
     long h, b;
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h = (b & 0x70) >> 4;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x7f) << 3;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x7f) << 10;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x7f) << 17;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x7f) << 24;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x01) << 31;
     if (b < 0x80) {
       return toNum(l, h);
@@ -67,36 +69,36 @@ public class PMTiles {
     throw new RuntimeException("Expected varint not more than 10 bytes");
   }
 
-  public static int writeVarInt(LittleEndianDataOutputStream output, long 
value)
+  public static int writeVarInt(OutputStream output, long value)
       throws IOException {
     int n = 1;
     while (value >= 0x80) {
-      output.writeByte((byte) (value | 0x80));
+      output.write((byte) (value | 0x80));
       value >>>= 7;
       n++;
     }
-    output.writeByte((byte) value);
+    output.write((byte) value);
     return n;
   }
 
-  public static long readVarInt(LittleEndianDataInputStream input) throws 
IOException {
+  public static long readVarInt(InputStream input) throws IOException {
     long val, b;
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     val = b & 0x7f;
     if (b < 0x80) {
       return val;
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     val |= (b & 0x7f) << 7;
     if (b < 0x80) {
       return val;
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     val |= (b & 0x7f) << 14;
     if (b < 0x80) {
       return val;
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     val |= (b & 0x7f) << 21;
     if (b < 0x80) {
       return val;
@@ -179,74 +181,83 @@ public class PMTiles {
 
   private static final int HEADER_SIZE_BYTES = 127;
 
-  public static Header deserializeHeader(LittleEndianDataInputStream input) 
throws IOException {
-    input.skipBytes(7);
+  public static Header deserializeHeader(InputStream input) throws IOException 
{
+    byte[] bytes = new byte[HEADER_SIZE_BYTES];
+    var num = input.read(bytes);
+    if (num != HEADER_SIZE_BYTES) {
+      throw new IOException("Invalid header size");
+    }
+    var buffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN);
+    buffer.position(7);
     return new Header(
-        input.readByte(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readLong(),
-        input.readByte() == 1,
-        Compression.values()[input.readByte()],
-        Compression.values()[input.readByte()],
-        TileType.values()[input.readByte()],
-        input.readByte(),
-        input.readByte(),
-        (double) input.readInt() / 10000000,
-        (double) input.readInt() / 10000000,
-        (double) input.readInt() / 10000000,
-        (double) input.readInt() / 10000000,
-        input.readByte(),
-        (double) input.readInt() / 10000000,
-        (double) input.readInt() / 10000000);
+        buffer.get(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.getLong(),
+        buffer.get() == 1,
+        Compression.values()[buffer.get()],
+        Compression.values()[buffer.get()],
+        TileType.values()[buffer.get()],
+        buffer.get(),
+        buffer.get(),
+        (double) buffer.getInt() / 10000000,
+        (double) buffer.getInt() / 10000000,
+        (double) buffer.getInt() / 10000000,
+        (double) buffer.getInt() / 10000000,
+        buffer.get(),
+        (double) buffer.getInt() / 10000000,
+        (double) buffer.getInt() / 10000000);
   }
 
-  public static void serializeHeader(LittleEndianDataOutputStream output, 
Header header)
-      throws IOException {
-    output.writeByte((byte) 0x50);
-    output.writeByte((byte) 0x4D);
-    output.writeByte((byte) 0x54);
-    output.writeByte((byte) 0x69);
-    output.writeByte((byte) 0x6C);
-    output.writeByte((byte) 0x65);
-    output.writeByte((byte) 0x73);
-    output.writeByte((byte) header.getSpecVersion());
-    output.writeLong(header.getRootDirectoryOffset());
-    output.writeLong(header.getRootDirectoryLength());
-    output.writeLong(header.getJsonMetadataOffset());
-    output.writeLong(header.getJsonMetadataLength());
-    output.writeLong(header.getLeafDirectoryOffset());
-    output.writeLong(header.getLeafDirectoryLength());
-    output.writeLong(header.getTileDataOffset());
-    output.writeLong(header.getTileDataLength());
-    output.writeLong(header.getNumAddressedTiles());
-    output.writeLong(header.getNumTileEntries());
-    output.writeLong(header.getNumTileContents());
-    output.writeByte((byte) (header.isClustered() ? 1 : 0));
-    output.writeByte((byte) header.getInternalCompression().ordinal());
-    output.writeByte((byte) header.getTileCompression().ordinal());
-    output.writeByte((byte) header.getTileType().ordinal());
-    output.writeByte((byte) header.getMinZoom());
-    output.writeByte((byte) header.getMaxZoom());
-    output.writeInt((int) (header.getMinLon() * 10000000));
-    output.writeInt((int) (header.getMinLat() * 10000000));
-    output.writeInt((int) (header.getMaxLon() * 10000000));
-    output.writeInt((int) (header.getMaxLat() * 10000000));
-    output.writeByte((byte) header.getCenterZoom());
-    output.writeInt((int) (header.getCenterLon() * 10000000));
-    output.writeInt((int) (header.getCenterLat() * 10000000));
+  public static byte[] serializeHeader(Header header) {
+    var buffer = 
ByteBuffer.allocate(HEADER_SIZE_BYTES).order(ByteOrder.LITTLE_ENDIAN);
+    buffer.put((byte) 0x50);
+    buffer.put((byte) 0x4D);
+    buffer.put((byte) 0x54);
+    buffer.put((byte) 0x69);
+    buffer.put((byte) 0x6C);
+    buffer.put((byte) 0x65);
+    buffer.put((byte) 0x73);
+    buffer.put((byte) header.getSpecVersion());
+    buffer.putLong(header.getRootDirectoryOffset());
+    buffer.putLong(header.getRootDirectoryLength());
+    buffer.putLong(header.getJsonMetadataOffset());
+    buffer.putLong(header.getJsonMetadataLength());
+    buffer.putLong(header.getLeafDirectoryOffset());
+    buffer.putLong(header.getLeafDirectoryLength());
+    buffer.putLong(header.getTileDataOffset());
+    buffer.putLong(header.getTileDataLength());
+    buffer.putLong(header.getNumAddressedTiles());
+    buffer.putLong(header.getNumTileEntries());
+    buffer.putLong(header.getNumTileContents());
+    buffer.put((byte) (header.isClustered() ? 1 : 0));
+    buffer.put((byte) header.getInternalCompression().ordinal());
+    buffer.put((byte) header.getTileCompression().ordinal());
+    buffer.put((byte) header.getTileType().ordinal());
+    buffer.put((byte) header.getMinZoom());
+    buffer.put((byte) header.getMaxZoom());
+    buffer.putInt((int) (header.getMinLon() * 10000000));
+    buffer.putInt((int) (header.getMinLat() * 10000000));
+    buffer.putInt((int) (header.getMaxLon() * 10000000));
+    buffer.putInt((int) (header.getMaxLat() * 10000000));
+    buffer.put((byte) header.getCenterZoom());
+    buffer.putInt((int) (header.getCenterLon() * 10000000));
+    buffer.putInt((int) (header.getCenterLat() * 10000000));
+    buffer.flip();
+    return buffer.array();
   }
 
-  public static void serializeEntries(LittleEndianDataOutputStream output, 
List<Entry> entries)
+  public static void serializeEntries(OutputStream output, List<Entry> entries)
       throws IOException {
+    var buffer = ByteBuffer.allocate(entries.size() * 48);
     writeVarInt(output, entries.size());
     long lastId = 0;
     for (Entry entry : entries) {
@@ -268,9 +279,11 @@ public class PMTiles {
         writeVarInt(output, entry.getOffset() + 1);
       }
     }
+    buffer.flip();
+    output.write(buffer.array(), 0, buffer.limit());
   }
 
-  public static List<Entry> deserializeEntries(LittleEndianDataInputStream 
buffer)
+  public static List<Entry> deserializeEntries(InputStream buffer)
       throws IOException {
     long numEntries = readVarInt(buffer);
     List<Entry> entries = new ArrayList<>((int) numEntries);
@@ -329,60 +342,62 @@ public class PMTiles {
     return null;
   }
 
-  public static Directories buildRootLeaves(List<Entry> entries, int leafSize) 
throws IOException {
+  public static Directories buildRootLeaves(List<Entry> entries, int leafSize,
+      Compression compression) throws IOException {
     var rootEntries = new ArrayList<Entry>();
     var numLeaves = 0;
     byte[] leavesBytes;
     byte[] rootBytes;
 
-    try (var leavesOutput = new ByteArrayOutputStream();
-        var leavesDataOutput = new LittleEndianDataOutputStream(leavesOutput)) 
{
+    try (var leavesOutput = new ByteArrayOutputStream()) {
       for (var i = 0; i < entries.size(); i += leafSize) {
         numLeaves++;
         var end = i + leafSize;
         if (i + leafSize > entries.size()) {
           end = entries.size();
         }
-
         var offset = leavesOutput.size();
-        serializeEntries(leavesDataOutput, entries.subList(i, end));
-        var length = leavesOutput.size();
-        rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 
0));
+        try (var leafOutput = new ByteArrayOutputStream()) {
+          try (var compressedLeafOutput = compression.compress(leafOutput)) {
+            serializeEntries(compressedLeafOutput, entries.subList(i, end));
+          }
+          var length = leafOutput.size();
+          rootEntries.add(new Entry(entries.get(i).getTileId(), offset, 
length, 0));
+          leavesOutput.write(leafOutput.toByteArray());
+        }
       }
-
       leavesBytes = leavesOutput.toByteArray();
     }
 
-    try (var rootOutput = new ByteArrayOutputStream();
-        var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
-      serializeEntries(rootDataOutput, rootEntries);
+    try (var rootOutput = new ByteArrayOutputStream()) {
+      try (var compressedRootOutput = compression.compress(rootOutput)) {
+        serializeEntries(compressedRootOutput, rootEntries);
+      }
       rootBytes = rootOutput.toByteArray();
     }
 
     return new Directories(rootBytes, leavesBytes, numLeaves);
   }
 
-  public static Directories optimizeDirectories(List<Entry> entries, int 
targetRootLenght)
+  public static Directories optimizeDirectories(List<Entry> entries, int 
targetRootLength,
+      Compression compression)
       throws IOException {
     if (entries.size() < 16384) {
-      byte[] rootBytes;
-      try (var rootOutput = new ByteArrayOutputStream();
-          var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
-        serializeEntries(rootDataOutput, entries);
-        rootBytes = rootOutput.toByteArray();
-      }
-      if (rootBytes.length <= targetRootLenght) {
-        return new Directories(rootBytes, new byte[] {}, 0);
+      try (var rootOutput = new ByteArrayOutputStream()) {
+        try (var compressedOutput = compression.compress(rootOutput)) {
+          serializeEntries(compressedOutput, entries);
+        }
+        byte[] rootBytes = rootOutput.toByteArray();
+        if (rootBytes.length <= targetRootLength) {
+          return new Directories(rootBytes, new byte[] {}, 0);
+        }
       }
     }
 
-    double leafSize = (double) entries.size() / 3500;
-    if (leafSize < 4096) {
-      leafSize = 4096;
-    }
+    double leafSize = Math.max((double) entries.size() / 3500, 4096);
     for (;;) {
-      var directories = buildRootLeaves(entries, (int) leafSize);
-      if (directories.getRoot().length <= targetRootLenght) {
+      var directories = buildRootLeaves(entries, (int) leafSize, compression);
+      if (directories.getRoot().length <= targetRootLength) {
         return directories;
       }
       leafSize = leafSize * 1.2;
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
index f01aaf3e..892a692d 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
@@ -19,7 +19,7 @@ package org.apache.baremaps.tilestore.pmtiles;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.hash.Hashing;
-import com.google.common.io.LittleEndianDataOutputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.nio.file.Files;
@@ -28,6 +28,8 @@ import java.util.*;
 
 public class PMTilesWriter {
 
+  private Compression compression = Compression.Gzip;
+
   private Path path;
 
   private Map<String, Object> metadata = new HashMap<>();
@@ -153,9 +155,16 @@ public class PMTilesWriter {
       entries.sort(Comparator.comparingLong(Entry::getTileId));
     }
 
-    var metadataBytes = new ObjectMapper().writeValueAsBytes(metadata);
+    var directories = PMTiles.optimizeDirectories(entries, 16247, compression);
+
+    byte[] metadataBytes;
+    try (var metadataOutput = new ByteArrayOutputStream()) {
+      try (var compressedMetadataOutput = 
compression.compress(metadataOutput)) {
+        new ObjectMapper().writeValue(compressedMetadataOutput, metadata);
+      }
+      metadataBytes = metadataOutput.toByteArray();
+    }
 
-    var directories = PMTiles.optimizeDirectories(entries, 16247);
     var rootOffset = 127;
     var rootLength = directories.getRoot().length;
     var metadataOffset = rootOffset + rootLength;
@@ -172,8 +181,8 @@ public class PMTilesWriter {
     header.setNumTileContents(numTiles);
     header.setClustered(true);
 
-    header.setInternalCompression(Compression.None);
-    header.setTileCompression(Compression.Gzip);
+    header.setInternalCompression(compression);
+    header.setTileCompression(compression);
     header.setTileType(TileType.mvt);
     header.setRootOffset(rootOffset);
     header.setRootLength(rootLength);
@@ -194,8 +203,8 @@ public class PMTilesWriter {
     header.setCenterLat(centerLat);
     header.setCenterLon(centerLon);
 
-    try (var output = new LittleEndianDataOutputStream(new 
FileOutputStream(path.toFile()))) {
-      PMTiles.serializeHeader(output, header);
+    try (var output = new FileOutputStream(path.toFile())) {
+      output.write(PMTiles.serializeHeader(header));
       output.write(directories.getRoot());
       output.write(metadataBytes);
       output.write(directories.getLeaves());
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java b/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
index e078d1d5..e06f8d03 100644
--- a/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
+++ b/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
@@ -20,7 +20,6 @@ package org.apache.baremaps.tilestore.pmtiles;
 import static org.junit.jupiter.api.Assertions.*;
 
 import com.google.common.io.LittleEndianDataInputStream;
-import com.google.common.io.LittleEndianDataOutputStream;
 import com.google.common.math.LongMath;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -59,15 +58,13 @@ class PMTilesTest {
   void encodeVarInt() throws IOException {
     for (long i = 0; i < 1000; i++) {
       var array = new ByteArrayOutputStream();
-      var output = new LittleEndianDataOutputStream(array);
-      PMTiles.writeVarInt(output, i);
+      PMTiles.writeVarInt(array, i);
       var input = new LittleEndianDataInputStream(new 
ByteArrayInputStream(array.toByteArray()));
       assertEquals(i, PMTiles.readVarInt(input));
     }
     for (long i = Long.MAX_VALUE - 1000; i < Long.MAX_VALUE; i++) {
       var array = new ByteArrayOutputStream();
-      var output = new LittleEndianDataOutputStream(array);
-      PMTiles.writeVarInt(output, i);
+      PMTiles.writeVarInt(array, i);
       var input = new LittleEndianDataInputStream(new 
ByteArrayInputStream(array.toByteArray()));
       assertEquals(i, PMTiles.readVarInt(input));
     }
@@ -190,9 +187,7 @@ class PMTilesTest {
         0);
 
     var array = new ByteArrayOutputStream();
-
-    var output = new LittleEndianDataOutputStream(array);
-    PMTiles.serializeHeader(output, header);
+    array.write(PMTiles.serializeHeader(header));
 
     var input = new LittleEndianDataInputStream(new 
ByteArrayInputStream(array.toByteArray()));
     var header2 = PMTiles.deserializeHeader(input);
@@ -259,7 +254,7 @@ class PMTilesTest {
   @Test
   void buildRootLeaves() throws IOException {
     var entries = List.of(new Entry(100, 1, 1, 0));
-    var directories = PMTiles.buildRootLeaves(entries, 1);
+    var directories = PMTiles.buildRootLeaves(entries, 1, Compression.None);
     assertEquals(directories.getNumLeaves(), 1);
 
   }
@@ -269,7 +264,7 @@ class PMTilesTest {
     var random = new Random(3857);
     var entries = new ArrayList<Entry>();
     entries.add(new Entry(0, 0, 100, 1));
-    var directories = PMTiles.optimizeDirectories(entries, 100);
+    var directories = PMTiles.optimizeDirectories(entries, 100, 
Compression.None);
     assertFalse(directories.getLeaves().length > 0);
     assertEquals(0, directories.getNumLeaves());
 
@@ -280,7 +275,7 @@ class PMTilesTest {
       entries.add(new Entry(i, offset, randTileSize, 1));
       offset += randTileSize;
     }
-    directories = PMTiles.optimizeDirectories(entries, 1024);
+    directories = PMTiles.optimizeDirectories(entries, 1024, Compression.None);
     assertFalse(directories.getRoot().length > 1024);
     assertFalse(directories.getNumLeaves() == 0);
     assertFalse(directories.getLeaves().length == 0);

Reply via email to