This is an automated email from the ASF dual-hosted git repository.

bchapuis pushed a commit to branch pmtile-out-of-memory-error
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git

commit b8920f6f56bef7e23191d9ab56a4f16006bf9dea
Author: Bertil Chapuis <[email protected]>
AuthorDate: Wed Dec 6 00:49:08 2023 +0100

    Enable internal compression in pmtiles
---
 .../apache/baremaps/tilestore/pmtiles/PMTiles.java | 164 +++++++++++----------
 .../baremaps/tilestore/pmtiles/PMTilesWriter.java  |  23 ++-
 .../baremaps/tilestore/pmtiles/PMTilesTest.java    |  17 +--
 3 files changed, 109 insertions(+), 95 deletions(-)

diff --git a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
index e8bf2f30..38e8544e 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
@@ -18,10 +18,13 @@
 package org.apache.baremaps.tilestore.pmtiles;
 
 import com.google.common.io.LittleEndianDataInputStream;
-import com.google.common.io.LittleEndianDataOutputStream;
 import com.google.common.math.LongMath;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -31,35 +34,35 @@ public class PMTiles {
     return high * 0x100000000L + low;
   }
 
-  public static long readVarIntRemainder(LittleEndianDataInputStream input, long l)
+  public static long readVarIntRemainder(InputStream input, long l)
       throws IOException {
     long h, b;
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h = (b & 0x70) >> 4;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x7f) << 3;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x7f) << 10;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x7f) << 17;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x7f) << 24;
     if (b < 0x80) {
       return toNum(l, h);
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     h |= (b & 0x01) << 31;
     if (b < 0x80) {
       return toNum(l, h);
@@ -67,36 +70,36 @@ public class PMTiles {
     throw new RuntimeException("Expected varint not more than 10 bytes");
   }
 
-  public static int writeVarInt(LittleEndianDataOutputStream output, long value)
+  public static int writeVarInt(OutputStream output, long value)
       throws IOException {
     int n = 1;
     while (value >= 0x80) {
-      output.writeByte((byte) (value | 0x80));
+      output.write((byte) (value | 0x80));
       value >>>= 7;
       n++;
     }
-    output.writeByte((byte) value);
+    output.write((byte) value);
     return n;
   }
 
-  public static long readVarInt(LittleEndianDataInputStream input) throws IOException {
+  public static long readVarInt(InputStream input) throws IOException {
     long val, b;
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     val = b & 0x7f;
     if (b < 0x80) {
       return val;
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     val |= (b & 0x7f) << 7;
     if (b < 0x80) {
       return val;
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     val |= (b & 0x7f) << 14;
     if (b < 0x80) {
       return val;
     }
-    b = input.readByte() & 0xff;
+    b = input.read() & 0xff;
     val |= (b & 0x7f) << 21;
     if (b < 0x80) {
       return val;
@@ -209,44 +212,47 @@ public class PMTiles {
         (double) input.readInt() / 10000000);
   }
 
-  public static void serializeHeader(LittleEndianDataOutputStream output, Header header)
-      throws IOException {
-    output.writeByte((byte) 0x50);
-    output.writeByte((byte) 0x4D);
-    output.writeByte((byte) 0x54);
-    output.writeByte((byte) 0x69);
-    output.writeByte((byte) 0x6C);
-    output.writeByte((byte) 0x65);
-    output.writeByte((byte) 0x73);
-    output.writeByte((byte) header.getSpecVersion());
-    output.writeLong(header.getRootDirectoryOffset());
-    output.writeLong(header.getRootDirectoryLength());
-    output.writeLong(header.getJsonMetadataOffset());
-    output.writeLong(header.getJsonMetadataLength());
-    output.writeLong(header.getLeafDirectoryOffset());
-    output.writeLong(header.getLeafDirectoryLength());
-    output.writeLong(header.getTileDataOffset());
-    output.writeLong(header.getTileDataLength());
-    output.writeLong(header.getNumAddressedTiles());
-    output.writeLong(header.getNumTileEntries());
-    output.writeLong(header.getNumTileContents());
-    output.writeByte((byte) (header.isClustered() ? 1 : 0));
-    output.writeByte((byte) header.getInternalCompression().ordinal());
-    output.writeByte((byte) header.getTileCompression().ordinal());
-    output.writeByte((byte) header.getTileType().ordinal());
-    output.writeByte((byte) header.getMinZoom());
-    output.writeByte((byte) header.getMaxZoom());
-    output.writeInt((int) (header.getMinLon() * 10000000));
-    output.writeInt((int) (header.getMinLat() * 10000000));
-    output.writeInt((int) (header.getMaxLon() * 10000000));
-    output.writeInt((int) (header.getMaxLat() * 10000000));
-    output.writeByte((byte) header.getCenterZoom());
-    output.writeInt((int) (header.getCenterLon() * 10000000));
-    output.writeInt((int) (header.getCenterLat() * 10000000));
+  public static byte[] serializeHeader(Header header) {
+    var buffer = ByteBuffer.allocate(HEADER_SIZE_BYTES).order(ByteOrder.LITTLE_ENDIAN);
+    buffer.put((byte) 0x50);
+    buffer.put((byte) 0x4D);
+    buffer.put((byte) 0x54);
+    buffer.put((byte) 0x69);
+    buffer.put((byte) 0x6C);
+    buffer.put((byte) 0x65);
+    buffer.put((byte) 0x73);
+    buffer.put((byte) header.getSpecVersion());
+    buffer.putLong(header.getRootDirectoryOffset());
+    buffer.putLong(header.getRootDirectoryLength());
+    buffer.putLong(header.getJsonMetadataOffset());
+    buffer.putLong(header.getJsonMetadataLength());
+    buffer.putLong(header.getLeafDirectoryOffset());
+    buffer.putLong(header.getLeafDirectoryLength());
+    buffer.putLong(header.getTileDataOffset());
+    buffer.putLong(header.getTileDataLength());
+    buffer.putLong(header.getNumAddressedTiles());
+    buffer.putLong(header.getNumTileEntries());
+    buffer.putLong(header.getNumTileContents());
+    buffer.put((byte) (header.isClustered() ? 1 : 0));
+    buffer.put((byte) header.getInternalCompression().ordinal());
+    buffer.put((byte) header.getTileCompression().ordinal());
+    buffer.put((byte) header.getTileType().ordinal());
+    buffer.put((byte) header.getMinZoom());
+    buffer.put((byte) header.getMaxZoom());
+    buffer.putInt((int) (header.getMinLon() * 10000000));
+    buffer.putInt((int) (header.getMinLat() * 10000000));
+    buffer.putInt((int) (header.getMaxLon() * 10000000));
+    buffer.putInt((int) (header.getMaxLat() * 10000000));
+    buffer.put((byte) header.getCenterZoom());
+    buffer.putInt((int) (header.getCenterLon() * 10000000));
+    buffer.putInt((int) (header.getCenterLat() * 10000000));
+    buffer.flip();
+    return buffer.array();
   }
 
-  public static void serializeEntries(LittleEndianDataOutputStream output, List<Entry> entries)
+  public static void serializeEntries(OutputStream output, List<Entry> entries)
       throws IOException {
+    var buffer = ByteBuffer.allocate(entries.size() * 48);
     writeVarInt(output, entries.size());
     long lastId = 0;
     for (Entry entry : entries) {
@@ -268,6 +274,8 @@ public class PMTiles {
         writeVarInt(output, entry.getOffset() + 1);
       }
     }
+    buffer.flip();
+    output.write(buffer.array(), 0, buffer.limit());
   }
 
   public static List<Entry> deserializeEntries(LittleEndianDataInputStream buffer)
@@ -329,60 +337,62 @@ public class PMTiles {
     return null;
   }
 
-  public static Directories buildRootLeaves(List<Entry> entries, int leafSize) throws IOException {
+  public static Directories buildRootLeaves(List<Entry> entries, int leafSize,
+      Compression compression) throws IOException {
     var rootEntries = new ArrayList<Entry>();
     var numLeaves = 0;
     byte[] leavesBytes;
     byte[] rootBytes;
 
-    try (var leavesOutput = new ByteArrayOutputStream();
-        var leavesDataOutput = new LittleEndianDataOutputStream(leavesOutput)) {
+    try (var leavesOutput = new ByteArrayOutputStream()) {
       for (var i = 0; i < entries.size(); i += leafSize) {
         numLeaves++;
         var end = i + leafSize;
         if (i + leafSize > entries.size()) {
           end = entries.size();
         }
-
         var offset = leavesOutput.size();
-        serializeEntries(leavesDataOutput, entries.subList(i, end));
-        var length = leavesOutput.size();
-        rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
+        try (var leafOutput = new ByteArrayOutputStream()) {
+          try (var compressedLeafOutput = compression.compress(leafOutput)) {
+            serializeEntries(compressedLeafOutput, entries.subList(i, end));
+          }
+          var length = leafOutput.size();
+          rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
+          leavesOutput.write(leafOutput.toByteArray());
+        }
       }
-
       leavesBytes = leavesOutput.toByteArray();
     }
 
-    try (var rootOutput = new ByteArrayOutputStream();
-        var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
-      serializeEntries(rootDataOutput, rootEntries);
+    try (var rootOutput = new ByteArrayOutputStream()) {
+      try (var compressedRootOutput = compression.compress(rootOutput)) {
+        serializeEntries(compressedRootOutput, rootEntries);
+      }
       rootBytes = rootOutput.toByteArray();
     }
 
     return new Directories(rootBytes, leavesBytes, numLeaves);
   }
 
-  public static Directories optimizeDirectories(List<Entry> entries, int targetRootLenght)
+  public static Directories optimizeDirectories(List<Entry> entries, int targetRootLength,
+      Compression compression)
       throws IOException {
     if (entries.size() < 16384) {
-      byte[] rootBytes;
-      try (var rootOutput = new ByteArrayOutputStream();
-          var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
-        serializeEntries(rootDataOutput, entries);
-        rootBytes = rootOutput.toByteArray();
-      }
-      if (rootBytes.length <= targetRootLenght) {
-        return new Directories(rootBytes, new byte[] {}, 0);
+      try (var rootOutput = new ByteArrayOutputStream()) {
+        try (var compressedOutput = compression.compress(rootOutput)) {
+          serializeEntries(compressedOutput, entries);
+        }
+        byte[] rootBytes = rootOutput.toByteArray();
+        if (rootBytes.length <= targetRootLength) {
+          return new Directories(rootBytes, new byte[] {}, 0);
+        }
       }
     }
 
-    double leafSize = (double) entries.size() / 3500;
-    if (leafSize < 4096) {
-      leafSize = 4096;
-    }
+    double leafSize = Math.max((double) entries.size() / 3500, 4096);
     for (;;) {
-      var directories = buildRootLeaves(entries, (int) leafSize);
-      if (directories.getRoot().length <= targetRootLenght) {
+      var directories = buildRootLeaves(entries, (int) leafSize, compression);
+      if (directories.getRoot().length <= targetRootLength) {
         return directories;
       }
       leafSize = leafSize * 1.2;
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
index f01aaf3e..892a692d 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
@@ -19,7 +19,7 @@ package org.apache.baremaps.tilestore.pmtiles;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.hash.Hashing;
-import com.google.common.io.LittleEndianDataOutputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.nio.file.Files;
@@ -28,6 +28,8 @@ import java.util.*;
 
 public class PMTilesWriter {
 
+  private Compression compression = Compression.Gzip;
+
   private Path path;
 
   private Map<String, Object> metadata = new HashMap<>();
@@ -153,9 +155,16 @@ public class PMTilesWriter {
       entries.sort(Comparator.comparingLong(Entry::getTileId));
     }
 
-    var metadataBytes = new ObjectMapper().writeValueAsBytes(metadata);
+    var directories = PMTiles.optimizeDirectories(entries, 16247, compression);
+
+    byte[] metadataBytes;
+    try (var metadataOutput = new ByteArrayOutputStream()) {
+      try (var compressedMetadataOutput = compression.compress(metadataOutput)) {
+        new ObjectMapper().writeValue(compressedMetadataOutput, metadata);
+      }
+      metadataBytes = metadataOutput.toByteArray();
+    }
 
-    var directories = PMTiles.optimizeDirectories(entries, 16247);
     var rootOffset = 127;
     var rootLength = directories.getRoot().length;
     var metadataOffset = rootOffset + rootLength;
@@ -172,8 +181,8 @@ public class PMTilesWriter {
     header.setNumTileContents(numTiles);
     header.setClustered(true);
 
-    header.setInternalCompression(Compression.None);
-    header.setTileCompression(Compression.Gzip);
+    header.setInternalCompression(compression);
+    header.setTileCompression(compression);
     header.setTileType(TileType.mvt);
     header.setRootOffset(rootOffset);
     header.setRootLength(rootLength);
@@ -194,8 +203,8 @@ public class PMTilesWriter {
     header.setCenterLat(centerLat);
     header.setCenterLon(centerLon);
 
-    try (var output = new LittleEndianDataOutputStream(new FileOutputStream(path.toFile()))) {
-      PMTiles.serializeHeader(output, header);
+    try (var output = new FileOutputStream(path.toFile())) {
+      output.write(PMTiles.serializeHeader(header));
       output.write(directories.getRoot());
       output.write(metadataBytes);
       output.write(directories.getLeaves());
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java b/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
index e078d1d5..e06f8d03 100644
--- a/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
+++ b/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
@@ -20,7 +20,6 @@ package org.apache.baremaps.tilestore.pmtiles;
 import static org.junit.jupiter.api.Assertions.*;
 
 import com.google.common.io.LittleEndianDataInputStream;
-import com.google.common.io.LittleEndianDataOutputStream;
 import com.google.common.math.LongMath;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -59,15 +58,13 @@ class PMTilesTest {
   void encodeVarInt() throws IOException {
     for (long i = 0; i < 1000; i++) {
       var array = new ByteArrayOutputStream();
-      var output = new LittleEndianDataOutputStream(array);
-      PMTiles.writeVarInt(output, i);
+      PMTiles.writeVarInt(array, i);
       var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray()));
       assertEquals(i, PMTiles.readVarInt(input));
     }
     for (long i = Long.MAX_VALUE - 1000; i < Long.MAX_VALUE; i++) {
       var array = new ByteArrayOutputStream();
-      var output = new LittleEndianDataOutputStream(array);
-      PMTiles.writeVarInt(output, i);
+      PMTiles.writeVarInt(array, i);
       var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray()));
       assertEquals(i, PMTiles.readVarInt(input));
     }
@@ -190,9 +187,7 @@ class PMTilesTest {
         0);
 
     var array = new ByteArrayOutputStream();
-
-    var output = new LittleEndianDataOutputStream(array);
-    PMTiles.serializeHeader(output, header);
+    array.write(PMTiles.serializeHeader(header));
 
     var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray()));
     var header2 = PMTiles.deserializeHeader(input);
@@ -259,7 +254,7 @@ class PMTilesTest {
   @Test
   void buildRootLeaves() throws IOException {
     var entries = List.of(new Entry(100, 1, 1, 0));
-    var directories = PMTiles.buildRootLeaves(entries, 1);
+    var directories = PMTiles.buildRootLeaves(entries, 1, Compression.None);
     assertEquals(directories.getNumLeaves(), 1);
 
   }
@@ -269,7 +264,7 @@ class PMTilesTest {
     var random = new Random(3857);
     var entries = new ArrayList<Entry>();
     entries.add(new Entry(0, 0, 100, 1));
-    var directories = PMTiles.optimizeDirectories(entries, 100);
+    var directories = PMTiles.optimizeDirectories(entries, 100, Compression.None);
     assertFalse(directories.getLeaves().length > 0);
     assertEquals(0, directories.getNumLeaves());
 
@@ -280,7 +275,7 @@ class PMTilesTest {
       entries.add(new Entry(i, offset, randTileSize, 1));
       offset += randTileSize;
     }
-    directories = PMTiles.optimizeDirectories(entries, 1024);
+    directories = PMTiles.optimizeDirectories(entries, 1024, Compression.None);
     assertFalse(directories.getRoot().length > 1024);
     assertFalse(directories.getNumLeaves() == 0);
     assertFalse(directories.getLeaves().length == 0);

Reply via email to