This is an automated email from the ASF dual-hosted git repository.
bchapuis pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git
The following commit(s) were added to refs/heads/main by this push:
new b56b3257 Enable internal compression in pmtiles (#811)
b56b3257 is described below
commit b56b3257a5d317fa7258007c17954e5bc25d3812
Author: Bertil Chapuis <[email protected]>
AuthorDate: Sat Dec 9 12:54:20 2023 +0100
Enable internal compression in pmtiles (#811)
* Enable internal compression in pmtiles
* Use buffer instead of little endian stream when deserializing headers
---
.../apache/baremaps/tilestore/pmtiles/PMTiles.java | 227 +++++++++++----------
.../baremaps/tilestore/pmtiles/PMTilesWriter.java | 23 ++-
.../baremaps/tilestore/pmtiles/PMTilesTest.java | 17 +-
3 files changed, 143 insertions(+), 124 deletions(-)
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
index e8bf2f30..f44a9138 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java
@@ -17,11 +17,13 @@
package org.apache.baremaps.tilestore.pmtiles;
-import com.google.common.io.LittleEndianDataInputStream;
-import com.google.common.io.LittleEndianDataOutputStream;
import com.google.common.math.LongMath;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;
@@ -31,35 +33,35 @@ public class PMTiles {
return high * 0x100000000L + low;
}
- public static long readVarIntRemainder(LittleEndianDataInputStream input, long l)
+ public static long readVarIntRemainder(InputStream input, long l)
throws IOException {
long h, b;
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
h = (b & 0x70) >> 4;
if (b < 0x80) {
return toNum(l, h);
}
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
h |= (b & 0x7f) << 3;
if (b < 0x80) {
return toNum(l, h);
}
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
h |= (b & 0x7f) << 10;
if (b < 0x80) {
return toNum(l, h);
}
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
h |= (b & 0x7f) << 17;
if (b < 0x80) {
return toNum(l, h);
}
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
h |= (b & 0x7f) << 24;
if (b < 0x80) {
return toNum(l, h);
}
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
h |= (b & 0x01) << 31;
if (b < 0x80) {
return toNum(l, h);
@@ -67,36 +69,36 @@ public class PMTiles {
throw new RuntimeException("Expected varint not more than 10 bytes");
}
- public static int writeVarInt(LittleEndianDataOutputStream output, long value)
+ public static int writeVarInt(OutputStream output, long value)
throws IOException {
int n = 1;
while (value >= 0x80) {
- output.writeByte((byte) (value | 0x80));
+ output.write((byte) (value | 0x80));
value >>>= 7;
n++;
}
- output.writeByte((byte) value);
+ output.write((byte) value);
return n;
}
- public static long readVarInt(LittleEndianDataInputStream input) throws IOException {
+ public static long readVarInt(InputStream input) throws IOException {
long val, b;
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
val = b & 0x7f;
if (b < 0x80) {
return val;
}
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
val |= (b & 0x7f) << 7;
if (b < 0x80) {
return val;
}
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
val |= (b & 0x7f) << 14;
if (b < 0x80) {
return val;
}
- b = input.readByte() & 0xff;
+ b = input.read() & 0xff;
val |= (b & 0x7f) << 21;
if (b < 0x80) {
return val;
@@ -179,74 +181,83 @@ public class PMTiles {
private static final int HEADER_SIZE_BYTES = 127;
- public static Header deserializeHeader(LittleEndianDataInputStream input) throws IOException {
- input.skipBytes(7);
+ public static Header deserializeHeader(InputStream input) throws IOException {
+ byte[] bytes = new byte[HEADER_SIZE_BYTES];
+ var num = input.read(bytes);
+ if (num != HEADER_SIZE_BYTES) {
+ throw new IOException("Invalid header size");
+ }
+ var buffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN);
+ buffer.position(7);
return new Header(
- input.readByte(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readLong(),
- input.readByte() == 1,
- Compression.values()[input.readByte()],
- Compression.values()[input.readByte()],
- TileType.values()[input.readByte()],
- input.readByte(),
- input.readByte(),
- (double) input.readInt() / 10000000,
- (double) input.readInt() / 10000000,
- (double) input.readInt() / 10000000,
- (double) input.readInt() / 10000000,
- input.readByte(),
- (double) input.readInt() / 10000000,
- (double) input.readInt() / 10000000);
+ buffer.get(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.getLong(),
+ buffer.get() == 1,
+ Compression.values()[buffer.get()],
+ Compression.values()[buffer.get()],
+ TileType.values()[buffer.get()],
+ buffer.get(),
+ buffer.get(),
+ (double) buffer.getInt() / 10000000,
+ (double) buffer.getInt() / 10000000,
+ (double) buffer.getInt() / 10000000,
+ (double) buffer.getInt() / 10000000,
+ buffer.get(),
+ (double) buffer.getInt() / 10000000,
+ (double) buffer.getInt() / 10000000);
}
- public static void serializeHeader(LittleEndianDataOutputStream output, Header header)
- throws IOException {
- output.writeByte((byte) 0x50);
- output.writeByte((byte) 0x4D);
- output.writeByte((byte) 0x54);
- output.writeByte((byte) 0x69);
- output.writeByte((byte) 0x6C);
- output.writeByte((byte) 0x65);
- output.writeByte((byte) 0x73);
- output.writeByte((byte) header.getSpecVersion());
- output.writeLong(header.getRootDirectoryOffset());
- output.writeLong(header.getRootDirectoryLength());
- output.writeLong(header.getJsonMetadataOffset());
- output.writeLong(header.getJsonMetadataLength());
- output.writeLong(header.getLeafDirectoryOffset());
- output.writeLong(header.getLeafDirectoryLength());
- output.writeLong(header.getTileDataOffset());
- output.writeLong(header.getTileDataLength());
- output.writeLong(header.getNumAddressedTiles());
- output.writeLong(header.getNumTileEntries());
- output.writeLong(header.getNumTileContents());
- output.writeByte((byte) (header.isClustered() ? 1 : 0));
- output.writeByte((byte) header.getInternalCompression().ordinal());
- output.writeByte((byte) header.getTileCompression().ordinal());
- output.writeByte((byte) header.getTileType().ordinal());
- output.writeByte((byte) header.getMinZoom());
- output.writeByte((byte) header.getMaxZoom());
- output.writeInt((int) (header.getMinLon() * 10000000));
- output.writeInt((int) (header.getMinLat() * 10000000));
- output.writeInt((int) (header.getMaxLon() * 10000000));
- output.writeInt((int) (header.getMaxLat() * 10000000));
- output.writeByte((byte) header.getCenterZoom());
- output.writeInt((int) (header.getCenterLon() * 10000000));
- output.writeInt((int) (header.getCenterLat() * 10000000));
+ public static byte[] serializeHeader(Header header) {
+ var buffer = ByteBuffer.allocate(HEADER_SIZE_BYTES).order(ByteOrder.LITTLE_ENDIAN);
+ buffer.put((byte) 0x50);
+ buffer.put((byte) 0x4D);
+ buffer.put((byte) 0x54);
+ buffer.put((byte) 0x69);
+ buffer.put((byte) 0x6C);
+ buffer.put((byte) 0x65);
+ buffer.put((byte) 0x73);
+ buffer.put((byte) header.getSpecVersion());
+ buffer.putLong(header.getRootDirectoryOffset());
+ buffer.putLong(header.getRootDirectoryLength());
+ buffer.putLong(header.getJsonMetadataOffset());
+ buffer.putLong(header.getJsonMetadataLength());
+ buffer.putLong(header.getLeafDirectoryOffset());
+ buffer.putLong(header.getLeafDirectoryLength());
+ buffer.putLong(header.getTileDataOffset());
+ buffer.putLong(header.getTileDataLength());
+ buffer.putLong(header.getNumAddressedTiles());
+ buffer.putLong(header.getNumTileEntries());
+ buffer.putLong(header.getNumTileContents());
+ buffer.put((byte) (header.isClustered() ? 1 : 0));
+ buffer.put((byte) header.getInternalCompression().ordinal());
+ buffer.put((byte) header.getTileCompression().ordinal());
+ buffer.put((byte) header.getTileType().ordinal());
+ buffer.put((byte) header.getMinZoom());
+ buffer.put((byte) header.getMaxZoom());
+ buffer.putInt((int) (header.getMinLon() * 10000000));
+ buffer.putInt((int) (header.getMinLat() * 10000000));
+ buffer.putInt((int) (header.getMaxLon() * 10000000));
+ buffer.putInt((int) (header.getMaxLat() * 10000000));
+ buffer.put((byte) header.getCenterZoom());
+ buffer.putInt((int) (header.getCenterLon() * 10000000));
+ buffer.putInt((int) (header.getCenterLat() * 10000000));
+ buffer.flip();
+ return buffer.array();
}
- public static void serializeEntries(LittleEndianDataOutputStream output, List<Entry> entries)
+ public static void serializeEntries(OutputStream output, List<Entry> entries)
throws IOException {
+ var buffer = ByteBuffer.allocate(entries.size() * 48);
writeVarInt(output, entries.size());
long lastId = 0;
for (Entry entry : entries) {
@@ -268,9 +279,11 @@ public class PMTiles {
writeVarInt(output, entry.getOffset() + 1);
}
}
+ buffer.flip();
+ output.write(buffer.array(), 0, buffer.limit());
}
- public static List<Entry> deserializeEntries(LittleEndianDataInputStream buffer)
+ public static List<Entry> deserializeEntries(InputStream buffer)
throws IOException {
long numEntries = readVarInt(buffer);
List<Entry> entries = new ArrayList<>((int) numEntries);
@@ -329,60 +342,62 @@ public class PMTiles {
return null;
}
- public static Directories buildRootLeaves(List<Entry> entries, int leafSize) throws IOException {
+ public static Directories buildRootLeaves(List<Entry> entries, int leafSize,
+ Compression compression) throws IOException {
var rootEntries = new ArrayList<Entry>();
var numLeaves = 0;
byte[] leavesBytes;
byte[] rootBytes;
- try (var leavesOutput = new ByteArrayOutputStream();
- var leavesDataOutput = new LittleEndianDataOutputStream(leavesOutput)) {
+ try (var leavesOutput = new ByteArrayOutputStream()) {
for (var i = 0; i < entries.size(); i += leafSize) {
numLeaves++;
var end = i + leafSize;
if (i + leafSize > entries.size()) {
end = entries.size();
}
-
var offset = leavesOutput.size();
- serializeEntries(leavesDataOutput, entries.subList(i, end));
- var length = leavesOutput.size();
- rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
+ try (var leafOutput = new ByteArrayOutputStream()) {
+ try (var compressedLeafOutput = compression.compress(leafOutput)) {
+ serializeEntries(compressedLeafOutput, entries.subList(i, end));
+ }
+ var length = leafOutput.size();
+ rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
+ leavesOutput.write(leafOutput.toByteArray());
+ }
}
-
leavesBytes = leavesOutput.toByteArray();
}
- try (var rootOutput = new ByteArrayOutputStream();
- var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
- serializeEntries(rootDataOutput, rootEntries);
+ try (var rootOutput = new ByteArrayOutputStream()) {
+ try (var compressedRootOutput = compression.compress(rootOutput)) {
+ serializeEntries(compressedRootOutput, rootEntries);
+ }
rootBytes = rootOutput.toByteArray();
}
return new Directories(rootBytes, leavesBytes, numLeaves);
}
- public static Directories optimizeDirectories(List<Entry> entries, int targetRootLenght)
+ public static Directories optimizeDirectories(List<Entry> entries, int targetRootLength,
+ Compression compression)
throws IOException {
if (entries.size() < 16384) {
- byte[] rootBytes;
- try (var rootOutput = new ByteArrayOutputStream();
- var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
- serializeEntries(rootDataOutput, entries);
- rootBytes = rootOutput.toByteArray();
- }
- if (rootBytes.length <= targetRootLenght) {
- return new Directories(rootBytes, new byte[] {}, 0);
+ try (var rootOutput = new ByteArrayOutputStream()) {
+ try (var compressedOutput = compression.compress(rootOutput)) {
+ serializeEntries(compressedOutput, entries);
+ }
+ byte[] rootBytes = rootOutput.toByteArray();
+ if (rootBytes.length <= targetRootLength) {
+ return new Directories(rootBytes, new byte[] {}, 0);
+ }
}
}
- double leafSize = (double) entries.size() / 3500;
- if (leafSize < 4096) {
- leafSize = 4096;
- }
+ double leafSize = Math.max((double) entries.size() / 3500, 4096);
for (;;) {
- var directories = buildRootLeaves(entries, (int) leafSize);
- if (directories.getRoot().length <= targetRootLenght) {
+ var directories = buildRootLeaves(entries, (int) leafSize, compression);
+ if (directories.getRoot().length <= targetRootLength) {
return directories;
}
leafSize = leafSize * 1.2;
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
index f01aaf3e..892a692d 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java
@@ -19,7 +19,7 @@ package org.apache.baremaps.tilestore.pmtiles;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.hash.Hashing;
-import com.google.common.io.LittleEndianDataOutputStream;
+import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
@@ -28,6 +28,8 @@ import java.util.*;
public class PMTilesWriter {
+ private Compression compression = Compression.Gzip;
+
private Path path;
private Map<String, Object> metadata = new HashMap<>();
@@ -153,9 +155,16 @@ public class PMTilesWriter {
entries.sort(Comparator.comparingLong(Entry::getTileId));
}
- var metadataBytes = new ObjectMapper().writeValueAsBytes(metadata);
+ var directories = PMTiles.optimizeDirectories(entries, 16247, compression);
+
+ byte[] metadataBytes;
+ try (var metadataOutput = new ByteArrayOutputStream()) {
+ try (var compressedMetadataOutput = compression.compress(metadataOutput)) {
+ new ObjectMapper().writeValue(compressedMetadataOutput, metadata);
+ }
+ metadataBytes = metadataOutput.toByteArray();
+ }
- var directories = PMTiles.optimizeDirectories(entries, 16247);
var rootOffset = 127;
var rootLength = directories.getRoot().length;
var metadataOffset = rootOffset + rootLength;
@@ -172,8 +181,8 @@ public class PMTilesWriter {
header.setNumTileContents(numTiles);
header.setClustered(true);
- header.setInternalCompression(Compression.None);
- header.setTileCompression(Compression.Gzip);
+ header.setInternalCompression(compression);
+ header.setTileCompression(compression);
header.setTileType(TileType.mvt);
header.setRootOffset(rootOffset);
header.setRootLength(rootLength);
@@ -194,8 +203,8 @@ public class PMTilesWriter {
header.setCenterLat(centerLat);
header.setCenterLon(centerLon);
- try (var output = new LittleEndianDataOutputStream(new FileOutputStream(path.toFile()))) {
- PMTiles.serializeHeader(output, header);
+ try (var output = new FileOutputStream(path.toFile())) {
+ output.write(PMTiles.serializeHeader(header));
output.write(directories.getRoot());
output.write(metadataBytes);
output.write(directories.getLeaves());
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java b/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
index e078d1d5..e06f8d03 100644
--- a/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
+++ b/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java
@@ -20,7 +20,6 @@ package org.apache.baremaps.tilestore.pmtiles;
import static org.junit.jupiter.api.Assertions.*;
import com.google.common.io.LittleEndianDataInputStream;
-import com.google.common.io.LittleEndianDataOutputStream;
import com.google.common.math.LongMath;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@@ -59,15 +58,13 @@ class PMTilesTest {
void encodeVarInt() throws IOException {
for (long i = 0; i < 1000; i++) {
var array = new ByteArrayOutputStream();
- var output = new LittleEndianDataOutputStream(array);
- PMTiles.writeVarInt(output, i);
+ PMTiles.writeVarInt(array, i);
var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray()));
assertEquals(i, PMTiles.readVarInt(input));
}
for (long i = Long.MAX_VALUE - 1000; i < Long.MAX_VALUE; i++) {
var array = new ByteArrayOutputStream();
- var output = new LittleEndianDataOutputStream(array);
- PMTiles.writeVarInt(output, i);
+ PMTiles.writeVarInt(array, i);
var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray()));
assertEquals(i, PMTiles.readVarInt(input));
}
@@ -190,9 +187,7 @@ class PMTilesTest {
0);
var array = new ByteArrayOutputStream();
-
- var output = new LittleEndianDataOutputStream(array);
- PMTiles.serializeHeader(output, header);
+ array.write(PMTiles.serializeHeader(header));
var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray()));
var header2 = PMTiles.deserializeHeader(input);
@@ -259,7 +254,7 @@ class PMTilesTest {
@Test
void buildRootLeaves() throws IOException {
var entries = List.of(new Entry(100, 1, 1, 0));
- var directories = PMTiles.buildRootLeaves(entries, 1);
+ var directories = PMTiles.buildRootLeaves(entries, 1, Compression.None);
assertEquals(directories.getNumLeaves(), 1);
}
@@ -269,7 +264,7 @@ class PMTilesTest {
var random = new Random(3857);
var entries = new ArrayList<Entry>();
entries.add(new Entry(0, 0, 100, 1));
- var directories = PMTiles.optimizeDirectories(entries, 100);
+ var directories = PMTiles.optimizeDirectories(entries, 100, Compression.None);
assertFalse(directories.getLeaves().length > 0);
assertEquals(0, directories.getNumLeaves());
@@ -280,7 +275,7 @@ class PMTilesTest {
entries.add(new Entry(i, offset, randTileSize, 1));
offset += randTileSize;
}
- directories = PMTiles.optimizeDirectories(entries, 1024);
+ directories = PMTiles.optimizeDirectories(entries, 1024, Compression.None);
assertFalse(directories.getRoot().length > 1024);
assertFalse(directories.getNumLeaves() == 0);
assertFalse(directories.getLeaves().length == 0);