This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 0e13c28ce2 [core] Fix BTree index meta for empty keys (#8140)
0e13c28ce2 is described below
commit 0e13c28ce2a6624b3d470416a3b23a7a4661348c
Author: YeJunHao <[email protected]>
AuthorDate: Sat Jun 6 16:37:26 2026 +0800
[core] Fix BTree index meta for empty keys (#8140)
BTreeIndexMeta used zero-length encoded keys to represent null
boundaries. This collides with valid empty serialized keys, for example
an empty string, and can deserialize BTree metadata with a null boundary
key. Readers or compactors that wrap the boundary key can then hit a
NullPointerException.
---
.../paimon/globalindex/btree/BTreeIndexMeta.java | 37 ++++-
.../globalindex/btree/BTreeIndexMetaTest.java | 171 +++++++++++++++++++++
2 files changed, 203 insertions(+), 5 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexMeta.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexMeta.java
index 83d12b217a..96e0d85a74 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexMeta.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexMeta.java
@@ -30,6 +30,10 @@ import javax.annotation.Nullable;
*/
public class BTreeIndexMeta {
+ private static final byte FORMAT_VERSION_WITH_NULL_FLAGS = 1;
+ private static final byte FIRST_KEY_IS_NULL = 1;
+ private static final byte LAST_KEY_IS_NULL = 1 << 1;
+
@Nullable private final byte[] firstKey;
@Nullable private final byte[] lastKey;
private final boolean hasNulls;
@@ -61,36 +65,59 @@ public class BTreeIndexMeta {
private int memorySize() {
return (firstKey == null ? 0 : firstKey.length)
+ (lastKey == null ? 0 : lastKey.length)
- + 9;
+ + 11;
}
public byte[] serialize() {
MemorySliceOutput sliceOutput = new MemorySliceOutput(memorySize());
+ byte nullKeyFlags = 0;
if (firstKey != null) {
sliceOutput.writeInt(firstKey.length);
sliceOutput.writeBytes(firstKey);
} else {
sliceOutput.writeInt(0);
+ nullKeyFlags |= FIRST_KEY_IS_NULL;
}
if (lastKey != null) {
sliceOutput.writeInt(lastKey.length);
sliceOutput.writeBytes(lastKey);
} else {
sliceOutput.writeInt(0);
+ nullKeyFlags |= LAST_KEY_IS_NULL;
}
sliceOutput.writeByte(hasNulls ? 1 : 0);
+ sliceOutput.writeByte(FORMAT_VERSION_WITH_NULL_FLAGS);
+ sliceOutput.writeByte(nullKeyFlags);
return sliceOutput.toSlice().getHeapMemory();
}
public static BTreeIndexMeta deserialize(byte[] data) {
MemorySliceInput sliceInput = MemorySlice.wrap(data).toInput();
int firstKeyLength = sliceInput.readInt();
- byte[] firstKey =
- firstKeyLength == 0 ? null :
sliceInput.readSlice(firstKeyLength).copyBytes();
+ byte[] firstKey = readKey(sliceInput, firstKeyLength);
int lastKeyLength = sliceInput.readInt();
- byte[] lastKey =
- lastKeyLength == 0 ? null :
sliceInput.readSlice(lastKeyLength).copyBytes();
+ byte[] lastKey = readKey(sliceInput, lastKeyLength);
boolean hasNulls = sliceInput.readByte() == 1;
+
+ if (sliceInput.available() >= 2) {
+ int formatVersion = sliceInput.readByte();
+ if (formatVersion == FORMAT_VERSION_WITH_NULL_FLAGS) {
+ int nullKeyFlags = sliceInput.readByte();
+ firstKey = (nullKeyFlags & FIRST_KEY_IS_NULL) != 0 ? null :
firstKey;
+ lastKey = (nullKeyFlags & LAST_KEY_IS_NULL) != 0 ? null :
lastKey;
+ }
+ } else if (firstKeyLength == 0 && lastKeyLength == 0 && hasNulls) {
+ // Compatibility with old metadata which used zero length to
represent null keys.
+ // A legacy all-null index file can be identified by both key
lengths being zero with
+ // null bitmap. When only one length is zero, the zero-length key
is a valid serialized
+ // key, for example an empty string.
+ firstKey = null;
+ lastKey = null;
+ }
return new BTreeIndexMeta(firstKey, lastKey, hasNulls);
}
+
+ private static byte[] readKey(MemorySliceInput sliceInput, int keyLength) {
+ return sliceInput.readSlice(keyLength).copyBytes();
+ }
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeIndexMetaTest.java
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeIndexMetaTest.java
new file mode 100644
index 0000000000..5b37aac50d
--- /dev/null
+++
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeIndexMetaTest.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex.btree;
+
+import org.apache.paimon.data.BinaryString;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.fs.PositionOutputStream;
+import org.apache.paimon.fs.local.LocalFileIO;
+import org.apache.paimon.globalindex.GlobalIndexParallelWriter;
+import org.apache.paimon.globalindex.ResultEntry;
+import org.apache.paimon.globalindex.io.GlobalIndexFileWriter;
+import org.apache.paimon.memory.MemorySliceOutput;
+import org.apache.paimon.options.Options;
+import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.VarCharType;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Test for {@link BTreeIndexMeta}. */
+class BTreeIndexMetaTest {
+
+ @TempDir private java.nio.file.Path tempPath;
+
+ @Test
+ void testEmptyFirstKey() {
+ byte[] lastKey = new byte[] {1, 2, 3};
+
+ BTreeIndexMeta meta = roundTrip(new BTreeIndexMeta(new byte[0],
lastKey, false));
+
+ assertThat(meta.getFirstKey()).isEmpty();
+ assertThat(meta.getLastKey()).containsExactly(lastKey);
+ assertThat(meta.hasNulls()).isFalse();
+ assertThat(meta.onlyNulls()).isFalse();
+ }
+
+ @Test
+ void testEmptyFirstAndLastKeyWithNulls() {
+ BTreeIndexMeta meta = roundTrip(new BTreeIndexMeta(new byte[0], new
byte[0], true));
+
+ assertThat(meta.getFirstKey()).isEmpty();
+ assertThat(meta.getLastKey()).isEmpty();
+ assertThat(meta.hasNulls()).isTrue();
+ assertThat(meta.onlyNulls()).isFalse();
+ }
+
+ @Test
+ void testOnlyNulls() {
+ BTreeIndexMeta meta = roundTrip(new BTreeIndexMeta(null, null, true));
+
+ assertThat(meta.getFirstKey()).isNull();
+ assertThat(meta.getLastKey()).isNull();
+ assertThat(meta.hasNulls()).isTrue();
+ assertThat(meta.onlyNulls()).isTrue();
+ }
+
+ @Test
+ void testWriterEmptyStringKeyWithNulls() throws Exception {
+ List<ResultEntry> results =
+ writeVarCharIndex(
+ null, BinaryString.fromString(""),
BinaryString.fromString("abc"));
+
+ BTreeIndexMeta meta =
BTreeIndexMeta.deserialize(results.get(0).meta());
+
+ assertThat(meta.getFirstKey()).isEmpty();
+
assertThat(meta.getLastKey()).containsExactly(BinaryString.fromString("abc").toBytes());
+ assertThat(meta.hasNulls()).isTrue();
+ assertThat(meta.onlyNulls()).isFalse();
+ }
+
+ @Test
+ void testLegacyOnlyNulls() {
+ BTreeIndexMeta meta =
+ BTreeIndexMeta.deserialize(legacyMetaBytes(new byte[0], new
byte[0], true));
+
+ assertThat(meta.getFirstKey()).isNull();
+ assertThat(meta.getLastKey()).isNull();
+ assertThat(meta.hasNulls()).isTrue();
+ assertThat(meta.onlyNulls()).isTrue();
+ }
+
+ @Test
+ void testLegacyEmptyFirstKey() {
+ byte[] lastKey = new byte[] {1, 2, 3};
+
+ BTreeIndexMeta meta =
+ BTreeIndexMeta.deserialize(legacyMetaBytes(new byte[0],
lastKey, false));
+
+ assertThat(meta.getFirstKey()).isEmpty();
+ assertThat(meta.getLastKey()).containsExactly(lastKey);
+ assertThat(meta.hasNulls()).isFalse();
+ assertThat(meta.onlyNulls()).isFalse();
+ }
+
+ @Test
+ void testLegacyEmptyFirstAndLastKeyWithoutNulls() {
+ BTreeIndexMeta meta =
+ BTreeIndexMeta.deserialize(legacyMetaBytes(new byte[0], new
byte[0], false));
+
+ assertThat(meta.getFirstKey()).isEmpty();
+ assertThat(meta.getLastKey()).isEmpty();
+ assertThat(meta.hasNulls()).isFalse();
+ assertThat(meta.onlyNulls()).isFalse();
+ }
+
+ private BTreeIndexMeta roundTrip(BTreeIndexMeta meta) {
+ return BTreeIndexMeta.deserialize(meta.serialize());
+ }
+
+ private byte[] legacyMetaBytes(byte[] firstKey, byte[] lastKey, boolean
hasNulls) {
+ MemorySliceOutput output = new MemorySliceOutput(firstKey.length +
lastKey.length + 9);
+ output.writeInt(firstKey.length);
+ output.writeBytes(firstKey);
+ output.writeInt(lastKey.length);
+ output.writeBytes(lastKey);
+ output.writeByte(hasNulls ? 1 : 0);
+ return output.toSlice().copyBytes();
+ }
+
+ private List<ResultEntry> writeVarCharIndex(Object... keys) throws
IOException {
+ FileIO fileIO = LocalFileIO.create();
+ GlobalIndexFileWriter fileWriter =
+ new GlobalIndexFileWriter() {
+ @Override
+ public String newFileName(String prefix) {
+ return "test-btree-" + prefix;
+ }
+
+ @Override
+ public PositionOutputStream newOutputStream(String
fileName)
+ throws IOException {
+ return fileIO.newOutputStream(
+ new Path(new Path(tempPath.toUri()),
fileName), true);
+ }
+ };
+ GlobalIndexParallelWriter indexWriter =
+ new BTreeGlobalIndexer(
+ new DataField(
+ 1, "testField", new
VarCharType(VarCharType.MAX_LENGTH)),
+ new Options())
+ .createWriter(fileWriter);
+ for (int index = 0; index < keys.length; index++) {
+ indexWriter.write(keys[index], index);
+ }
+ List<ResultEntry> results = indexWriter.finish();
+ assertThat(results).hasSize(1);
+ return results;
+ }
+}