This is an automated email from the ASF dual-hosted git repository. snazy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/polaris.git
The following commit(s) were added to refs/heads/main by this push: new e5e173f11 Add `Varint` type for variable-length integer encoding (#1229) e5e173f11 is described below commit e5e173f11374a2c81abd873b3d10a7002a052166 Author: Robert Stupp <sn...@snazy.de> AuthorDate: Tue Apr 15 17:14:24 2025 +0200 Add `Varint` type for variable-length integer encoding (#1229) --- bom/build.gradle.kts | 1 + gradle/projects.main.properties | 1 + nosql/persistence/varint/build.gradle.kts | 28 ++++++ .../apache/polaris/persistence/varint/VarInt.java | 90 +++++++++++++++++ .../polaris/persistence/varint/TestVarInt.java | 109 +++++++++++++++++++++ 5 files changed, 229 insertions(+) diff --git a/bom/build.gradle.kts b/bom/build.gradle.kts index 971036895..c27d34c31 100644 --- a/bom/build.gradle.kts +++ b/bom/build.gradle.kts @@ -33,6 +33,7 @@ dependencies { api(project(":polaris-immutables")) api(project(":polaris-misc-types")) api(project(":polaris-version")) + api(project(":polaris-persistence-varint")) api(project(":polaris-config-docs-annotations")) api(project(":polaris-config-docs-generator")) diff --git a/gradle/projects.main.properties b/gradle/projects.main.properties index 3904dd06e..2b27560bc 100644 --- a/gradle/projects.main.properties +++ b/gradle/projects.main.properties @@ -39,6 +39,7 @@ polaris-immutables=tools/immutables polaris-container-spec-helper=tools/container-spec-helper polaris-version=tools/version polaris-misc-types=tools/misc-types +polaris-persistence-varint=nosql/persistence/varint polaris-config-docs-annotations=tools/config-docs/annotations polaris-config-docs-generator=tools/config-docs/generator diff --git a/nosql/persistence/varint/build.gradle.kts b/nosql/persistence/varint/build.gradle.kts new file mode 100644 index 000000000..165027bff --- /dev/null +++ b/nosql/persistence/varint/build.gradle.kts @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +plugins { id("polaris-server") } + +dependencies { + implementation(libs.guava) + + testFixturesApi(libs.assertj.core) +} + +description = "Provides variable length integer encoding" diff --git a/nosql/persistence/varint/src/main/java/org/apache/polaris/persistence/varint/VarInt.java b/nosql/persistence/varint/src/main/java/org/apache/polaris/persistence/varint/VarInt.java new file mode 100644 index 000000000..d755ab57e --- /dev/null +++ b/nosql/persistence/varint/src/main/java/org/apache/polaris/persistence/varint/VarInt.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.varint; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.common.primitives.Ints; +import java.nio.ByteBuffer; + +/** Utility class to de-serialize <em>positive</em> integer values in a space efficient way. */ +public final class VarInt { + // var-int encoded length of Long.MAX_VALUE + private static final int MAX_LEN = 9; + // 7 bits + private static final int MAX_SHIFT_LEN = 7 * MAX_LEN; + + private VarInt() {} + + public static int varIntLen(long v) { + checkArgument(v >= 0); + int l = 0; + while (true) { + l++; + if (v <= 0x7f) { + return l; + } + v >>= 7; + } + } + + public static ByteBuffer putVarInt(ByteBuffer b, long v) { + checkArgument(v >= 0); + while (true) { + if (v <= 0x7f) { + // Current "byte" is <= 0x7f - encode as is. The highest bit (0x80) is not set, meaning that + // this is the last encoded byte value. + return b.put((byte) v); + } + + // Current value is > 0x7f - encode its lower 7 bits and set the "more data follows" flag + // (0x80). + b.put((byte) (v | 0x80)); + + v >>= 7; + } + } + + public static int readVarInt(ByteBuffer b) { + return Ints.checkedCast(readVarLong(b)); + } + + public static long readVarLong(ByteBuffer b) { + long r = 0; + for (int shift = 0; ; shift += 7) { + checkArgument(shift < MAX_SHIFT_LEN, "Illegal variable length integer representation"); + long v = b.get() & 0xff; + r |= (v & 0x7f) << shift; + if ((v & 0x80) == 0) { + break; + } + } + return r; + } + + public static void skipVarInt(ByteBuffer b) { + for (int shift = 0; ; shift += 7) { + checkArgument(shift < MAX_SHIFT_LEN, "Illegal variable length integer representation"); + int v = b.get() & 0xff; + if ((v & 0x80) == 0) { + break; + } + } + } +} diff --git a/nosql/persistence/varint/src/test/java/org/apache/polaris/persistence/varint/TestVarInt.java b/nosql/persistence/varint/src/test/java/org/apache/polaris/persistence/varint/TestVarInt.java new file mode 100644 index 000000000..3f98ab3e2 --- /dev/null +++ b/nosql/persistence/varint/src/test/java/org/apache/polaris/persistence/varint/TestVarInt.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.varint; + +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.stream.Stream; +import org.assertj.core.api.SoftAssertions; +import org.assertj.core.api.junit.jupiter.InjectSoftAssertions; +import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +@ExtendWith(SoftAssertionsExtension.class) +public class TestVarInt { + @InjectSoftAssertions SoftAssertions soft; + + @Test + public void negative() { + var buf = ByteBuffer.allocate(9); + soft.assertThatIllegalArgumentException().isThrownBy(() -> VarInt.putVarInt(buf, -1L)); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> VarInt.putVarInt(buf, Long.MIN_VALUE)); + } + + @ParameterizedTest + @MethodSource + public void varInt(long value, byte[] binary) { + var buf = ByteBuffer.allocate(9); + VarInt.putVarInt(buf, value); + soft.assertThat(buf.position()).isEqualTo(binary.length); + soft.assertThat(Arrays.copyOf(buf.array(), buf.position())).containsExactly(binary); + soft.assertThat(VarInt.varIntLen(value)).isEqualTo(binary.length); + + var read = buf.duplicate().flip(); + VarInt.skipVarInt(read); + soft.assertThat(read.position()).isEqualTo(binary.length); + + if (value > Integer.MAX_VALUE) { + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> VarInt.readVarInt(buf.duplicate().flip())) + .withMessageStartingWith("Out of range: "); + soft.assertThat(VarInt.readVarLong(buf.duplicate().flip())).isEqualTo(value); + } else { + soft.assertThat(VarInt.readVarInt(buf.duplicate().flip())).isEqualTo(value); + soft.assertThat(VarInt.readVarLong(buf.duplicate().flip())).isEqualTo(value); + } + } + + @Test + public void notVarInt() { + var buf = new byte[] {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> VarInt.readVarInt(ByteBuffer.wrap(buf))); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> VarInt.skipVarInt(ByteBuffer.wrap(buf))); + } + + static Stream<Arguments> varInt() { + return Stream.of( + // one byte + arguments(0L, new byte[] {0}), + arguments(1L, new byte[] {1}), + arguments(42L, new byte[] {42}), + arguments(127L, new byte[] {127}), + // 2 bytes + arguments(128L, new byte[] {(byte) 0x80, 1}), + // 21 bite -> 3 x 7 bits + arguments(0x1fffff, new byte[] {-1, -1, 127}), + // 28 bits -> 4 x 7 bits + arguments(0xfffffff, new byte[] {-1, -1, -1, 127}), + // 35 bits -> 5 x 7 bits + arguments(0x7ffffffffL, new byte[] {-1, -1, -1, -1, 127}), + arguments(0x321321321L, new byte[] {-95, -90, -56, -119, 50}), + // 42 bits -> 6 x 7 bits + arguments(0x3ffffffffffL, new byte[] {-1, -1, -1, -1, -1, 127}), + // 49 bits -> 7 x 7 bits + arguments(0x1ffffffffffffL, new byte[] {-1, -1, -1, -1, -1, -1, 127}), + // 56 bits -> 8 x 7 bits + arguments(0xffffffffffffffL, new byte[] {-1, -1, -1, -1, -1, -1, -1, 127}), + arguments(0x32132132132132L, new byte[] {-78, -62, -52, -112, -109, -28, -124, 25}), + // 63 bits -> 9 x 7 bits + arguments(Long.MAX_VALUE, new byte[] {-1, -1, -1, -1, -1, -1, -1, -1, 127}), + arguments( + Long.MAX_VALUE - 0x1111111111111111L, + new byte[] {-18, -35, -69, -9, -18, -35, -69, -9, 110})); + } +}