This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new 9d02c8873 feat(java): int array serializer support varint encoding
(#3124)
9d02c8873 is described below
commit 9d02c8873754da243ee4f98890877e97e1149f70
Author: Pigsy-Monk <[email protected]>
AuthorDate: Mon Jan 12 11:49:06 2026 +0800
feat(java): int array serializer support varint encoding (#3124)
## Why?
This PR adds variable-length encoding support for `int[]` arrays, which
provides space savings when arrays contain many small values. This is
particularly beneficial for use cases like sparse arrays, indices,
counters, and other scenarios where array values are predominantly small
integers.
## What does this PR do?
### Changes:
1. **Enhanced `IntArraySerializer` with variable-length encoding**:
- Added `writeInt32Compressed()` method that uses `writeVarInt32()` for each
element
- Added `readInt32Compressed()` method that uses `readVarInt32()` for each
element
- Enabled via `compressIntArray()` configuration flag
2. **Added comprehensive test cases**:
- `testVariableLengthIntArray()`: Tests serialization/deserialization
correctness for int arrays
- `testVariableLengthIntArrayEncodingEfficiencyForSmallValues()`:
Demonstrates space efficiency for int arrays
### Technical Details:
- **IntArraySerializer**: Uses `writeVarInt32()` / `readVarInt32()` for
variable-length encoding
- Compression is only enabled when:
- `compressIntArray()` is `true`
- no buffer callback is set on write (`fory.getBufferCallback() == null`)
### Space Efficiency:
- **Int arrays with small values (0-127)**:
- Fixed-length: 4 bytes/element
- Variable-length: 1-2 bytes/element
---
.../apache/fory/serializer/ArraySerializers.java | 27 +++-
.../fory/serializer/ArraySerializersTest.java | 158 +++++++++++++++++++++
2 files changed, 183 insertions(+), 2 deletions(-)
diff --git
a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
index 6d0282020..946062cdc 100644
---
a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
+++
b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
@@ -450,6 +450,10 @@ public class ArraySerializers {
@Override
public void write(MemoryBuffer buffer, int[] value) {
if (fory.getBufferCallback() == null) {
+ if (fory.getConfig().compressIntArray()) {
+ writeInt32Compressed(buffer, value);
+ return;
+ }
int size = Math.multiplyExact(value.length, 4);
buffer.writePrimitiveArrayWithSize(value, Platform.INT_ARRAY_OFFSET,
size);
} else {
@@ -476,7 +480,9 @@ public class ArraySerializers {
}
return values;
}
-
+ if (fory.getConfig().compressIntArray()) {
+ return readInt32Compressed(buffer);
+ }
int size = buffer.readVarUint32Small7();
int numElements = size / 4;
int[] values = new int[numElements];
@@ -485,6 +491,23 @@ public class ArraySerializers {
}
return values;
}
+
+ private void writeInt32Compressed(MemoryBuffer buffer, int[] value) {
+ buffer.writeVarUint32Small7(value.length);
+ for (int i : value) {
+ buffer.writeVarInt32(i);
+ }
+ }
+
+ private int[] readInt32Compressed(MemoryBuffer buffer) {
+ int numElements = buffer.readVarUint32Small7();
+ int[] values = new int[numElements];
+
+ for (int i = 0; i < numElements; i++) {
+ values[i] = buffer.readVarInt32();
+ }
+ return values;
+ }
}
public static final class LongArraySerializer extends
PrimitiveArraySerializer<long[]> {
@@ -558,7 +581,7 @@ public class ArraySerializers {
}
}
- public long[] readInt64Compressed(MemoryBuffer buffer, LongEncoding
longEncoding) {
+ private long[] readInt64Compressed(MemoryBuffer buffer, LongEncoding
longEncoding) {
int numElements = buffer.readVarUint32Small7();
long[] values = new long[numElements];
diff --git
a/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java
b/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java
index fe817d916..bd8e93d66 100644
---
a/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java
+++
b/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java
@@ -529,4 +529,162 @@ public class ArraySerializersTest extends ForyTestBase {
+ "for array with medium values",
variableSizeMedium, fixedSizeMedium));
}
+
+ /**
+ * Test variable-length encoding for int arrays. This test verifies that int
arrays can be
+ * serialized and deserialized using variable-length encoding when
compressIntArray is enabled.
+ */
+ @Test
+ public void testVariableLengthIntArray() {
+ // Create Fory instance with variable-length encoding enabled for int
arrays
+ Fory fory =
Fory.builder().requireClassRegistration(false).withIntArrayCompressed(true).build();
+
+ // Test empty array
+ int[] emptyArray = new int[0];
+ int[] deserializedEmpty = (int[]) serDe(fory, fory, emptyArray);
+ assertEquals(deserializedEmpty.length, 0);
+
+ // Test array with small values (benefits from variable-length encoding)
+ int[] smallValues = {1, 2, 3, 127, 128, 255};
+ int[] deserializedSmall = (int[]) serDe(fory, fory, smallValues);
+ assertTrue(Arrays.equals(deserializedSmall, smallValues));
+
+ // Test array with mixed small and large values
+ int[] mixedValues = {0, 1, -1, 100, -100, Integer.MAX_VALUE,
Integer.MIN_VALUE, 1000};
+ int[] deserializedMixed = (int[]) serDe(fory, fory, mixedValues);
+ assertTrue(Arrays.equals(deserializedMixed, mixedValues));
+
+ // Test array with large values
+ int[] largeValues = {
+ Integer.MAX_VALUE, Integer.MIN_VALUE, Integer.MAX_VALUE / 2,
Integer.MIN_VALUE / 2
+ };
+ int[] deserializedLarge = (int[]) serDe(fory, fory, largeValues);
+ assertTrue(Arrays.equals(deserializedLarge, largeValues));
+
+ // Test array with negative values
+ int[] negativeValues = {-1, -100, -1000, -1000000};
+ int[] deserializedNegative = (int[]) serDe(fory, fory, negativeValues);
+ assertTrue(Arrays.equals(deserializedNegative, negativeValues));
+
+ // Test large array with many small values
+ int[] largeArray = new int[1000];
+ for (int i = 0; i < largeArray.length; i++) {
+ largeArray[i] = i % 100; // Small values benefit from variable-length
encoding
+ }
+ int[] deserializedLargeArray = (int[]) serDe(fory, fory, largeArray);
+ assertTrue(Arrays.equals(deserializedLargeArray, largeArray));
+ }
+
+ /**
+ * Test that variable-length encoding is more efficient (smaller size) than
fixed-length encoding
+ * when the int array contains many small values. This demonstrates the
space efficiency benefit
+ * of variable-length encoding for arrays with predominantly small values.
+ */
+ @Test
+ public void testVariableLengthIntArrayEncodingEfficiencyForSmallValues() {
+ // Create a Fory instance with fixed-length encoding (compressIntArray
disabled)
+ Fory foryFixed =
+
Fory.builder().requireClassRegistration(false).withIntArrayCompressed(false).build();
+
+ // Create a Fory instance with variable-length encoding (compressIntArray
enabled)
+ Fory foryVariable =
+
Fory.builder().requireClassRegistration(false).withIntArrayCompressed(true).build();
+
+ // Create an array with many small values (0-127, which can be encoded in
1-2 bytes with varint)
+ int arraySize = 10000;
+ int[] smallValuesArray = new int[arraySize];
+ for (int i = 0; i < arraySize; i++) {
+ // Use values from 0 to 127, which benefit most from variable-length
encoding
+ smallValuesArray[i] = i % 128;
+ }
+
+ // Serialize with fixed-length encoding (4 bytes per element)
+ byte[] fixedBytes = foryFixed.serialize(smallValuesArray);
+ int fixedSize = fixedBytes.length;
+
+ // Serialize with variable-length encoding (1-2 bytes per small element)
+ byte[] variableBytes = foryVariable.serialize(smallValuesArray);
+ int variableSize = variableBytes.length;
+
+ // Verify both can be deserialized correctly
+ int[] deserializedFixed = (int[]) foryFixed.deserialize(fixedBytes);
+ int[] deserializedVariable = (int[])
foryVariable.deserialize(variableBytes);
+ assertTrue(Arrays.equals(deserializedFixed, smallValuesArray));
+ assertTrue(Arrays.equals(deserializedVariable, smallValuesArray));
+
+ // Calculate efficiency metrics
+ int sizeDifference = fixedSize - variableSize;
+ double percentageReduction = 100.0 * sizeDifference / fixedSize;
+
+ System.out.printf(
+ "Array size: %d elements (values 0-127)%n"
+ + "Fixed-length encoding: %d bytes (%.2f bytes/element)%n"
+ + "Variable-length encoding: %d bytes (%.2f bytes/element)%n"
+ + "Space savings: %d bytes (%.2f%% reduction)%n",
+ arraySize,
+ fixedSize,
+ (double) fixedSize / arraySize,
+ variableSize,
+ (double) variableSize / arraySize,
+ sizeDifference,
+ percentageReduction);
+
+ // Verify that variable-length encoding produces smaller or equal size
+ // For arrays with many small values, variable-length should be
significantly smaller
+ assertTrue(
+ variableSize < fixedSize,
+ String.format(
+ "Expected variable-length encoding (%d bytes) to be smaller than
fixed-length (%d bytes) "
+ + "for array with many small values",
+ variableSize, fixedSize));
+
+ // Verify significant space savings (at least 50% reduction for small
values)
+ // Fixed-length: 4 bytes per element + overhead
+ // Variable-length: 1-2 bytes per small element + overhead
+ // For values 0-127, we expect at least 50% reduction
+ assertTrue(
+ percentageReduction >= 50.0,
+ String.format(
+ "Expected at least 50%% size reduction for small values, but got
%.2f%%",
+ percentageReduction));
+
+ // Test with slightly larger values (0-32767) to show variable-length
still helps
+ int[] mediumValuesArray = new int[arraySize];
+ for (int i = 0; i < arraySize; i++) {
+ mediumValuesArray[i] = i % 32768;
+ }
+
+ byte[] fixedBytesMedium = foryFixed.serialize(mediumValuesArray);
+ byte[] variableBytesMedium = foryVariable.serialize(mediumValuesArray);
+ int fixedSizeMedium = fixedBytesMedium.length;
+ int variableSizeMedium = variableBytesMedium.length;
+
+ // Verify deserialization
+ int[] deserializedFixedMedium = (int[])
foryFixed.deserialize(fixedBytesMedium);
+ int[] deserializedVariableMedium = (int[])
foryVariable.deserialize(variableBytesMedium);
+ assertTrue(Arrays.equals(deserializedFixedMedium, mediumValuesArray));
+ assertTrue(Arrays.equals(deserializedVariableMedium, mediumValuesArray));
+
+ int sizeDifferenceMedium = fixedSizeMedium - variableSizeMedium;
+ double percentageReductionMedium = 100.0 * sizeDifferenceMedium /
fixedSizeMedium;
+
+ System.out.printf(
+ "Array size: %d elements (values 0-32767)%n"
+ + "Fixed-length encoding: %d bytes%n"
+ + "Variable-length encoding: %d bytes%n"
+ + "Space savings: %d bytes (%.2f%% reduction)%n",
+ arraySize,
+ fixedSizeMedium,
+ variableSizeMedium,
+ sizeDifferenceMedium,
+ percentageReductionMedium);
+
+ // For medium values (0-32767), variable-length should still be smaller
+ assertTrue(
+ variableSizeMedium < fixedSizeMedium,
+ String.format(
+ "Expected variable-length encoding (%d bytes) to be smaller than
fixed-length (%d bytes) "
+ + "for array with medium values",
+ variableSizeMedium, fixedSizeMedium));
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]