This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new 9d02c8873 feat(java): int array serializer support varint encoding 
(#3124)
9d02c8873 is described below

commit 9d02c8873754da243ee4f98890877e97e1149f70
Author: Pigsy-Monk <[email protected]>
AuthorDate: Mon Jan 12 11:49:06 2026 +0800

    feat(java): int array serializer support varint encoding (#3124)
    
    ## Why?
    
    This PR adds variable-length encoding support for `int[]` arrays, which
    provides space savings when arrays contain many small values. This is
    particularly beneficial for use cases like sparse arrays, indices,
    counters, and other scenarios where array values are predominantly small
    integers.
    
    ## What does this PR do?
    
    ### Changes:
    
    1. **Enhanced `IntArraySerializer` with variable-length encoding**:
       - Added `writeInt32Compressed()` method that uses `writeVarInt32()` for
         each element
       - Added `readInt32Compressed()` method that uses `readVarInt32()` for
         each element
       - Enabled via `compressIntArray()` configuration flag
    
    2. **Added comprehensive test cases**:
       - `testVariableLengthIntArray()`: Tests serialization/deserialization
         correctness for int arrays
       - `testVariableLengthIntArrayEncodingEfficiencyForSmallValues()`:
         Demonstrates space efficiency for int arrays
    
    ### Technical Details:
    
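    - **IntArraySerializer**: Writes the element count with
      `writeVarUint32Small7()`, then uses `writeVarInt32()` / `readVarInt32()`
      for variable-length encoding of each element
    - Compression is only enabled when:
      - `compressIntArray()` is `true`
      - on the write path, no buffer callback is set
        (`fory.getBufferCallback() == null`)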
    
    ### Space Efficiency:
    
    - **Int arrays with small values (0-127)**:
      - Fixed-length: 4 bytes/element
      - Variable-length: 1-2 bytes/element
---
 .../apache/fory/serializer/ArraySerializers.java   |  27 +++-
 .../fory/serializer/ArraySerializersTest.java      | 158 +++++++++++++++++++++
 2 files changed, 183 insertions(+), 2 deletions(-)

diff --git 
a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java 
b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
index 6d0282020..946062cdc 100644
--- 
a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
+++ 
b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java
@@ -450,6 +450,10 @@ public class ArraySerializers {
     @Override
     public void write(MemoryBuffer buffer, int[] value) {
       if (fory.getBufferCallback() == null) {
+        if (fory.getConfig().compressIntArray()) {
+          writeInt32Compressed(buffer, value);
+          return;
+        }
         int size = Math.multiplyExact(value.length, 4);
         buffer.writePrimitiveArrayWithSize(value, Platform.INT_ARRAY_OFFSET, 
size);
       } else {
@@ -476,7 +480,9 @@ public class ArraySerializers {
         }
         return values;
       }
-
+      if (fory.getConfig().compressIntArray()) {
+        return readInt32Compressed(buffer);
+      }
       int size = buffer.readVarUint32Small7();
       int numElements = size / 4;
       int[] values = new int[numElements];
@@ -485,6 +491,23 @@ public class ArraySerializers {
       }
       return values;
     }
+
+    private void writeInt32Compressed(MemoryBuffer buffer, int[] value) {
+      buffer.writeVarUint32Small7(value.length);
+      for (int i : value) {
+        buffer.writeVarInt32(i);
+      }
+    }
+
+    private int[] readInt32Compressed(MemoryBuffer buffer) {
+      int numElements = buffer.readVarUint32Small7();
+      int[] values = new int[numElements];
+
+      for (int i = 0; i < numElements; i++) {
+        values[i] = buffer.readVarInt32();
+      }
+      return values;
+    }
   }
 
   public static final class LongArraySerializer extends 
PrimitiveArraySerializer<long[]> {
@@ -558,7 +581,7 @@ public class ArraySerializers {
       }
     }
 
-    public long[] readInt64Compressed(MemoryBuffer buffer, LongEncoding 
longEncoding) {
+    private long[] readInt64Compressed(MemoryBuffer buffer, LongEncoding 
longEncoding) {
       int numElements = buffer.readVarUint32Small7();
       long[] values = new long[numElements];
 
diff --git 
a/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java
 
b/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java
index fe817d916..bd8e93d66 100644
--- 
a/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java
+++ 
b/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java
@@ -529,4 +529,162 @@ public class ArraySerializersTest extends ForyTestBase {
                 + "for array with medium values",
             variableSizeMedium, fixedSizeMedium));
   }
+
+  /**
+   * Test variable-length encoding for int arrays. This test verifies that int 
arrays can be
+   * serialized and deserialized using variable-length encoding when 
compressIntArray is enabled.
+   */
+  @Test
+  public void testVariableLengthIntArray() {
+    // Create Fory instance with variable-length encoding enabled for int 
arrays
+    Fory fory = 
Fory.builder().requireClassRegistration(false).withIntArrayCompressed(true).build();
+
+    // Test empty array
+    int[] emptyArray = new int[0];
+    int[] deserializedEmpty = (int[]) serDe(fory, fory, emptyArray);
+    assertEquals(deserializedEmpty.length, 0);
+
+    // Test array with small values (benefits from variable-length encoding)
+    int[] smallValues = {1, 2, 3, 127, 128, 255};
+    int[] deserializedSmall = (int[]) serDe(fory, fory, smallValues);
+    assertTrue(Arrays.equals(deserializedSmall, smallValues));
+
+    // Test array with mixed small and large values
+    int[] mixedValues = {0, 1, -1, 100, -100, Integer.MAX_VALUE, 
Integer.MIN_VALUE, 1000};
+    int[] deserializedMixed = (int[]) serDe(fory, fory, mixedValues);
+    assertTrue(Arrays.equals(deserializedMixed, mixedValues));
+
+    // Test array with large values
+    int[] largeValues = {
+      Integer.MAX_VALUE, Integer.MIN_VALUE, Integer.MAX_VALUE / 2, 
Integer.MIN_VALUE / 2
+    };
+    int[] deserializedLarge = (int[]) serDe(fory, fory, largeValues);
+    assertTrue(Arrays.equals(deserializedLarge, largeValues));
+
+    // Test array with negative values
+    int[] negativeValues = {-1, -100, -1000, -1000000};
+    int[] deserializedNegative = (int[]) serDe(fory, fory, negativeValues);
+    assertTrue(Arrays.equals(deserializedNegative, negativeValues));
+
+    // Test large array with many small values
+    int[] largeArray = new int[1000];
+    for (int i = 0; i < largeArray.length; i++) {
+      largeArray[i] = i % 100; // Small values benefit from variable-length 
encoding
+    }
+    int[] deserializedLargeArray = (int[]) serDe(fory, fory, largeArray);
+    assertTrue(Arrays.equals(deserializedLargeArray, largeArray));
+  }
+
+  /**
+   * Test that variable-length encoding is more efficient (smaller size) than 
fixed-length encoding
+   * when the int array contains many small values. This demonstrates the 
space efficiency benefit
+   * of variable-length encoding for arrays with predominantly small values.
+   */
+  @Test
+  public void testVariableLengthIntArrayEncodingEfficiencyForSmallValues() {
+    // Create a Fory instance with fixed-length encoding (compressIntArray 
disabled)
+    Fory foryFixed =
+        
Fory.builder().requireClassRegistration(false).withIntArrayCompressed(false).build();
+
+    // Create a Fory instance with variable-length encoding (compressIntArray 
enabled)
+    Fory foryVariable =
+        
Fory.builder().requireClassRegistration(false).withIntArrayCompressed(true).build();
+
+    // Create an array with many small values (0-127, which can be encoded in 
1-2 bytes with varint)
+    int arraySize = 10000;
+    int[] smallValuesArray = new int[arraySize];
+    for (int i = 0; i < arraySize; i++) {
+      // Use values from 0 to 127, which benefit most from variable-length 
encoding
+      smallValuesArray[i] = i % 128;
+    }
+
+    // Serialize with fixed-length encoding (4 bytes per element)
+    byte[] fixedBytes = foryFixed.serialize(smallValuesArray);
+    int fixedSize = fixedBytes.length;
+
+    // Serialize with variable-length encoding (1-2 bytes per small element)
+    byte[] variableBytes = foryVariable.serialize(smallValuesArray);
+    int variableSize = variableBytes.length;
+
+    // Verify both can be deserialized correctly
+    int[] deserializedFixed = (int[]) foryFixed.deserialize(fixedBytes);
+    int[] deserializedVariable = (int[]) 
foryVariable.deserialize(variableBytes);
+    assertTrue(Arrays.equals(deserializedFixed, smallValuesArray));
+    assertTrue(Arrays.equals(deserializedVariable, smallValuesArray));
+
+    // Calculate efficiency metrics
+    int sizeDifference = fixedSize - variableSize;
+    double percentageReduction = 100.0 * sizeDifference / fixedSize;
+
+    System.out.printf(
+        "Array size: %d elements (values 0-127)%n"
+            + "Fixed-length encoding: %d bytes (%.2f bytes/element)%n"
+            + "Variable-length encoding: %d bytes (%.2f bytes/element)%n"
+            + "Space savings: %d bytes (%.2f%% reduction)%n",
+        arraySize,
+        fixedSize,
+        (double) fixedSize / arraySize,
+        variableSize,
+        (double) variableSize / arraySize,
+        sizeDifference,
+        percentageReduction);
+
+    // Verify that variable-length encoding produces smaller or equal size
+    // For arrays with many small values, variable-length should be 
significantly smaller
+    assertTrue(
+        variableSize < fixedSize,
+        String.format(
+            "Expected variable-length encoding (%d bytes) to be smaller than 
fixed-length (%d bytes) "
+                + "for array with many small values",
+            variableSize, fixedSize));
+
+    // Verify significant space savings (at least 50% reduction for small 
values)
+    // Fixed-length: 4 bytes per element + overhead
+    // Variable-length: 1-2 bytes per small element + overhead
+    // For values 0-127, we expect at least 50% reduction
+    assertTrue(
+        percentageReduction >= 50.0,
+        String.format(
+            "Expected at least 50%% size reduction for small values, but got 
%.2f%%",
+            percentageReduction));
+
+    // Test with slightly larger values (0-32767) to show variable-length 
still helps
+    int[] mediumValuesArray = new int[arraySize];
+    for (int i = 0; i < arraySize; i++) {
+      mediumValuesArray[i] = i % 32768;
+    }
+
+    byte[] fixedBytesMedium = foryFixed.serialize(mediumValuesArray);
+    byte[] variableBytesMedium = foryVariable.serialize(mediumValuesArray);
+    int fixedSizeMedium = fixedBytesMedium.length;
+    int variableSizeMedium = variableBytesMedium.length;
+
+    // Verify deserialization
+    int[] deserializedFixedMedium = (int[]) 
foryFixed.deserialize(fixedBytesMedium);
+    int[] deserializedVariableMedium = (int[]) 
foryVariable.deserialize(variableBytesMedium);
+    assertTrue(Arrays.equals(deserializedFixedMedium, mediumValuesArray));
+    assertTrue(Arrays.equals(deserializedVariableMedium, mediumValuesArray));
+
+    int sizeDifferenceMedium = fixedSizeMedium - variableSizeMedium;
+    double percentageReductionMedium = 100.0 * sizeDifferenceMedium / 
fixedSizeMedium;
+
+    System.out.printf(
+        "Array size: %d elements (values 0-32767)%n"
+            + "Fixed-length encoding: %d bytes%n"
+            + "Variable-length encoding: %d bytes%n"
+            + "Space savings: %d bytes (%.2f%% reduction)%n",
+        arraySize,
+        fixedSizeMedium,
+        variableSizeMedium,
+        sizeDifferenceMedium,
+        percentageReductionMedium);
+
+    // For medium values (0-32767), variable-length should still be smaller
+    assertTrue(
+        variableSizeMedium < fixedSizeMedium,
+        String.format(
+            "Expected variable-length encoding (%d bytes) to be smaller than 
fixed-length (%d bytes) "
+                + "for array with medium values",
+            variableSizeMedium, fixedSizeMedium));
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to