Copilot commented on code in PR #18092:
URL: https://github.com/apache/pinot/pull/18092#discussion_r3032516175


##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/compression/ChunkTransformTest.java:
##########
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.compression;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+import org.apache.pinot.segment.spi.compression.ChunkTransform;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Tests for {@link DeltaTransform} and {@link DoubleDeltaTransform}.
+ */
+public class ChunkTransformTest {
+
+  @Test
+  public void testDeltaEncodeDecodeInts() {
+    int[] values = {100, 105, 108, 120, 125, 130};
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaEncodeDecodeLongs() {
+    long[] values = {1000000L, 1000050L, 1000100L, 1000200L, 1000250L};
+    assertRoundTripLongs(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaEmptyInts() {
+    assertRoundTripInts(DeltaTransform.INSTANCE, new int[0]);
+  }
+
+  @Test
+  public void testDeltaSingleInt() {
+    assertRoundTripInts(DeltaTransform.INSTANCE, new int[]{42});
+  }
+
+  @Test
+  public void testDeltaSingleLong() {
+    assertRoundTripLongs(DeltaTransform.INSTANCE, new long[]{42L});
+  }
+
+  @Test
+  public void testDeltaNegativeValues() {
+    int[] values = {-100, -50, 0, 50, 100, -200};
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaOverflowInt() {
+    int[] values = {Integer.MAX_VALUE, Integer.MIN_VALUE, 0, 
Integer.MAX_VALUE};
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaOverflowLong() {
+    long[] values = {Long.MAX_VALUE, Long.MIN_VALUE, 0L, Long.MAX_VALUE};
+    assertRoundTripLongs(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaMonotonicInts() {
+    int[] values = new int[1000];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = i;
+    }
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaConstantInts() {
+    int[] values = new int[100];
+    java.util.Arrays.fill(values, 42);
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaRandomInts() {
+    Random random = new Random(12345);
+    int[] values = new int[500];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = random.nextInt();
+    }
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaRandomLongs() {
+    Random random = new Random(12345);
+    long[] values = new long[500];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = random.nextLong();
+    }
+    assertRoundTripLongs(DeltaTransform.INSTANCE, values);
+  }
+
+  // --- Double-delta tests ---
+
+  @Test
+  public void testDoubleDeltaEncodeDecodeInts() {
+    int[] values = {100, 110, 120, 130, 140, 150};
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaEncodeDecodeLongs() {
+    long[] values = {1000L, 1100L, 1200L, 1300L, 1400L};
+    assertRoundTripLongs(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaEmptyInts() {
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, new int[0]);
+  }
+
+  @Test
+  public void testDoubleDeltaSingleInt() {
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, new int[]{42});
+  }
+
+  @Test
+  public void testDoubleDeltaTwoInts() {
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, new int[]{10, 20});
+  }
+
+  @Test
+  public void testDoubleDeltaTwoLongs() {
+    assertRoundTripLongs(DoubleDeltaTransform.INSTANCE, new long[]{10L, 20L});
+  }
+
+  @Test
+  public void testDoubleDeltaOverflowInt() {
+    int[] values = {Integer.MAX_VALUE, Integer.MIN_VALUE, 0, 
Integer.MAX_VALUE};
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaOverflowLong() {
+    long[] values = {Long.MAX_VALUE, Long.MIN_VALUE, 0L, Long.MAX_VALUE};
+    assertRoundTripLongs(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaConstantStep() {
+    // Perfect constant step: double-deltas should all be 0
+    int[] values = new int[100];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = 1000 + i * 60;
+    }
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaRandomInts() {
+    Random random = new Random(67890);
+    int[] values = new int[500];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = random.nextInt();
+    }
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaRandomLongs() {
+    Random random = new Random(67890);
+    long[] values = new long[500];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = random.nextLong();
+    }
+    assertRoundTripLongs(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  // --- Helpers ---
+
+  private void assertRoundTripInts(ChunkTransform transform, int[] original) {
+    if (original.length == 0) {
+      return;
+    }
+    ByteBuffer buffer = ByteBuffer.allocate(original.length * Integer.BYTES);
+    for (int v : original) {
+      buffer.putInt(v);
+    }
+    buffer.flip();
+
+    int numBytes = original.length * Integer.BYTES;
+    transform.encode(buffer, numBytes, Integer.BYTES);
+    transform.decode(buffer, numBytes, Integer.BYTES);
+
+    buffer.position(0);
+    for (int i = 0; i < original.length; i++) {
+      assertEquals(buffer.getInt(), original[i], "Mismatch at index " + i);
+    }
+  }
+
+  private void assertRoundTripLongs(ChunkTransform transform, long[] original) 
{
+    if (original.length == 0) {
+      return;
+    }

Review Comment:
   Same issue as the int helper: returning early on `original.length == 0` 
means the empty-long test cases never call `encode/decode`. Consider running 
encode/decode with `numBytes=0` on an empty buffer (or otherwise asserting 
no-throw) to make the empty-long tests meaningful.



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/compression/DoubleDeltaTransform.java:
##########
@@ -0,0 +1,185 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.compression;
+
+import java.nio.ByteBuffer;
+import org.apache.pinot.segment.spi.compression.ChunkTransform;
+
+
+/**
+ * Double-delta (delta-of-delta) transform for numeric values. Stores the 
first value as-is,
+ * the first delta as-is, then each subsequent value as the difference between 
consecutive deltas.
+ * Operates in-place on the ByteBuffer.
+ *
+ * <p>Effective for data with constant or near-constant step sizes (e.g., 
fixed-interval timestamps).</p>
+ */
+public class DoubleDeltaTransform implements ChunkTransform {
+
+  public static final DoubleDeltaTransform INSTANCE = new 
DoubleDeltaTransform();
+
+  private DoubleDeltaTransform() {
+  }
+
+  @Override
+  public void encode(ByteBuffer buffer, int numBytes, int valueSizeInBytes) {
+    if (valueSizeInBytes == Integer.BYTES) {
+      encodeInts(buffer, numBytes);
+    } else {
+      encodeLongs(buffer, numBytes);
+    }
+  }
+
+  @Override
+  public void decode(ByteBuffer buffer, int numBytes, int valueSizeInBytes) {
+    if (valueSizeInBytes == Integer.BYTES) {
+      decodeInts(buffer, numBytes);
+    } else {
+      decodeLongs(buffer, numBytes);
+    }
+  }
+
+  private void encodeInts(ByteBuffer buffer, int numBytes) {
+    int numValues = numBytes / Integer.BYTES;
+    if (numValues <= 2) {
+      // For 0, 1, or 2 values, just apply regular delta (first value stays, 
second becomes delta)
+      if (numValues == 2) {
+        int pos = buffer.position();
+        int v0 = buffer.getInt(pos);
+        int v1 = buffer.getInt(pos + Integer.BYTES);
+        buffer.putInt(pos + Integer.BYTES, v1 - v0);
+      }
+      return;
+    }
+    int pos = buffer.position();
+
+    // Encode in reverse to avoid overwriting values we still need.
+    // First pass: compute all original values we need (they're in the buffer).
+    // We encode from the end to index 2, then handle index 1.

Review Comment:
   The inline comment says “Encode in reverse … from the end to index 2”, but 
the implementation encodes in forward order (`for (int i = 2; i < numValues; 
i++)`). Please update the comment to match the actual algorithm to avoid 
confusion during future maintenance.
   ```suggestion
       // Encode in forward order, keeping the previous original value and 
delta in local variables
       // so we can safely overwrite each position in-place as we advance from 
index 2 onward.
   ```



##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/compression/ChunkTransformTest.java:
##########
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.compression;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+import org.apache.pinot.segment.spi.compression.ChunkTransform;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Tests for {@link DeltaTransform} and {@link DoubleDeltaTransform}.
+ */
+public class ChunkTransformTest {
+
+  @Test
+  public void testDeltaEncodeDecodeInts() {
+    int[] values = {100, 105, 108, 120, 125, 130};
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaEncodeDecodeLongs() {
+    long[] values = {1000000L, 1000050L, 1000100L, 1000200L, 1000250L};
+    assertRoundTripLongs(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaEmptyInts() {
+    assertRoundTripInts(DeltaTransform.INSTANCE, new int[0]);
+  }
+
+  @Test
+  public void testDeltaSingleInt() {
+    assertRoundTripInts(DeltaTransform.INSTANCE, new int[]{42});
+  }
+
+  @Test
+  public void testDeltaSingleLong() {
+    assertRoundTripLongs(DeltaTransform.INSTANCE, new long[]{42L});
+  }
+
+  @Test
+  public void testDeltaNegativeValues() {
+    int[] values = {-100, -50, 0, 50, 100, -200};
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaOverflowInt() {
+    int[] values = {Integer.MAX_VALUE, Integer.MIN_VALUE, 0, 
Integer.MAX_VALUE};
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaOverflowLong() {
+    long[] values = {Long.MAX_VALUE, Long.MIN_VALUE, 0L, Long.MAX_VALUE};
+    assertRoundTripLongs(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaMonotonicInts() {
+    int[] values = new int[1000];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = i;
+    }
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaConstantInts() {
+    int[] values = new int[100];
+    java.util.Arrays.fill(values, 42);
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaRandomInts() {
+    Random random = new Random(12345);
+    int[] values = new int[500];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = random.nextInt();
+    }
+    assertRoundTripInts(DeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDeltaRandomLongs() {
+    Random random = new Random(12345);
+    long[] values = new long[500];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = random.nextLong();
+    }
+    assertRoundTripLongs(DeltaTransform.INSTANCE, values);
+  }
+
+  // --- Double-delta tests ---
+
+  @Test
+  public void testDoubleDeltaEncodeDecodeInts() {
+    int[] values = {100, 110, 120, 130, 140, 150};
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaEncodeDecodeLongs() {
+    long[] values = {1000L, 1100L, 1200L, 1300L, 1400L};
+    assertRoundTripLongs(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaEmptyInts() {
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, new int[0]);
+  }
+
+  @Test
+  public void testDoubleDeltaSingleInt() {
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, new int[]{42});
+  }
+
+  @Test
+  public void testDoubleDeltaTwoInts() {
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, new int[]{10, 20});
+  }
+
+  @Test
+  public void testDoubleDeltaTwoLongs() {
+    assertRoundTripLongs(DoubleDeltaTransform.INSTANCE, new long[]{10L, 20L});
+  }
+
+  @Test
+  public void testDoubleDeltaOverflowInt() {
+    int[] values = {Integer.MAX_VALUE, Integer.MIN_VALUE, 0, 
Integer.MAX_VALUE};
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaOverflowLong() {
+    long[] values = {Long.MAX_VALUE, Long.MIN_VALUE, 0L, Long.MAX_VALUE};
+    assertRoundTripLongs(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaConstantStep() {
+    // Perfect constant step: double-deltas should all be 0
+    int[] values = new int[100];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = 1000 + i * 60;
+    }
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaRandomInts() {
+    Random random = new Random(67890);
+    int[] values = new int[500];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = random.nextInt();
+    }
+    assertRoundTripInts(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  @Test
+  public void testDoubleDeltaRandomLongs() {
+    Random random = new Random(67890);
+    long[] values = new long[500];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = random.nextLong();
+    }
+    assertRoundTripLongs(DoubleDeltaTransform.INSTANCE, values);
+  }
+
+  // --- Helpers ---
+
+  private void assertRoundTripInts(ChunkTransform transform, int[] original) {
+    if (original.length == 0) {
+      return;
+    }

Review Comment:
   The empty-array test cases are currently no-ops because this helper returns 
early when `original.length == 0`, so `encode/decode` never run. Consider 
exercising the transform on an empty buffer (numBytes=0) and asserting it 
doesn’t throw, so `testDeltaEmptyInts`/`testDoubleDeltaEmptyInts` actually 
validate behavior.



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/forward/ForwardIndexType.java:
##########
@@ -113,6 +113,8 @@ private void validateForwardIndexEnabled(ForwardIndexConfig 
forwardIndexConfig,
     if (dictionaryConfig.isEnabled()) {
       Preconditions.checkState(compressionCodec == null || 
compressionCodec.isApplicableToDictEncodedIndex(),
           "Compression codec: %s is not applicable to dictionary encoded 
column: %s", compressionCodec, column);
+      Preconditions.checkState(forwardIndexConfig.getTransformCodec() == null,
+          "Transform codec is not supported for dictionary-encoded column: 
%s", column);

Review Comment:
   In dictionary-encoded columns, validation currently requires 
`forwardIndexConfig.getTransformCodec() == null`, which will also reject an 
explicitly configured `transformCodec: NONE`. Since NONE is semantically 
equivalent to “no transform”, this should allow either null or NONE (e.g., 
check `transformCodec == null || transformCodec == TransformCodec.NONE`).



##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/compression/TransformCodec.java:
##########
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.compression;
+
+/**
+ * Enum representing reversible numeric transforms that can be applied to 
chunk data before compression.
+ * Unlike {@link ChunkCompressionType}, which defines byte-level compression 
algorithms, TransformCodec
+ * operates on typed numeric values (INT/LONG) to reduce entropy before the 
compression stage.
+ *
+ * <p>The transform is applied per-chunk: each chunk is independently 
transformed and compressed.
+ * On read, the reverse transform is applied after decompression.</p>
+ *
+ * <p>Supported in V1 for single-value INT and LONG RAW forward index columns 
only.</p>
+ */
+public enum TransformCodec {
+
+  /** No transform applied. */
+  NONE(0),
+
+  /**
+   * Delta encoding: stores the first value as-is, then each subsequent value 
as the difference
+   * from the previous value. Effective for monotonically increasing data 
(e.g., timestamps).
+   */
+  DELTA(1),
+
+  /**
+   * Double-delta encoding: stores the first value as-is, the first delta 
as-is, then each
+   * subsequent value as the difference of consecutive deltas. Effective for 
data with constant
+   * or near-constant step sizes (e.g., fixed-interval timestamps).
+   */
+  DOUBLE_DELTA(2);
+
+  private static final TransformCodec[] VALUES = values();
+
+  private final int _value;
+
+  TransformCodec(int value) {
+    _value = value;
+  }
+
+  public int getValue() {
+    return _value;
+  }
+
+  public static TransformCodec valueOf(int ordinal) {
+    if (ordinal < 0 || ordinal >= VALUES.length) {
+      throw new IllegalArgumentException("Invalid TransformCodec ordinal: " + 
ordinal);
+    }
+    return VALUES[ordinal];
+  }

Review Comment:
   `TransformCodec.valueOf(int)` currently indexes into `values()` using the 
provided int, but the header stores `TransformCodec.getValue()`. This is only 
correct as long as `_value` always matches the enum ordinal; reordering or 
introducing non-sequential values would silently corrupt decoding. Consider 
decoding by the explicit `_value` instead of the ordinal (e.g., precomputed 
lookup by value).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to