This is an automated email from the ASF dual-hosted git repository.
yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 24138821e2c Fix dictionary backward compatibility issue for empty
string columns (#18447)
24138821e2c is described below
commit 24138821e2c872b0d0384aa578b5a2b9a56a1ecc
Author: Yash Mayya <[email protected]>
AuthorDate: Fri May 8 17:25:56 2026 -0700
Fix dictionary backward compatibility issue for empty string columns
(#18447)
---
.../index/readers/ImmutableDictionaryTest.java | 48 ++++++++++++++++++++++
.../spi/index/metadata/ColumnMetadataImpl.java | 6 ++-
2 files changed, 53 insertions(+), 1 deletion(-)
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
index db4a4fd6a20..5df10389da3 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
@@ -30,14 +30,18 @@ import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
+import org.apache.commons.configuration2.PropertiesConfiguration;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.pinot.segment.local.PinotBuffersAfterMethodCheckRule;
import
org.apache.pinot.segment.local.segment.creator.impl.SegmentDictionaryCreator;
import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column;
+import org.apache.pinot.segment.spi.index.metadata.ColumnMetadataImpl;
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.data.FieldSpec.FieldType;
import org.apache.pinot.spi.data.MetricFieldSpec;
import org.apache.pinot.spi.utils.BigDecimalUtils;
import org.apache.pinot.spi.utils.ByteArray;
@@ -466,6 +470,50 @@ public class ImmutableDictionaryTest implements
PinotBuffersAfterMethodCheckRule
}
}
+ /**
+ * Regression test for old segments (pre-1.6.0) that have a STRING column
with all-empty values:
+ * the segment lacks LENGTH_OF_LONGEST_ELEMENT (introduced in 1.6.0) and has
DICTIONARY_ELEMENT_SIZE=0
+ * (the longest entry length is zero when every entry is empty). The
metadata loader must canonicalize
+ * the longest-element length to 0 here; otherwise it leaves the field at
the UNAVAILABLE sentinel
+ * (-1), which propagates into StringDictionary.getBuffer() as `new
byte[-1]` and fails query
+ * execution with NegativeArraySizeException.
+ */
+ @Test
+ public void testStringDictionaryReadOnPre16OldSegmentMetadata()
+ throws Exception {
+ String columnName = "emptyStringCol";
+ // Build a real var-length string dictionary on disk holding a single
empty value.
+ try (SegmentDictionaryCreator dictCreator = new SegmentDictionaryCreator(
+ new DimensionFieldSpec(columnName, DataType.STRING, true), TEMP_DIR,
true)) {
+ dictCreator.build(new String[]{""});
+ assertEquals(dictCreator.getNumBytesPerEntry(), 0);
+ }
+
+ // Simulate pre-1.6.0 metadata: HAS_DICTIONARY=true,
DICTIONARY_ELEMENT_SIZE=0, no LENGTH_OF_LONGEST_ELEMENT.
+ PropertiesConfiguration config = new PropertiesConfiguration();
+ config.setProperty(Column.getKeyFor(columnName, Column.COLUMN_NAME),
columnName);
+ config.setProperty(Column.getKeyFor(columnName, Column.COLUMN_TYPE),
FieldType.DIMENSION.name());
+ config.setProperty(Column.getKeyFor(columnName, Column.DATA_TYPE),
DataType.STRING.name());
+ config.setProperty(Column.getKeyFor(columnName, Column.IS_SINGLE_VALUED),
true);
+ config.setProperty(Column.getKeyFor(columnName, Column.CARDINALITY), 1);
+ config.setProperty(Column.getKeyFor(columnName, Column.HAS_DICTIONARY),
true);
+ config.setProperty(Column.getKeyFor(columnName,
Column.DICTIONARY_ELEMENT_SIZE), 0);
+ // LENGTH_OF_LONGEST_ELEMENT intentionally NOT set.
+
+ ColumnMetadataImpl metadata =
ColumnMetadataImpl.fromPropertiesConfiguration(config, 1, columnName);
+
+ // Reproduce the production code path: DictionaryIndexType passes
metadata.getLengthOfLongestElement()
+ // as numBytesPerValue. Without the canonicalization fix this is -1, and
StringDictionary.readStringValues
+ // throws NegativeArraySizeException at
BaseImmutableDictionary.getBuffer().
+ try (PinotDataBuffer buffer = PinotDataBuffer.mapReadOnlyBigEndianFile(
+ new File(TEMP_DIR, columnName + V1Constants.Dict.FILE_EXTENSION));
+ StringDictionary dict = new StringDictionary(buffer, 1,
metadata.getLengthOfLongestElement())) {
+ String[] outValues = new String[1];
+ dict.readStringValues(new int[]{0}, 1, outValues);
+ assertEquals(outValues[0], "");
+ }
+ }
+
@AfterClass
public void tearDown() {
FileUtils.deleteQuietly(TEMP_DIR);
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
index 8ce7d53f820..45c423e66c5 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
@@ -571,7 +571,11 @@ public class ColumnMetadataImpl implements ColumnMetadata {
_lengthOfShortestElement = size;
_lengthOfLongestElement = size;
} else {
- if (_lengthOfLongestElement < 0 && _dictionaryElementSize > 0) {
+ // Pre-1.6.0 segments don't write LENGTH_OF_LONGEST_ELEMENT; fall back
to DICTIONARY_ELEMENT_SIZE,
+ // which has been written for dictionary-encoded columns since well
before 1.6.0 (including the
+ // zero-length case where every entry is an empty string). Leaving the
field at the UNAVAILABLE
+ // sentinel would propagate as `numBytesPerValue = -1` into
BaseImmutableDictionary.getBuffer().
+ if (_lengthOfLongestElement < 0 && _hasDictionary) {
_lengthOfLongestElement = _dictionaryElementSize;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]