This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new fe3e5deebd9 Reduce buffer allocation when collecting value size (#18012)
fe3e5deebd9 is described below
commit fe3e5deebd981dd40e873d3a0f934dd4e8fddde8
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Sat Mar 28 01:04:09 2026 -0700
Reduce buffer allocation when collecting value size (#18012)
Auto-merged by automated PR review bot.
---
.../local/io/util/FixedByteValueReaderWriter.java | 28 ++++++++++++
.../pinot/segment/local/io/util/ValueReader.java | 6 +++
.../local/io/util/VarLengthValueReader.java | 13 ++++++
.../writer/impl/MutableOffHeapByteArrayStore.java | 23 ++++++++++
.../converter/stats/MutableColumnStatistics.java | 43 +++++--------------
.../BigDecimalOffHeapMutableDictionary.java | 5 +++
.../BigDecimalOnHeapMutableDictionary.java | 8 +++-
.../dictionary/BytesOffHeapMutableDictionary.java | 5 +++
.../dictionary/BytesOnHeapMutableDictionary.java | 5 +++
.../dictionary/SameValueMutableDictionary.java | 5 +++
.../dictionary/StringOffHeapMutableDictionary.java | 5 +++
.../dictionary/StringOnHeapMutableDictionary.java | 6 +++
...IndexAndDictionaryBasedForwardIndexCreator.java | 50 +++-------------------
.../index/readers/BaseImmutableDictionary.java | 8 ++++
.../index/readers/BigDecimalDictionary.java | 5 +++
.../segment/index/readers/BytesDictionary.java | 5 +++
.../readers/ConstantValueBigDecimalDictionary.java | 13 ++++++
.../readers/ConstantValueBytesDictionary.java | 5 +++
.../readers/ConstantValueStringDictionary.java | 5 +++
.../index/readers/OnHeapBigDecimalDictionary.java | 5 +++
.../index/readers/OnHeapBytesDictionary.java | 10 +++++
.../index/readers/OnHeapStringDictionary.java | 5 +++
.../segment/index/readers/StringDictionary.java | 5 +++
.../PartitionIdVirtualColumnProvider.java | 15 ++++++-
.../io/util/VarLengthValueReaderWriterTest.java | 4 ++
.../index/readers/ImmutableDictionaryTest.java | 8 ++++
.../FixedByteValueReaderWriterTest.java | 22 ++++++++++
.../pinot/segment/spi/index/reader/Dictionary.java | 13 ++++--
.../converter/DictionaryToRawIndexConverter.java | 8 +---
29 files changed, 249 insertions(+), 89 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/FixedByteValueReaderWriter.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/FixedByteValueReaderWriter.java
index 2e85a81c1ae..7395efb4572 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/FixedByteValueReaderWriter.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/FixedByteValueReaderWriter.java
@@ -100,6 +100,34 @@ public final class FixedByteValueReaderWriter implements
ValueReader {
return value;
}
+ @Override
+ public int getUnpaddedByteSize(int index, int numBytesPerValue) {
+ // Based on the ZeroInWord algorithm: http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ long startOffset = (long) index * numBytesPerValue;
+ boolean littleEndian = _dataBuffer.order() == ByteOrder.LITTLE_ENDIAN;
+ int endIndex = numBytesPerValue & 0xFFFFFFF8;
+ int i = 0;
+ for (; i < endIndex; i += Long.BYTES) {
+ long word = _dataBuffer.getLong(startOffset + i);
+ long tmp = ~(((word & 0x7F7F7F7F7F7F7F7FL) + 0x7F7F7F7F7F7F7F7FL) | word | 0x7F7F7F7F7F7F7F7FL);
+ if (tmp != 0) {
+ return i + ((littleEndian ? Long.numberOfTrailingZeros(tmp) : Long.numberOfLeadingZeros(tmp)) >>> 3);
+ }
+ }
+ for (; i < numBytesPerValue; i++) {
+ byte b = _dataBuffer.getByte(startOffset + i);
+ if (b == 0) {
+ break;
+ }
+ }
+ return i;
+ }
+
+ @Override
+ public int getByteSize(int index, int numBytesPerValue) {
+ return numBytesPerValue;
+ }
+
@Override
public int compareUtf8Bytes(int index, int numBytesPerValue, byte[] bytes) {
long startOffset = (long) index * numBytesPerValue;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/ValueReader.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/ValueReader.java
index acf3d4fea2a..8478ba056ac 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/ValueReader.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/ValueReader.java
@@ -77,6 +77,12 @@ public interface ValueReader extends Closeable {
*/
byte[] getBytes(int index, int numBytesPerValue);
+ /// Applicable to STRING only.
+ int getUnpaddedByteSize(int index, int numBytesPerValue);
+
+ /// Applicable to variable sized types other than STRING, i.e. BIG_DECIMAL, BYTES.
+ int getByteSize(int index, int numBytesPerValue);
+
/**
* NOTE: The passed in reusable buffer should have capacity of at least {@code numBytesPerValue}.
*/
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/VarLengthValueReader.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/VarLengthValueReader.java
index 502d38cfaaa..1139d20fc6d 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/VarLengthValueReader.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/util/VarLengthValueReader.java
@@ -128,6 +128,19 @@ public class VarLengthValueReader implements ValueReader {
return value;
}
+ @Override
+ public int getUnpaddedByteSize(int index, int numBytesPerValue) {
+ return getByteSize(index, numBytesPerValue);
+ }
+
+ @Override
+ public int getByteSize(int index, int numBytesPerValue) {
+ int offsetPosition = _dataSectionStartOffSet + Integer.BYTES * index;
+ int startOffset = _dataBuffer.getInt(offsetPosition);
+ int endOffset = _dataBuffer.getInt(offsetPosition + Integer.BYTES);
+ return endOffset - startOffset;
+ }
+
@Override
public int compareUtf8Bytes(int index, int numBytesPerValue, byte[] bytes) {
int offsetPosition = _dataSectionStartOffSet + Integer.BYTES * index;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/MutableOffHeapByteArrayStore.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/MutableOffHeapByteArrayStore.java
index 8ddd6d6a87e..276f167ec97 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/MutableOffHeapByteArrayStore.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/MutableOffHeapByteArrayStore.java
@@ -146,6 +146,17 @@ public class MutableOffHeapByteArrayStore implements
Closeable {
return value;
}
+ private int getValueSize(int index) {
+ int startOffset = _pinotDataBuffer.getInt(index * Integer.BYTES);
+ int endOffset;
+ if (index != 0) {
+ endOffset = _pinotDataBuffer.getInt((index - 1) * Integer.BYTES);
+ } else {
+ endOffset = _size;
+ }
+ return endOffset - startOffset;
+ }
+
private int getSize() {
return _size;
}
@@ -216,6 +227,18 @@ public class MutableOffHeapByteArrayStore implements
Closeable {
throw new RuntimeException("dictionary ID '" + index + "' too low");
}
+ public int getValueSize(int index) {
+ List<Buffer> bufList = _buffers;
+ for (int x = bufList.size() - 1; x >= 0; x--) {
+ Buffer buffer = bufList.get(x);
+ if (index >= buffer.getStartIndex()) {
+ return buffer.getValueSize(index - buffer.getStartIndex());
+ }
+ }
+ // Assumed that we will never ask for an index that does not exist.
+ throw new RuntimeException("dictionary ID '" + index + "' too low");
+ }
+
// Adds a byte array and returns the index. No verification is made as to whether the byte array already exists or not
public int add(byte[] value) {
int valueLength = value.length;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/MutableColumnStatistics.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/MutableColumnStatistics.java
index 9bb751e3dcd..ca1dc1fba43 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/MutableColumnStatistics.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/MutableColumnStatistics.java
@@ -19,7 +19,6 @@
package org.apache.pinot.segment.local.realtime.converter.stats;
import com.google.common.base.Preconditions;
-import com.google.common.base.Utf8;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
@@ -30,7 +29,6 @@ import
org.apache.pinot.segment.spi.index.mutable.MutableForwardIndex;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.segment.spi.partition.PartitionFunction;
import org.apache.pinot.spi.data.FieldSpec.DataType;
-import org.apache.pinot.spi.utils.BigDecimalUtils;
/**
@@ -99,37 +97,16 @@ public class MutableColumnStatistics implements
ColumnStatistics {
if (storedType.isFixedWidth()) {
_minElementLength = storedType.size();
_maxElementLength = storedType.size();
- return;
- }
-
- // If the stored type is not fixed width, iterate over the dictionary to find the min/max element length
- _minElementLength = Integer.MAX_VALUE;
- _maxElementLength = 0;
- int length = _dictionary.length();
- switch (storedType) {
- case BIG_DECIMAL:
- for (int i = 0; i < length; i++) {
- int elementLength =
BigDecimalUtils.byteSize(_dictionary.getBigDecimalValue(i));
- _minElementLength = Math.min(_minElementLength, elementLength);
- _maxElementLength = Math.max(_maxElementLength, elementLength);
- }
- break;
- case STRING:
- for (int i = 0; i < length; i++) {
- int elementLength =
Utf8.encodedLength(_dictionary.getStringValue(i));
- _minElementLength = Math.min(_minElementLength, elementLength);
- _maxElementLength = Math.max(_maxElementLength, elementLength);
- }
- break;
- case BYTES:
- for (int i = 0; i < length; i++) {
- int elementLength = _dictionary.getBytesValue(i).length;
- _minElementLength = Math.min(_minElementLength, elementLength);
- _maxElementLength = Math.max(_maxElementLength, elementLength);
- }
- break;
- default:
- throw new IllegalStateException("Unsupported stored type: " +
storedType);
+ } else {
+ // If the stored type is not fixed width, iterate over the dictionary to find the min/max element length
+ _minElementLength = Integer.MAX_VALUE;
+ _maxElementLength = 0;
+ int length = _dictionary.length();
+ for (int i = 0; i < length; i++) {
+ int elementLength = _dictionary.getValueSize(i);
+ _minElementLength = Math.min(_minElementLength, elementLength);
+ _maxElementLength = Math.max(_maxElementLength, elementLength);
+ }
}
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOffHeapMutableDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOffHeapMutableDictionary.java
index 6e39aff860a..9eff5af67de 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOffHeapMutableDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOffHeapMutableDictionary.java
@@ -220,6 +220,11 @@ public class BigDecimalOffHeapMutableDictionary extends
BaseOffHeapMutableDictio
return getBigDecimalValue(dictId).toPlainString();
}
+ @Override
+ public int getValueSize(int dictId) {
+ return _byteStore.getValueSize(dictId);
+ }
+
@Override
protected void setValue(int dictId, Object value, byte[] serializedValue) {
_byteStore.add(serializedValue);
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOnHeapMutableDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOnHeapMutableDictionary.java
index ce1882903aa..6e45bc07d97 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOnHeapMutableDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BigDecimalOnHeapMutableDictionary.java
@@ -25,6 +25,7 @@ import java.math.BigDecimal;
import java.util.Arrays;
import org.apache.pinot.common.request.context.predicate.RangePredicate;
import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.utils.BigDecimalUtils;
@SuppressWarnings("Duplicates")
@@ -204,7 +205,12 @@ public class BigDecimalOnHeapMutableDictionary extends
BaseOnHeapMutableDictiona
@Override
public byte[] getBytesValue(int dictId) {
- return getBytesValue(dictId);
+ return BigDecimalUtils.serialize(getBigDecimalValue(dictId));
+ }
+
+ @Override
+ public int getValueSize(int dictId) {
+ return BigDecimalUtils.byteSize(getBigDecimalValue(dictId));
}
private void updateMinMax(BigDecimal value) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java
index b465860c474..2f9519c5b45 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java
@@ -196,6 +196,11 @@ public class BytesOffHeapMutableDictionary extends
BaseOffHeapMutableDictionary
return _byteStore.get(dictId);
}
+ @Override
+ public int getValueSize(int dictId) {
+ return _byteStore.getValueSize(dictId);
+ }
+
@Override
protected void setValue(int dictId, Object value, byte[] serializedValue) {
_byteStore.add(serializedValue);
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java
index a2bd8718276..ac25f855d55 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java
@@ -175,6 +175,11 @@ public class BytesOnHeapMutableDictionary extends
BaseOnHeapMutableDictionary {
return getByteArrayValue(dictId).getBytes();
}
+ @Override
+ public int getValueSize(int dictId) {
+ return getByteArrayValue(dictId).length();
+ }
+
@Override
public ByteArray getByteArrayValue(int dictId) {
return (ByteArray) super.get(dictId);
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/SameValueMutableDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/SameValueMutableDictionary.java
index a13f4661559..b94121a329d 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/SameValueMutableDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/SameValueMutableDictionary.java
@@ -123,6 +123,11 @@ public class SameValueMutableDictionary implements
MutableDictionary {
return _actualValue.toString();
}
+ @Override
+ public int getValueSize(int dictId) {
+ return _delegate.getValueSize(dictId);
+ }
+
@Override
public void close()
throws IOException {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOffHeapMutableDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOffHeapMutableDictionary.java
index 9fab9aa8398..838723c0830 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOffHeapMutableDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOffHeapMutableDictionary.java
@@ -177,6 +177,11 @@ public class StringOffHeapMutableDictionary extends
BaseOffHeapMutableDictionary
return _byteStore.get(dictId);
}
+ @Override
+ public int getValueSize(int dictId) {
+ return _byteStore.getValueSize(dictId);
+ }
+
@Override
protected void setValue(int dictId, Object value, byte[] serializedValue) {
_byteStore.add(serializedValue);
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOnHeapMutableDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOnHeapMutableDictionary.java
index f8586f011fe..09fde55a585 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOnHeapMutableDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/dictionary/StringOnHeapMutableDictionary.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.segment.local.realtime.impl.dictionary;
+import com.google.common.base.Utf8;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.ints.IntSets;
@@ -161,6 +162,11 @@ public class StringOnHeapMutableDictionary extends
BaseOnHeapMutableDictionary {
return getStringValue(dictId).getBytes(UTF_8);
}
+ @Override
+ public int getValueSize(int dictId) {
+ return Utf8.encodedLength(getStringValue(dictId));
+ }
+
private void updateMinMax(String value) {
if (_min == null) {
_min = value;
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
index 066695a0cb2..4d45240dd62 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
@@ -19,7 +19,6 @@
package org.apache.pinot.segment.local.segment.index.loader;
import com.google.common.base.Preconditions;
-import com.google.common.base.Utf8;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
@@ -41,7 +40,6 @@ import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.apache.pinot.segment.spi.store.SegmentDirectory;
import org.apache.pinot.segment.spi.utils.SegmentMetadataUtils;
import org.apache.pinot.spi.data.FieldSpec;
-import org.apache.pinot.spi.utils.BigDecimalUtils;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -263,7 +261,7 @@ public class
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
int finalDictId = dictId;
docIdsBitmap.stream().forEach(docId ->
putInt(_forwardIndexValueBuffer, docId, finalDictId));
if (!isFixedWidth) {
- lengthOfLongestEntry = trackLengthOfLongestEntry(dictionary,
lengthOfLongestEntry, dictId);
+ lengthOfLongestEntry = Math.max(lengthOfLongestEntry,
dictionary.getValueSize(dictId));
}
}
@@ -315,7 +313,7 @@ public class
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
});
if (!isFixedWidth) {
- lengthOfLongestEntry = trackLengthOfLongestEntry(dictionary,
lengthOfLongestEntry, dictId);
+ lengthOfLongestEntry = Math.max(lengthOfLongestEntry,
dictionary.getValueSize(dictId));
}
}
@@ -387,49 +385,11 @@ public class
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
}
}
- private int trackLengthOfLongestEntry(Dictionary dictionary, int
lengthOfLongestEntry, int dictId) {
- int updatedLengthOfLongestEntry;
- switch (_storedType) {
- case STRING:
- updatedLengthOfLongestEntry =
Math.max(Utf8.encodedLength(dictionary.getStringValue(dictId)),
- lengthOfLongestEntry);
- break;
- case BYTES:
- updatedLengthOfLongestEntry =
Math.max(dictionary.getBytesValue(dictId).length, lengthOfLongestEntry);
- break;
- case BIG_DECIMAL:
- updatedLengthOfLongestEntry = Math.max(
- BigDecimalUtils.byteSize(dictionary.getBigDecimalValue(dictId)),
lengthOfLongestEntry);
- break;
- default:
- throw new IllegalStateException("Trying to calculate lengthOfLongestEntry for invalid stored type: "
- + _storedType);
- }
- return updatedLengthOfLongestEntry;
- }
-
private void trackMaxRowLengthInBytes(Dictionary dictionary, int[]
maxRowLengthInBytes, int docId, int dictId) {
int curSizeOfRow = getInt(_forwardIndexMaxSizeBuffer, docId);
- switch (_storedType) {
- case STRING:
- int newSizeOfEntry =
Utf8.encodedLength(dictionary.getStringValue(dictId)) + curSizeOfRow;
- putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry);
- maxRowLengthInBytes[0] = Math.max(newSizeOfEntry,
maxRowLengthInBytes[0]);
- break;
- case BYTES:
- newSizeOfEntry = dictionary.getBytesValue(dictId).length +
curSizeOfRow;
- putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry);
- maxRowLengthInBytes[0] = Math.max(newSizeOfEntry,
maxRowLengthInBytes[0]);
- break;
- case BIG_DECIMAL:
- newSizeOfEntry =
BigDecimalUtils.byteSize(dictionary.getBigDecimalValue(dictId)) + curSizeOfRow;
- putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry);
- maxRowLengthInBytes[0] = Math.max(newSizeOfEntry,
maxRowLengthInBytes[0]);
- break;
- default:
- throw new IllegalStateException("Trying to calculate maxRowLengthInBytes for invalid stored type: "
- + _storedType);
- }
+ int newSizeOfEntry = dictionary.getValueSize(dictId) + curSizeOfRow;
+ putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry);
+ maxRowLengthInBytes[0] = Math.max(maxRowLengthInBytes[0], newSizeOfEntry);
}
private void writeToForwardIndex(Dictionary dictionary, IndexCreationContext
context)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java
index 4ba8895beaa..32a2f7aa172 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java
@@ -278,6 +278,14 @@ public abstract class BaseImmutableDictionary implements
Dictionary {
return _valueReader.getBytes(dictId, _numBytesPerValue);
}
+ protected int getUnpaddedByteSize(int dictId) {
+ return _valueReader.getUnpaddedByteSize(dictId, _numBytesPerValue);
+ }
+
+ protected int getByteSize(int dictId) {
+ return _valueReader.getByteSize(dictId, _numBytesPerValue);
+ }
+
public int get32BitsMurmur3Hash(int dictId, byte[] buffer) {
return _valueReader.get32BitsMurmur3Hash(dictId, _numBytesPerValue,
buffer);
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BigDecimalDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BigDecimalDictionary.java
index 00efcc3b61e..7f0aca47938 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BigDecimalDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BigDecimalDictionary.java
@@ -98,6 +98,11 @@ public class BigDecimalDictionary extends
BaseImmutableDictionary {
return getBytes(dictId);
}
+ @Override
+ public int getValueSize(int dictId) {
+ return getByteSize(dictId);
+ }
+
@Override
public void read32BitsMurmur3HashValues(int[] dictIds, int length, int[]
outValues) {
byte[] buffer = getBuffer();
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BytesDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BytesDictionary.java
index db3401d3af5..30467e113b6 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BytesDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BytesDictionary.java
@@ -105,6 +105,11 @@ public class BytesDictionary extends
BaseImmutableDictionary {
return getBytes(dictId);
}
+ @Override
+ public int getValueSize(int dictId) {
+ return getByteSize(dictId);
+ }
+
@Override
public void read32BitsMurmur3HashValues(int[] dictIds, int length, int[]
outValues) {
byte[] buffer = getBuffer();
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBigDecimalDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBigDecimalDictionary.java
index 2e74bb5422a..cfa0bf337be 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBigDecimalDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBigDecimalDictionary.java
@@ -20,6 +20,7 @@ package org.apache.pinot.segment.local.segment.index.readers;
import java.math.BigDecimal;
import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.utils.BigDecimalUtils;
/**
@@ -27,10 +28,12 @@ import org.apache.pinot.spi.data.FieldSpec.DataType;
*/
public class ConstantValueBigDecimalDictionary extends BaseImmutableDictionary
{
private final BigDecimal _value;
+ private final byte[] _bytes;
public ConstantValueBigDecimalDictionary(BigDecimal value) {
super(1);
_value = value;
+ _bytes = BigDecimalUtils.serialize(_value);
}
@Override
@@ -110,4 +113,14 @@ public class ConstantValueBigDecimalDictionary extends
BaseImmutableDictionary {
public String getStringValue(int dictId) {
return _value.toPlainString();
}
+
+ @Override
+ public byte[] getBytesValue(int dictId) {
+ return _bytes;
+ }
+
+ @Override
+ public int getValueSize(int dictId) {
+ return _bytes.length;
+ }
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBytesDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBytesDictionary.java
index f6fa9e261d3..d08d835c041 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBytesDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueBytesDictionary.java
@@ -123,4 +123,9 @@ public class ConstantValueBytesDictionary extends
BaseImmutableDictionary {
public byte[] getBytesValue(int dictId) {
return _value;
}
+
+ @Override
+ public int getValueSize(int dictId) {
+ return _value.length;
+ }
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueStringDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueStringDictionary.java
index 270a8905d2f..4fe32efd536 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueStringDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/ConstantValueStringDictionary.java
@@ -117,6 +117,11 @@ public class ConstantValueStringDictionary extends
BaseImmutableDictionary {
return _bytes;
}
+ @Override
+ public int getValueSize(int dictId) {
+ return _bytes.length;
+ }
+
@Override
public void getDictIds(List<String> values, IntSet dictIds) {
if (values.contains(_value)) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBigDecimalDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBigDecimalDictionary.java
index 4642254ff3c..edc458a8327 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBigDecimalDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBigDecimalDictionary.java
@@ -101,4 +101,9 @@ public class OnHeapBigDecimalDictionary extends
BaseImmutableDictionary {
public byte[] getBytesValue(int dictId) {
return BigDecimalUtils.serialize(_dictIdToVal[dictId]);
}
+
+ @Override
+ public int getValueSize(int dictId) {
+ return BigDecimalUtils.byteSize(_dictIdToVal[dictId]);
+ }
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBytesDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBytesDictionary.java
index fe47b8360d9..9ce621c8c1a 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBytesDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapBytesDictionary.java
@@ -124,4 +124,14 @@ public class OnHeapBytesDictionary extends
BaseImmutableDictionary {
public byte[] getBytesValue(int dictId) {
return _dictIdToVal[dictId].getBytes();
}
+
+ @Override
+ public ByteArray getByteArrayValue(int dictId) {
+ return _dictIdToVal[dictId];
+ }
+
+ @Override
+ public int getValueSize(int dictId) {
+ return _dictIdToVal[dictId].length();
+ }
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapStringDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapStringDictionary.java
index 3a647112c55..0f6e52585e4 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapStringDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/OnHeapStringDictionary.java
@@ -128,4 +128,9 @@ public class OnHeapStringDictionary extends
BaseImmutableDictionary {
public byte[] getBytesValue(int dictId) {
return _unpaddedBytes[dictId];
}
+
+ @Override
+ public int getValueSize(int dictId) {
+ return _unpaddedBytes[dictId].length;
+ }
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java
index 60f604660fb..ff1921a98b3 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java
@@ -90,6 +90,11 @@ public class StringDictionary extends
BaseImmutableDictionary {
return getUnpaddedBytes(dictId, getBuffer());
}
+ @Override
+ public int getValueSize(int dictId) {
+ return getUnpaddedByteSize(dictId);
+ }
+
@Override
public void readIntValues(int[] dictIds, int length, int[] outValues) {
byte[] buffer = getBuffer();
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/PartitionIdVirtualColumnProvider.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/PartitionIdVirtualColumnProvider.java
index f44b91a2704..f8beac3baa9 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/PartitionIdVirtualColumnProvider.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/virtualcolumn/PartitionIdVirtualColumnProvider.java
@@ -18,7 +18,10 @@
*/
package org.apache.pinot.segment.local.segment.virtualcolumn;
+import com.google.common.base.Utf8;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
+import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -209,6 +212,16 @@ public class PartitionIdVirtualColumnProvider implements
VirtualColumnProvider {
return _values.get(dictId);
}
+ @Override
+ public byte[] getBytesValue(int dictId) {
+ return _values.get(dictId).getBytes(StandardCharsets.UTF_8);
+ }
+
+ @Override
+ public int getValueSize(int dictId) {
+ return Utf8.encodedLength(_values.get(dictId));
+ }
+
@Override
public int getIntValue(int dictId) {
throw new UnsupportedOperationException();
@@ -230,7 +243,7 @@ public class PartitionIdVirtualColumnProvider implements
VirtualColumnProvider {
}
@Override
- public java.math.BigDecimal getBigDecimalValue(int dictId) {
+ public BigDecimal getBigDecimalValue(int dictId) {
throw new UnsupportedOperationException();
}
}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/util/VarLengthValueReaderWriterTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/util/VarLengthValueReaderWriterTest.java
index a7629569046..301db4b130b 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/util/VarLengthValueReaderWriterTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/io/util/VarLengthValueReaderWriterTest.java
@@ -83,6 +83,8 @@ public class VarLengthValueReaderWriterTest implements
PinotBuffersAfterMethodCh
byte[] buffer = new byte[MAX_STRING_LENGTH];
assertEquals(reader.getUnpaddedString(0, MAX_STRING_LENGTH, buffer),
value);
assertEquals(reader.getBytes(0, MAX_STRING_LENGTH), valueBytes);
+ assertEquals(reader.getByteSize(0, MAX_STRING_LENGTH),
valueBytes.length);
+ assertEquals(reader.getUnpaddedByteSize(0, MAX_STRING_LENGTH),
valueBytes.length);
}
}
}
@@ -111,6 +113,8 @@ public class VarLengthValueReaderWriterTest implements
PinotBuffersAfterMethodCh
for (int i = 0; i < NUM_VALUES; i++) {
assertEquals(reader.getUnpaddedString(i, MAX_STRING_LENGTH, buffer),
values[i]);
assertEquals(reader.getBytes(i, MAX_STRING_LENGTH),
valueBytesArray[i]);
+ assertEquals(reader.getByteSize(i, MAX_STRING_LENGTH),
valueBytesArray[i].length);
+ assertEquals(reader.getUnpaddedByteSize(i, MAX_STRING_LENGTH),
valueBytesArray[i].length);
}
}
}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
index 220028c36bc..c78f62c3aa2 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readers/ImmutableDictionaryTest.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.segment.local.segment.index.readers;
+import com.google.common.base.Utf8;
import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
import it.unimi.dsi.fastutil.floats.FloatOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
@@ -205,6 +206,7 @@ public class ImmutableDictionaryTest implements
PinotBuffersAfterMethodCheckRule
assertEquals(intDictionary.getFloatValue(i), (float) _intValues[i]);
assertEquals(intDictionary.getDoubleValue(i), (double) _intValues[i]);
Assert.assertEquals(Integer.parseInt(intDictionary.getStringValue(i)),
_intValues[i]);
+ assertEquals(intDictionary.getValueSize(i), Integer.BYTES);
assertEquals(intDictionary.indexOf(String.valueOf(_intValues[i])), i);
@@ -242,6 +244,7 @@ public class ImmutableDictionaryTest implements
PinotBuffersAfterMethodCheckRule
assertEquals(longDictionary.getFloatValue(i), (float) _longValues[i]);
assertEquals(longDictionary.getDoubleValue(i), (double) _longValues[i]);
Assert.assertEquals(Long.parseLong(longDictionary.getStringValue(i)),
_longValues[i]);
+ assertEquals(longDictionary.getValueSize(i), Long.BYTES);
assertEquals(longDictionary.indexOf(String.valueOf(_longValues[i])), i);
@@ -279,6 +282,7 @@ public class ImmutableDictionaryTest implements
PinotBuffersAfterMethodCheckRule
assertEquals(floatDictionary.getFloatValue(i), _floatValues[i]);
assertEquals(floatDictionary.getDoubleValue(i), (double)
_floatValues[i]);
Assert.assertEquals(Float.parseFloat(floatDictionary.getStringValue(i)),
_floatValues[i], 0.0f);
+ assertEquals(floatDictionary.getValueSize(i), Float.BYTES);
assertEquals(floatDictionary.indexOf(String.valueOf(_floatValues[i])),
i);
@@ -316,6 +320,7 @@ public class ImmutableDictionaryTest implements
PinotBuffersAfterMethodCheckRule
assertEquals(doubleDictionary.getFloatValue(i), (float)
_doubleValues[i]);
assertEquals(doubleDictionary.getDoubleValue(i), _doubleValues[i]);
Assert.assertEquals(Double.parseDouble(doubleDictionary.getStringValue(i)),
_doubleValues[i], 0.0);
+ assertEquals(doubleDictionary.getValueSize(i), Double.BYTES);
assertEquals(doubleDictionary.indexOf(String.valueOf(_doubleValues[i])),
i);
@@ -356,6 +361,7 @@ public class ImmutableDictionaryTest implements
PinotBuffersAfterMethodCheckRule
assertEquals(bigDecimalDictionary.getDoubleValue(i),
_bigDecimalValues[i].doubleValue());
assertEquals(bigDecimalDictionary.getBigDecimalValue(i),
_bigDecimalValues[i]);
Assert.assertEquals(new
BigDecimal(bigDecimalDictionary.getStringValue(i)), _bigDecimalValues[i]);
+ assertEquals(bigDecimalDictionary.getValueSize(i),
BigDecimalUtils.byteSize(_bigDecimalValues[i]));
assertEquals(bigDecimalDictionary.indexOf(String.valueOf(_bigDecimalValues[i])),
i);
@@ -401,6 +407,7 @@ public class ImmutableDictionaryTest implements
PinotBuffersAfterMethodCheckRule
for (int i = 0; i < NUM_VALUES; i++) {
assertEquals(stringDictionary.get(i), _stringValues[i]);
assertEquals(stringDictionary.getStringValue(i), _stringValues[i]);
+ assertEquals(stringDictionary.getValueSize(i),
Utf8.encodedLength(_stringValues[i]));
assertEquals(stringDictionary.indexOf(_stringValues[i]), i);
@@ -448,6 +455,7 @@ public class ImmutableDictionaryTest implements
PinotBuffersAfterMethodCheckRule
assertEquals(bytesDictionary.get(i), _bytesValues[i].getBytes());
assertEquals(bytesDictionary.getStringValue(i),
_bytesValues[i].toHexString());
assertEquals(bytesDictionary.getBytesValue(i),
_bytesValues[i].getBytes());
+ assertEquals(bytesDictionary.getValueSize(i), BYTES_LENGTH);
assertEquals(bytesDictionary.indexOf(_bytesValues[i].toHexString()), i);
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
index 5e47acf8bca..66c0804ebbe 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
@@ -75,6 +75,28 @@ public class FixedByteValueReaderWriterTest implements
PinotBuffersAfterMethodCh
}
}
+ @Test(dataProvider = "params")
+ public void testGetValueSize(int maxStringLength, int configuredMaxLength,
ByteOrder byteOrder)
+ throws IOException {
+ byte[] bytes = new byte[configuredMaxLength];
+ try (PinotDataBuffer buffer =
PinotDataBuffer.allocateDirect(configuredMaxLength * 1000L, byteOrder,
+ "testGetValueSize")) {
+ FixedByteValueReaderWriter readerWriter = new
FixedByteValueReaderWriter(buffer);
+ List<Integer> lengths = new ArrayList<>(1000);
+ for (int i = 0; i < 1000; i++) {
+ int length = ThreadLocalRandom.current().nextInt(maxStringLength);
+ Arrays.fill(bytes, 0, length, (byte) 'a');
+ readerWriter.writeBytes(i, configuredMaxLength, bytes);
+ lengths.add(length);
+ Arrays.fill(bytes, 0, length, (byte) 0);
+ }
+ for (int i = 0; i < 1000; i++) {
+ assertEquals(readerWriter.getByteSize(i, configuredMaxLength),
configuredMaxLength);
+ assertEquals(readerWriter.getUnpaddedByteSize(i, configuredMaxLength),
(int) lengths.get(i));
+ }
+ }
+ }
+
@Test(dataProvider = "params")
public void testFixedByteValueReaderWriterNonAscii(int maxStringLength, int
configuredMaxLength, ByteOrder byteOrder)
throws IOException {
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/Dictionary.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/Dictionary.java
index 0f61d3947e1..12a93d2df48 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/Dictionary.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/Dictionary.java
@@ -192,9 +192,8 @@ public interface Dictionary extends IndexReader {
String getStringValue(int dictId);
- /**
- * NOTE: Should be overridden for STRING, BIG_DECIMAL and BYTES dictionary.
- */
+ /// Returns the bytes representation of the value.
+ /// Should be overridden for variable sized types, i.e. BIG_DECIMAL, STRING, BYTES.
default byte[] getBytesValue(int dictId) {
throw new UnsupportedOperationException();
}
@@ -203,6 +202,14 @@ public interface Dictionary extends IndexReader {
return new ByteArray(getBytesValue(dictId));
}
+ /// Returns the size of the value in bytes.
+ /// Should be overridden for variable sized types, i.e. BIG_DECIMAL, STRING, BYTES.
+ /// - For BIG_DECIMAL, returns the length of the serialized bytes
+ /// - For STRING, returns the length of the UTF_8 encoded bytes
+ default int getValueSize(int dictId) {
+ return getValueType().size();
+ }
+
default int get32BitsMurmur3HashValue(int dictId) {
return MurmurHashFunctions.murmurHash3X64Bit32(getBytesValue(dictId), 0);
}
diff --git
a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
index 35b87faa75d..c93b141f8d2 100644
---
a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
+++
b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
@@ -19,7 +19,6 @@
package org.apache.pinot.tools.segment.converter;
import com.google.common.base.Preconditions;
-import com.google.common.base.Utf8;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
@@ -316,7 +315,7 @@ public class DictionaryToRawIndexConverter {
ChunkCompressionType compressionType =
ChunkCompressionType.valueOf(_compressionType);
DataType storedType = dictionary.getValueType();
int numDocs = segment.getSegmentMetadata().getTotalDocs();
- int lengthOfLongestEntry = (storedType == DataType.STRING) ?
getLengthOfLongestEntry(dictionary) : -1;
+ int lengthOfLongestEntry = !storedType.isFixedWidth() ?
getLengthOfLongestEntry(dictionary) : -1;
try (ForwardIndexCreator rawIndexCreator =
ForwardIndexCreatorFactory.getRawIndexCreatorForSVColumn(newSegment,
compressionType, column, storedType, numDocs, lengthOfLongestEntry,
false,
@@ -386,13 +385,10 @@ public class DictionaryToRawIndexConverter {
*/
private int getLengthOfLongestEntry(Dictionary dictionary) {
int lengthOfLongestEntry = 0;
-
int length = dictionary.length();
for (int dictId = 0; dictId < length; dictId++) {
- String value = (String) dictionary.get(dictId);
- lengthOfLongestEntry = Math.max(lengthOfLongestEntry,
Utf8.encodedLength(value));
+ lengthOfLongestEntry = Math.max(lengthOfLongestEntry,
dictionary.getValueSize(dictId));
}
-
return lengthOfLongestEntry;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]