This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 4442d4199e Fix range index on raw TIMESTAMP column (#15589)
4442d4199e is described below
commit 4442d4199e843451896ca31c7878aca2596bd2f7
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Fri Apr 18 18:28:38 2025 -0600
Fix range index on raw TIMESTAMP column (#15589)
---
.../pinot/core/data/manager/TableIndexingTest.java | 19 ++++------
.../src/test/resources/TableIndexingTest.csv | 42 ++++++++++++++++------
.../impl/inv/BitSlicedRangeIndexCreator.java | 15 ++++----
.../creator/impl/inv/RangeIndexCreator.java | 2 +-
.../loader/invertedindex/RangeIndexHandler.java | 4 +--
.../local/segment/index/range/RangeIndexType.java | 13 +++----
.../creator/CombinedInvertedIndexCreator.java | 27 +++++++++-----
7 files changed, 74 insertions(+), 48 deletions(-)
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java
index 6def6bc101..90b7e4c0c4 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java
@@ -149,7 +149,7 @@ public class TableIndexingTest {
String schemaName = enc + "_" + cardType + "_" + dataType;
TestCase testCase = _testCaseMap.get(new TestCase(schemaName, -1,
indexType));
if (testCase == null) {
- throw new AssertionError("Expected testCase not found: " + testCase);
+ Assert.fail("Expected testCase not found: " + schemaName);
} else {
testCase._expectedSuccess = Boolean.valueOf(result);
testCase._expectedMessage = error;
@@ -164,19 +164,13 @@ public class TableIndexingTest {
}
for (String encoding : List.of("raw", "dict")) {
- if (type == DataType.BOOLEAN && "dict".equals(encoding)) {
- // pinot doesn't support dictionary encoding for boolean type
- continue;
- }
-
if (type == DataType.TIMESTAMP) {
//create separate tables for all data types
_schemas.add(new Schema.SchemaBuilder().setSchemaName(encoding +
"_sv_" + type.name())
.addDateTime(COLUMN_NAME, type, "1:MILLISECONDS:TIMESTAMP",
"1:MILLISECONDS")
.build());
-
_schemas.add(new Schema.SchemaBuilder().setSchemaName(encoding +
"_mv_" + type.name())
- .addDateTime(COLUMN_NAME, type, "1:MILLISECONDS:TIMESTAMP",
"1:MILLISECONDS")
+ .addMultiValueDimension(COLUMN_NAME, type)
.build());
} else {
_schemas.add(new Schema.SchemaBuilder().setSchemaName(encoding +
"_sv_" + type.name())
@@ -509,13 +503,12 @@ public class TableIndexingTest {
}
if (testCase._expectedSuccess == null) {
- throw new AssertionError("No expected status found for test case: " +
testCase);
+ Assert.fail("No expected status found for test case: " + testCase);
} else if (testCase._expectedSuccess && testCase._error != null) {
- throw new AssertionError("Expected success for test case: " + testCase +
" but got error: " + testCase._error);
+ Assert.fail("Expected success for test case: " + testCase + " but got
error: " + testCase._error);
} else if (!testCase._expectedSuccess &&
!testCase.getErrorMessage().equals(testCase._expectedMessage)) {
- throw new AssertionError(
- "Expected error: \"" + testCase._expectedMessage + "\" for test
case: " + testCase + " but got: \""
- + testCase.getErrorMessage() + " \"");
+ Assert.fail("Expected error: \"" + testCase._expectedMessage + "\" for
test case: " + testCase + " but got: \""
+ + testCase.getErrorMessage() + "\"");
}
}
diff --git a/pinot-core/src/test/resources/TableIndexingTest.csv b/pinot-core/src/test/resources/TableIndexingTest.csv
index d3b046a908..9ad8a25c30 100644
--- a/pinot-core/src/test/resources/TableIndexingTest.csv
+++ b/pinot-core/src/test/resources/TableIndexingTest.csv
@@ -205,7 +205,7 @@ BOOLEAN;sv;raw;inverted_index;false;Cannot create inverted
index for raw index c
BOOLEAN;sv;raw;json_index;false;Json index is currently only supported on
STRING columns
BOOLEAN;sv;raw;native_text_index;false;Cannot create text index on column:
col, it can only be applied to string columns
BOOLEAN;sv;raw;text_index;false;Cannot create text index on column: col, it
can only be applied to string columns
-BOOLEAN;sv;raw;range_index;false;Unsupported data type BOOLEAN for range index
+BOOLEAN;sv;raw;range_index;true;
BOOLEAN;sv;raw;startree_index;false;Dimension: col does not have dictionary
BOOLEAN;sv;raw;vector_index;false;Vector index is currently only supported on
float array columns
BOOLEAN;mv;raw;timestamp_index;false;Caught exception while reading data
@@ -216,9 +216,31 @@ BOOLEAN;mv;raw;inverted_index;false;Cannot create inverted
index for raw index c
BOOLEAN;mv;raw;json_index;false;Json index is currently only supported on
single-value columns
BOOLEAN;mv;raw;native_text_index;false;Cannot create text index on column:
col, it can only be applied to string columns
BOOLEAN;mv;raw;text_index;false;Cannot create text index on column: col, it
can only be applied to string columns
-BOOLEAN;mv;raw;range_index;false;Range index is not supported for columns of
data type:BOOLEAN
+BOOLEAN;mv;raw;range_index;true;
BOOLEAN;mv;raw;startree_index;false;Column Name col defined in StarTreeIndex
Config must be a single value column
BOOLEAN;mv;raw;vector_index;false;Vector index is currently only supported on
float array columns
+BOOLEAN;sv;dict;timestamp_index;true;
+BOOLEAN;sv;dict;bloom_filter;false;Cannot create a bloom filter on boolean
column col
+BOOLEAN;sv;dict;fst_index;false;Cannot create FST index on column: col, it can
only be applied to dictionary encoded single value string columns
+BOOLEAN;sv;dict;h3_index;false;H3 index is currently only supported on BYTES
columns
+BOOLEAN;sv;dict;inverted_index;true;
+BOOLEAN;sv;dict;json_index;false;Json index is currently only supported on
STRING columns
+BOOLEAN;sv;dict;native_text_index;false;Cannot create text index on column:
col, it can only be applied to string columns
+BOOLEAN;sv;dict;text_index;false;Cannot create text index on column: col, it
can only be applied to string columns
+BOOLEAN;sv;dict;range_index;true;
+BOOLEAN;sv;dict;startree_index;true;
+BOOLEAN;sv;dict;vector_index;false;Vector index is currently only supported on
float array columns
+BOOLEAN;mv;dict;timestamp_index;false;Caught exception while reading data
+BOOLEAN;mv;dict;bloom_filter;false;Cannot create a bloom filter on boolean
column col
+BOOLEAN;mv;dict;fst_index;false;Cannot create FST index on column: col, it can
only be applied to dictionary encoded single value string columns
+BOOLEAN;mv;dict;h3_index;false;H3 index is currently only supported on
single-value columns
+BOOLEAN;mv;dict;inverted_index;true;
+BOOLEAN;mv;dict;json_index;false;Json index is currently only supported on
single-value columns
+BOOLEAN;mv;dict;native_text_index;false;Cannot create text index on column:
col, it can only be applied to string columns
+BOOLEAN;mv;dict;text_index;false;Cannot create text index on column: col, it
can only be applied to string columns
+BOOLEAN;mv;dict;range_index;true;
+BOOLEAN;mv;dict;startree_index;false;Column Name col defined in StarTreeIndex
Config must be a single value column
+BOOLEAN;mv;dict;vector_index;false;Vector index is currently only supported on
float array columns
TIMESTAMP;sv;raw;timestamp_index;true;
TIMESTAMP;sv;raw;bloom_filter;true;
TIMESTAMP;sv;raw;fst_index;false;Cannot create FST index on column: col, it
can only be applied to dictionary encoded single value string columns
@@ -227,19 +249,19 @@ TIMESTAMP;sv;raw;inverted_index;false;Cannot create
inverted index for raw index
TIMESTAMP;sv;raw;json_index;false;Json index is currently only supported on
STRING columns
TIMESTAMP;sv;raw;native_text_index;false;Cannot create text index on column:
col, it can only be applied to string columns
TIMESTAMP;sv;raw;text_index;false;Cannot create text index on column: col, it
can only be applied to string columns
-TIMESTAMP;sv;raw;range_index;false;Unsupported data type TIMESTAMP for range
index
+TIMESTAMP;sv;raw;range_index;true;
TIMESTAMP;sv;raw;startree_index;false;Dimension: col does not have dictionary
TIMESTAMP;sv;raw;vector_index;false;Vector index is currently only supported
on float array columns
TIMESTAMP;mv;raw;timestamp_index;true;
TIMESTAMP;mv;raw;bloom_filter;true;
TIMESTAMP;mv;raw;fst_index;false;Cannot create FST index on column: col, it
can only be applied to dictionary encoded single value string columns
-TIMESTAMP;mv;raw;h3_index;false;H3 index is currently only supported on BYTES
columns
+TIMESTAMP;mv;raw;h3_index;false;H3 index is currently only supported on
single-value columns
TIMESTAMP;mv;raw;inverted_index;false;Cannot create inverted index for raw
index column: col
-TIMESTAMP;mv;raw;json_index;false;Json index is currently only supported on
STRING columns
+TIMESTAMP;mv;raw;json_index;false;Json index is currently only supported on
single-value columns
TIMESTAMP;mv;raw;native_text_index;false;Cannot create text index on column:
col, it can only be applied to string columns
TIMESTAMP;mv;raw;text_index;false;Cannot create text index on column: col, it
can only be applied to string columns
-TIMESTAMP;mv;raw;range_index;false;Unsupported data type TIMESTAMP for range
index
-TIMESTAMP;mv;raw;startree_index;false;Dimension: col does not have dictionary
+TIMESTAMP;mv;raw;range_index;true;
+TIMESTAMP;mv;raw;startree_index;false;Column Name col defined in StarTreeIndex
Config must be a single value column
TIMESTAMP;mv;raw;vector_index;false;Vector index is currently only supported
on float array columns
TIMESTAMP;sv;dict;timestamp_index;true;
TIMESTAMP;sv;dict;bloom_filter;true;
@@ -255,13 +277,13 @@ TIMESTAMP;sv;dict;vector_index;false;Vector index is
currently only supported on
TIMESTAMP;mv;dict;timestamp_index;true;
TIMESTAMP;mv;dict;bloom_filter;true;
TIMESTAMP;mv;dict;fst_index;false;Cannot create FST index on column: col, it
can only be applied to dictionary encoded single value string columns
-TIMESTAMP;mv;dict;h3_index;false;H3 index is currently only supported on BYTES
columns
+TIMESTAMP;mv;dict;h3_index;false;H3 index is currently only supported on
single-value columns
TIMESTAMP;mv;dict;inverted_index;true;
-TIMESTAMP;mv;dict;json_index;false;Json index is currently only supported on
STRING columns
+TIMESTAMP;mv;dict;json_index;false;Json index is currently only supported on
single-value columns
TIMESTAMP;mv;dict;native_text_index;false;Cannot create text index on column:
col, it can only be applied to string columns
TIMESTAMP;mv;dict;text_index;false;Cannot create text index on column: col, it
can only be applied to string columns
TIMESTAMP;mv;dict;range_index;true;
-TIMESTAMP;mv;dict;startree_index;true;
+TIMESTAMP;mv;dict;startree_index;false;Column Name col defined in
StarTreeIndex Config must be a single value column
TIMESTAMP;mv;dict;vector_index;false;Vector index is currently only supported
on float array columns
STRING;sv;raw;timestamp_index;false;Caught exception while reading data
STRING;sv;raw;bloom_filter;true;
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java
index 296272190a..b9b008fe30 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitSlicedRangeIndexCreator.java
@@ -24,6 +24,7 @@ import java.io.IOException;
import org.apache.pinot.segment.local.utils.FPOrdering;
import org.apache.pinot.segment.spi.index.creator.CombinedInvertedIndexCreator;
import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.roaringbitmap.RangeBitmap;
import static
org.apache.pinot.segment.spi.V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION;
@@ -40,10 +41,10 @@ public class BitSlicedRangeIndexCreator implements
CombinedInvertedIndexCreator
private final RangeBitmap.Appender _appender;
private final File _rangeIndexFile;
private final long _minValue;
- private final FieldSpec.DataType _valueType;
+ private final DataType _valueType;
private BitSlicedRangeIndexCreator(File indexDir, FieldSpec fieldSpec, long
minValue, long maxValue,
- FieldSpec.DataType valueType) {
+ DataType valueType) {
Preconditions.checkArgument(fieldSpec.isSingleValueField(), "MV columns
not supported");
_rangeIndexFile = new File(indexDir, fieldSpec.getName() +
BITMAP_RANGE_INDEX_FILE_EXTENSION);
_appender = RangeBitmap.appender(maxValue);
@@ -58,7 +59,7 @@ public class BitSlicedRangeIndexCreator implements
CombinedInvertedIndexCreator
* @param cardinality the cardinality of the dictionary
*/
public BitSlicedRangeIndexCreator(File indexDir, FieldSpec fieldSpec, int
cardinality) {
- this(indexDir, fieldSpec, 0, cardinality - 1, fieldSpec.getDataType());
+ this(indexDir, fieldSpec, 0, cardinality - 1,
fieldSpec.getDataType().getStoredType());
}
/**
@@ -71,11 +72,11 @@ public class BitSlicedRangeIndexCreator implements
CombinedInvertedIndexCreator
public BitSlicedRangeIndexCreator(File indexDir, FieldSpec fieldSpec,
Comparable<?> minValue,
Comparable<?> maxValue) {
this(indexDir, fieldSpec, minValue(fieldSpec, minValue),
maxValue(fieldSpec, minValue, maxValue),
- fieldSpec.getDataType());
+ fieldSpec.getDataType().getStoredType());
}
@Override
- public FieldSpec.DataType getDataType() {
+ public DataType getValueType() {
return _valueType;
}
@@ -140,7 +141,7 @@ public class BitSlicedRangeIndexCreator implements
CombinedInvertedIndexCreator
}
private static long maxValue(FieldSpec fieldSpec, Comparable<?> minValue,
Comparable<?> maxValue) {
- FieldSpec.DataType storedType = fieldSpec.getDataType().getStoredType();
+ DataType storedType = fieldSpec.getDataType().getStoredType();
if (storedType == INT || storedType == LONG) {
return ((Number) maxValue).longValue() - ((Number) minValue).longValue();
}
@@ -154,7 +155,7 @@ public class BitSlicedRangeIndexCreator implements
CombinedInvertedIndexCreator
}
private static long minValue(FieldSpec fieldSpec, Comparable<?> minValue) {
- FieldSpec.DataType storedType = fieldSpec.getDataType().getStoredType();
+ DataType storedType = fieldSpec.getDataType().getStoredType();
if (storedType == INT || storedType == LONG) {
return ((Number) minValue).longValue();
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/RangeIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/RangeIndexCreator.java
index a97c45fec6..1fcb0e21d3 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/RangeIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/RangeIndexCreator.java
@@ -162,7 +162,7 @@ public final class RangeIndexCreator implements
CombinedInvertedIndexCreator {
}
@Override
- public DataType getDataType() {
+ public DataType getValueType() {
return _valueType;
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java
index 32af73bbdf..18252ad520 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java
@@ -189,7 +189,7 @@ public class RangeIndexHandler extends BaseIndexHandler {
CombinedInvertedIndexCreator rangeIndexCreator =
newRangeIndexCreator(columnMetadata)) {
if (columnMetadata.isSingleValue()) {
// Single-value column.
- switch (columnMetadata.getDataType()) {
+ switch (columnMetadata.getDataType().getStoredType()) {
case INT:
for (int i = 0; i < numDocs; i++) {
rangeIndexCreator.add(forwardIndexReader.getInt(i,
readerContext));
@@ -216,7 +216,7 @@ public class RangeIndexHandler extends BaseIndexHandler {
} else {
// Multi-value column
int maxNumValuesPerMVEntry =
columnMetadata.getMaxNumberOfMultiValues();
- switch (columnMetadata.getDataType()) {
+ switch (columnMetadata.getDataType().getStoredType()) {
case INT:
int[] intValues = new int[maxNumValuesPerMVEntry];
for (int i = 0; i < numDocs; i++) {
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/range/RangeIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/range/RangeIndexType.java
index 43c6332f5c..cecbc3ec2c 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/range/RangeIndexType.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/range/RangeIndexType.java
@@ -103,17 +103,18 @@ public class RangeIndexType
@Override
public CombinedInvertedIndexCreator createIndexCreator(IndexCreationContext
context, RangeIndexConfig indexConfig)
throws IOException {
- if (indexConfig.getVersion() == BitSlicedRangeIndexCreator.VERSION &&
context.getFieldSpec().isSingleValueField()) {
+ FieldSpec fieldSpec = context.getFieldSpec();
+ if (indexConfig.getVersion() == BitSlicedRangeIndexCreator.VERSION &&
fieldSpec.isSingleValueField()) {
if (context.hasDictionary()) {
- return new BitSlicedRangeIndexCreator(context.getIndexDir(),
context.getFieldSpec(), context.getCardinality());
+ return new BitSlicedRangeIndexCreator(context.getIndexDir(),
fieldSpec, context.getCardinality());
}
- return new BitSlicedRangeIndexCreator(context.getIndexDir(),
context.getFieldSpec(), context.getMinValue(),
+ return new BitSlicedRangeIndexCreator(context.getIndexDir(), fieldSpec,
context.getMinValue(),
context.getMaxValue());
}
// default to RangeIndexCreator for the time being
- return new RangeIndexCreator(context.getIndexDir(), context.getFieldSpec(),
- context.hasDictionary() ? FieldSpec.DataType.INT :
context.getFieldSpec().getDataType(), -1,
- -1, context.getTotalDocs(), context.getTotalNumberOfEntries());
+ return new RangeIndexCreator(context.getIndexDir(), fieldSpec,
+ context.hasDictionary() ? FieldSpec.DataType.INT :
fieldSpec.getDataType().getStoredType(), -1, -1,
+ context.getTotalDocs(), context.getTotalNumberOfEntries());
}
@Override
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/CombinedInvertedIndexCreator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/CombinedInvertedIndexCreator.java
index 0011c5b1c8..d75011b14c 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/CombinedInvertedIndexCreator.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/CombinedInvertedIndexCreator.java
@@ -18,9 +18,8 @@
*/
package org.apache.pinot.segment.spi.index.creator;
-import javax.annotation.Nonnull;
import javax.annotation.Nullable;
-import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
/**
@@ -29,14 +28,23 @@ import org.apache.pinot.spi.data.FieldSpec;
public interface CombinedInvertedIndexCreator
extends DictionaryBasedInvertedIndexCreator,
RawValueBasedInvertedIndexCreator {
- FieldSpec.DataType getDataType();
+ @Deprecated
+ default DataType getDataType() {
+ throw new UnsupportedOperationException();
+ }
+
+ /// Returns the data type of the values in the index. The type returned
should be the internal stored type.
+ default DataType getValueType() {
+ return getDataType().getStoredType();
+ }
@Override
- default void add(@Nonnull Object value, int dictId) {
+ default void add(Object value, int dictId) {
if (dictId >= 0) {
add(dictId);
} else {
- switch (getDataType()) {
+ DataType valueType = getValueType();
+ switch (valueType) {
case INT:
add((Integer) value);
break;
@@ -50,17 +58,18 @@ public interface CombinedInvertedIndexCreator
add((Double) value);
break;
default:
- throw new RuntimeException("Unsupported data type " + getDataType()
+ " for range index");
+ throw new RuntimeException("Unsupported data type " + valueType + "
for range index");
}
}
}
@Override
- default void add(@Nonnull Object[] values, @Nullable int[] dictIds) {
+ default void add(Object[] values, @Nullable int[] dictIds) {
if (dictIds != null) {
add(dictIds, dictIds.length);
} else {
- switch (getDataType()) {
+ DataType valueType = getValueType();
+ switch (valueType) {
case INT:
int[] intValues = new int[values.length];
for (int i = 0; i < values.length; i++) {
@@ -90,7 +99,7 @@ public interface CombinedInvertedIndexCreator
add(doubleValues, values.length);
break;
default:
- throw new RuntimeException("Unsupported data type " + getDataType()
+ " for range index");
+ throw new RuntimeException("Unsupported data type " + valueType + "
for range index");
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]