siddharthteotia commented on code in PR #9678:
URL: https://github.com/apache/pinot/pull/9678#discussion_r1012323342
##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java:
##########
@@ -321,15 +348,106 @@ public void
testEnableFSTIndexOnExistingColumnDictEncoded()
assertNotNull(columnMetadata);
checkFSTIndexCreation(EXISTING_STRING_COL_DICT, 9, 4,
_newColumnsSchemaWithFST, false, false, 26);
- constructV1Segment();
+ constructV1Segment(Collections.emptyList(), Collections.emptyList(),
Collections.emptyList());
segmentMetadata = new SegmentMetadataImpl(_indexDir);
columnMetadata =
segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_DICT);
assertNotNull(columnMetadata);
checkFSTIndexCreation(EXISTING_STRING_COL_DICT, 9, 4,
_newColumnsSchemaWithFST, false, false, 26);
}
@Test
- public void testForwardIndexHandler()
+ public void testForwardIndexHandlerEnableDictionary()
+ throws Exception {
+ // Add raw columns in indexingConfig so that the ForwardIndexHandler
doesn't end up converting them to dictionary
+ // enabled columns
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW_MV);
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW);
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_STRING_COL_RAW);
+
+ // TEST 1. Check running forwardIndexHandler on a V1 segment. No-op for
all existing raw columns.
+ constructV1Segment(Collections.emptyList(), Collections.emptyList(),
Collections.emptyList());
+ checkForwardIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _schema, false,
true, false, 26, null, true, 0,
+ DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_STRING_COL_RAW, 5,
3, _schema, false, false, false, 0, true,
+ 0, ChunkCompressionType.LZ4, false, DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_INT_COL_RAW_MV,
18499, 15, _schema, false, false, false, 0,
+ false, 13, ChunkCompressionType.LZ4, false, DataType.INT, 106688);
+ validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_INT_COL_RAW, 42242,
16, _schema, false, false, false, 0, true,
+ 0, ChunkCompressionType.LZ4, false, DataType.INT, 100000);
+
+ // Convert the segment to V3.
+ new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
+
+ // TEST 2: Run reload with no-changes.
+ checkForwardIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _schema, false,
true, false, 26, null, true, 0,
+ DataType.STRING, 100000);
+
+ // TEST 3: EXISTING_STRING_COL_RAW. Enable dictionary. Also add inverted
index and text index. Reload code path
+ // will create dictionary, inverted index and text index.
+
_indexLoadingConfig.getNoDictionaryColumns().remove(EXISTING_STRING_COL_RAW);
+ _indexLoadingConfig.getInvertedIndexColumns().add(EXISTING_STRING_COL_RAW);
+ _indexLoadingConfig.getTextIndexColumns().add(EXISTING_STRING_COL_RAW);
+ checkForwardIndexCreation(EXISTING_STRING_COL_RAW, 5, 3, _schema, false,
true, false, 4, null, true, 0,
+ DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.INVERTED_INDEX, EXISTING_STRING_COL_RAW, 5,
3, _schema, false, true, false, 4, true,
+ 0, null, false, DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_RAW, 5, 3,
_schema, false, true, false, 4, true, 0,
+ null, false, DataType.STRING, 100000);
+
+ // TEST4: EXISTING_STRING_COL_RAW. Enable dictionary on a raw column that
already has text index.
+ List<String> textIndexCols = new ArrayList<>();
+ textIndexCols.add(EXISTING_STRING_COL_RAW);
+ constructV1Segment(Collections.emptyList(), textIndexCols,
Collections.emptyList());
+ new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
+ validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_RAW, 5, 3,
_schema, false, false, false, 0, true, 0,
+ null, false, DataType.STRING, 100000);
+
+ // At this point, the segment has text index. Now, the reload path should
create a dictionary.
+ checkForwardIndexCreation(EXISTING_STRING_COL_RAW, 5, 3, _schema, false,
true, false, 4, null, true, 0,
+ DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_RAW, 5, 3,
_schema, false, true, false, 4, true, 0,
+ null, false, DataType.STRING, 100000);
+ // Add it back so that this column is not rewritten for the other tests
below.
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_STRING_COL_RAW);
+
+ // TEST 5: EXISTING_INT_COL_RAW. Enable dictionary on a column that
already has range index.
+ List<String> rangeIndexCols = new ArrayList<>();
+ rangeIndexCols.add(EXISTING_INT_COL_RAW);
+ constructV1Segment(Collections.emptyList(), Collections.emptyList(),
rangeIndexCols);
+ new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
+ validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_INT_COL_RAW, 42242,
16, _schema, false, false, false, 0, true,
+ 0, ChunkCompressionType.LZ4, false, DataType.INT, 100000);
+ // At this point, the segment has range index. Now the reload path should
create a dictionary and rewrite the
+ // range index.
+ _indexLoadingConfig.getNoDictionaryColumns().remove(EXISTING_INT_COL_RAW);
+ _indexLoadingConfig.getRangeIndexColumns().add(EXISTING_INT_COL_RAW);
+ checkForwardIndexCreation(EXISTING_INT_COL_RAW, 42242, 16, _schema, false,
true, false, 0, null, true, 0,
+ DataType.INT, 100000);
+ validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_INT_COL_RAW, 42242,
16, _schema, false, true, false, 0, true, 0,
Review Comment:
Does this actually validate that range index got correctly rewritten to be
dict based ? I guess that's where query exec tests will be useful
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]