siddharthteotia commented on code in PR #9678:
URL: https://github.com/apache/pinot/pull/9678#discussion_r1012322511
##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java:
##########
@@ -321,15 +348,106 @@ public void
testEnableFSTIndexOnExistingColumnDictEncoded()
assertNotNull(columnMetadata);
checkFSTIndexCreation(EXISTING_STRING_COL_DICT, 9, 4,
_newColumnsSchemaWithFST, false, false, 26);
- constructV1Segment();
+ constructV1Segment(Collections.emptyList(), Collections.emptyList(),
Collections.emptyList());
segmentMetadata = new SegmentMetadataImpl(_indexDir);
columnMetadata =
segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_DICT);
assertNotNull(columnMetadata);
checkFSTIndexCreation(EXISTING_STRING_COL_DICT, 9, 4,
_newColumnsSchemaWithFST, false, false, 26);
}
@Test
- public void testForwardIndexHandler()
+ public void testForwardIndexHandlerEnableDictionary()
+ throws Exception {
+ // Add raw columns in indexingConfig so that the ForwardIndexHandler
doesn't end up converting them to dictionary
+ // enabled columns
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW_MV);
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW);
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_STRING_COL_RAW);
+
+ // TEST 1. Check running forwardIndexHandler on a V1 segment. No-op for
all existing raw columns.
+ constructV1Segment(Collections.emptyList(), Collections.emptyList(),
Collections.emptyList());
+ checkForwardIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _schema, false,
true, false, 26, null, true, 0,
+ DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_STRING_COL_RAW, 5,
3, _schema, false, false, false, 0, true,
+ 0, ChunkCompressionType.LZ4, false, DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_INT_COL_RAW_MV,
18499, 15, _schema, false, false, false, 0,
+ false, 13, ChunkCompressionType.LZ4, false, DataType.INT, 106688);
+ validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_INT_COL_RAW, 42242,
16, _schema, false, false, false, 0, true,
+ 0, ChunkCompressionType.LZ4, false, DataType.INT, 100000);
+
+ // Convert the segment to V3.
+ new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
+
+ // TEST 2: Run reload with no-changes.
+ checkForwardIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _schema, false,
true, false, 26, null, true, 0,
+ DataType.STRING, 100000);
+
+ // TEST 3: EXISTING_STRING_COL_RAW. Enable dictionary. Also add inverted
index and text index. Reload code path
+ // will create dictionary, inverted index and text index.
+
_indexLoadingConfig.getNoDictionaryColumns().remove(EXISTING_STRING_COL_RAW);
+ _indexLoadingConfig.getInvertedIndexColumns().add(EXISTING_STRING_COL_RAW);
+ _indexLoadingConfig.getTextIndexColumns().add(EXISTING_STRING_COL_RAW);
+ checkForwardIndexCreation(EXISTING_STRING_COL_RAW, 5, 3, _schema, false,
true, false, 4, null, true, 0,
+ DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.INVERTED_INDEX, EXISTING_STRING_COL_RAW, 5,
3, _schema, false, true, false, 4, true,
+ 0, null, false, DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_RAW, 5, 3,
_schema, false, true, false, 4, true, 0,
+ null, false, DataType.STRING, 100000);
+
+ // TEST4: EXISTING_STRING_COL_RAW. Enable dictionary on a raw column that
already has text index.
+ List<String> textIndexCols = new ArrayList<>();
+ textIndexCols.add(EXISTING_STRING_COL_RAW);
+ constructV1Segment(Collections.emptyList(), textIndexCols,
Collections.emptyList());
+ new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
+ validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_RAW, 5, 3,
_schema, false, false, false, 0, true, 0,
+ null, false, DataType.STRING, 100000);
+
+ // At this point, the segment has text index. Now, the reload path should
create a dictionary.
+ checkForwardIndexCreation(EXISTING_STRING_COL_RAW, 5, 3, _schema, false,
true, false, 4, null, true, 0,
+ DataType.STRING, 100000);
+ validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_RAW, 5, 3,
_schema, false, true, false, 4, true, 0,
+ null, false, DataType.STRING, 100000);
+ // Add it back so that this column is not rewritten for the other tests
below.
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_STRING_COL_RAW);
+
+ // TEST 5: EXISTING_INT_COL_RAW. Enable dictionary on a column that
already has range index.
+ List<String> rangeIndexCols = new ArrayList<>();
+ rangeIndexCols.add(EXISTING_INT_COL_RAW);
+ constructV1Segment(Collections.emptyList(), Collections.emptyList(),
rangeIndexCols);
+ new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
+ validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_INT_COL_RAW, 42242,
16, _schema, false, false, false, 0, true,
+ 0, ChunkCompressionType.LZ4, false, DataType.INT, 100000);
+ // At this point, the segment has range index. Now the reload path should
create a dictionary and rewrite the
+ // range index.
+ _indexLoadingConfig.getNoDictionaryColumns().remove(EXISTING_INT_COL_RAW);
+ _indexLoadingConfig.getRangeIndexColumns().add(EXISTING_INT_COL_RAW);
+ checkForwardIndexCreation(EXISTING_INT_COL_RAW, 42242, 16, _schema, false,
true, false, 0, null, true, 0,
+ DataType.INT, 100000);
+ validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_INT_COL_RAW, 42242,
16, _schema, false, true, false, 0, true, 0,
+ null, false, DataType.INT, 100000);
+ // Add it back so that this column is not rewritten for the other tests
below.
+ _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW);
+
+ // TEST 6: EXISTING_INT_COL_RAW_MV. Enable dictionary for an MV column.
Also enable inverted index and range index.
+ constructV1Segment(Collections.emptyList(), Collections.emptyList(),
Collections.emptyList());
+ new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
+ validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_INT_COL_RAW_MV,
18499, 15, _schema, false, false, false, 0,
+ false, 13, ChunkCompressionType.LZ4, false, DataType.INT, 106688);
+
+ // Enable dictionary and inverted index.
Review Comment:
(nit) should be enable dict, inv index and range index
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]