mcvsubbu commented on a change in pull request #5074: Support segment reload
for text index
URL: https://github.com/apache/incubator-pinot/pull/5074#discussion_r381552273
##########
File path:
pinot-core/src/test/java/org/apache/pinot/core/segment/index/loader/SegmentPreProcessorTest.java
##########
@@ -140,6 +154,124 @@ private void constructV3Segment()
new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
}
+ /**
+ * Test to check for default column handling and text index creation during
+ * segment load after a new column is added to the schema with text index
+ * creation enabled
+ * @throws Exception
+ */
+ @Test
+ public void testEnableTextIndexOnNewlyAddedStringColumn() throws Exception {
+ constructV3Segment();
+ Set<String> textIndexColumns = new HashSet<>();
+ textIndexColumns.add(NEWLY_ADDED_STRING_COL);
+ _indexLoadingConfig.setTextIndexColumns(textIndexColumns);
+
+ SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir);
+ ColumnMetadata columnMetadata =
segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_STRING_COL);
+ Assert.assertNull(columnMetadata);
+ try (SegmentPreProcessor processor = new SegmentPreProcessor(_indexDir,
_indexLoadingConfig, _newColumnsSchemaWithText)) {
+ processor.process();
+
+ segmentMetadata = new SegmentMetadataImpl(_indexDir);
+ columnMetadata =
segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_STRING_COL);
+ Assert.assertNotNull(columnMetadata);
+ Assert.assertEquals(columnMetadata.getCardinality(), 1);
+ Assert.assertEquals(columnMetadata.getTotalDocs(), 100000);
+ Assert.assertEquals(columnMetadata.getTotalRawDocs(), 100000);
+ Assert.assertEquals(columnMetadata.getTotalAggDocs(), 0);
+ Assert.assertEquals(columnMetadata.getDataType(),
FieldSpec.DataType.STRING);
+ Assert.assertEquals(columnMetadata.getBitsPerElement(), 1);
+ Assert.assertEquals(columnMetadata.getColumnMaxLength(), 0);
+ Assert.assertEquals(columnMetadata.getFieldType(),
FieldSpec.FieldType.DIMENSION);
+ Assert.assertFalse(columnMetadata.isSorted());
+ Assert.assertFalse(columnMetadata.hasNulls());
+ Assert.assertFalse(columnMetadata.hasDictionary());
+ Assert.assertFalse(columnMetadata.hasInvertedIndex());
+ Assert.assertTrue(columnMetadata.hasTextIndex());
+ Assert.assertTrue(columnMetadata.isSingleValue());
+ Assert.assertEquals(columnMetadata.getMaxNumberOfMultiValues(), 0);
+ Assert.assertEquals(columnMetadata.getTotalNumberOfEntries(), 100000);
+ Assert.assertTrue(columnMetadata.isAutoGenerated());
+ Assert.assertEquals(columnMetadata.getDefaultNullValueString(), "null");
+
+ try (SegmentDirectory segmentDirectory =
SegmentDirectory.createFromLocalFS(_indexDir, ReadMode.mmap);
+ SegmentDirectory.Reader reader = segmentDirectory.createReader()) {
+ Assert.assertTrue(reader.hasIndexFor(NEWLY_ADDED_STRING_COL,
ColumnIndexType.TEXT_INDEX));
+ }
+ }
+ }
+
+ /**
+ * Test to check text index creation during segment load after text index
+ * creation is enabled on an existing column
+ * @throws Exception
+ */
+ @Test
+ public void testEnableTextIndexOnExistingColumn() throws Exception {
+ constructV3Segment();
+ Set<String> textIndexColumns = new HashSet<>();
+ textIndexColumns.add(EXISTING_STRING_COL_RAW);
+ _indexLoadingConfig.setTextIndexColumns(textIndexColumns);
+ SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir);
+ ColumnMetadata columnMetadata =
segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_RAW);
+ Assert.assertNotNull(columnMetadata);
+ Assert.assertFalse(columnMetadata.hasTextIndex());
+
+ try (SegmentPreProcessor processor = new SegmentPreProcessor(_indexDir,
_indexLoadingConfig, _newColumnsSchemaWithText)) {
+ processor.process();
+ segmentMetadata = new SegmentMetadataImpl(_indexDir);
+ columnMetadata =
segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_RAW);
+
+ Assert.assertEquals(columnMetadata.getCardinality(), 5);
+ Assert.assertEquals(columnMetadata.getTotalDocs(), 100000);
+ Assert.assertEquals(columnMetadata.getTotalRawDocs(), 100000);
+ Assert.assertEquals(columnMetadata.getTotalAggDocs(), 0);
+ Assert.assertEquals(columnMetadata.getDataType(),
FieldSpec.DataType.STRING);
+ Assert.assertEquals(columnMetadata.getBitsPerElement(), 3);
+ Assert.assertEquals(columnMetadata.getColumnMaxLength(), 0);
+ Assert.assertEquals(columnMetadata.getFieldType(),
FieldSpec.FieldType.DIMENSION);
+ Assert.assertFalse(columnMetadata.isSorted());
+ Assert.assertFalse(columnMetadata.hasNulls());
+ Assert.assertFalse(columnMetadata.hasDictionary());
+ Assert.assertTrue(columnMetadata.hasTextIndex());
+ Assert.assertTrue(columnMetadata.isSingleValue());
+ Assert.assertEquals(columnMetadata.getMaxNumberOfMultiValues(), 0);
+ Assert.assertEquals(columnMetadata.getTotalNumberOfEntries(), 100000);
+ Assert.assertFalse(columnMetadata.isAutoGenerated());
+ Assert.assertEquals(columnMetadata.getDefaultNullValueString(), "null");
+
+ try (SegmentDirectory segmentDirectory =
SegmentDirectory.createFromLocalFS(_indexDir, ReadMode.mmap);
+ SegmentDirectory.Reader reader = segmentDirectory.createReader()) {
+ Assert.assertTrue(reader.hasIndexFor(EXISTING_STRING_COL_RAW,
ColumnIndexType.TEXT_INDEX));
+ }
+ }
+ }
+
+ /**
+ * Test to check for failure case where text index is enabled on an existing
+ * column that is dictionary encoded. This is currently not supported.
+ * @throws Exception
+ */
+ @Test
+ public void testEnableTextIndexOnExistingColumnDictEncoded() throws
Exception {
+ constructV3Segment();
+ Set<String> textIndexColumns = new HashSet<>();
+ textIndexColumns.add(EXISTING_STRING_COL_DICT);
+ _indexLoadingConfig.setTextIndexColumns(textIndexColumns);
+ SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir);
+ ColumnMetadata columnMetadata =
segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_DICT);
+ Assert.assertNotNull(columnMetadata);
+ Assert.assertFalse(columnMetadata.hasTextIndex());
+
+ try (SegmentPreProcessor processor = new SegmentPreProcessor(_indexDir,
_indexLoadingConfig, _newColumnsSchemaWithText)) {
+ processor.process();
+ Assert.fail("operation should have failed");
+ } catch (Exception e) {
Review comment:
It is best to catch an exception type that is as close as possible to the
one actually thrown. Otherwise, if some other part of the code throws a
different exception, we will catch that here and assume that the test passed.
I would not be opposed to checking the exact exception message here,
including the column name, for example. It will also help us refine our
messages so that they are consistent wherever the exceptions are thrown.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]