siddharthteotia commented on a change in pull request #5074: Support segment 
reload for text index
URL: https://github.com/apache/incubator-pinot/pull/5074#discussion_r382871924
 
 

 ##########
 File path: 
pinot-core/src/test/java/org/apache/pinot/core/segment/index/loader/SegmentPreProcessorTest.java
 ##########
 @@ -140,6 +154,124 @@ private void constructV3Segment()
     new SegmentV1V2ToV3FormatConverter().convert(_indexDir);
   }
 
+  /**
+   * Test to check for default column handling and text index creation during
+   * segment load after a new column is added to the schema with text index
+   * creation enabled
+   * @throws Exception
+   */
+  @Test
+  public void testEnableTextIndexOnNewlyAddedStringColumn() throws Exception {
+    constructV3Segment();
+    Set<String> textIndexColumns = new HashSet<>();
+    textIndexColumns.add(NEWLY_ADDED_STRING_COL);
+    _indexLoadingConfig.setTextIndexColumns(textIndexColumns);
+
+    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir);
+    ColumnMetadata columnMetadata = 
segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_STRING_COL);
+    Assert.assertNull(columnMetadata);
+    try (SegmentPreProcessor processor = new SegmentPreProcessor(_indexDir, 
_indexLoadingConfig, _newColumnsSchemaWithText)) {
+      processor.process();
+
+      segmentMetadata = new SegmentMetadataImpl(_indexDir);
+      columnMetadata = 
segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_STRING_COL);
+      Assert.assertNotNull(columnMetadata);
+      Assert.assertEquals(columnMetadata.getCardinality(), 1);
+      Assert.assertEquals(columnMetadata.getTotalDocs(), 100000);
+      Assert.assertEquals(columnMetadata.getTotalRawDocs(), 100000);
+      Assert.assertEquals(columnMetadata.getTotalAggDocs(), 0);
+      Assert.assertEquals(columnMetadata.getDataType(), 
FieldSpec.DataType.STRING);
+      Assert.assertEquals(columnMetadata.getBitsPerElement(), 1);
+      Assert.assertEquals(columnMetadata.getColumnMaxLength(), 0);
+      Assert.assertEquals(columnMetadata.getFieldType(), 
FieldSpec.FieldType.DIMENSION);
+      Assert.assertFalse(columnMetadata.isSorted());
+      Assert.assertFalse(columnMetadata.hasNulls());
+      Assert.assertFalse(columnMetadata.hasDictionary());
+      Assert.assertFalse(columnMetadata.hasInvertedIndex());
+      Assert.assertTrue(columnMetadata.hasTextIndex());
+      Assert.assertTrue(columnMetadata.isSingleValue());
+      Assert.assertEquals(columnMetadata.getMaxNumberOfMultiValues(), 0);
+      Assert.assertEquals(columnMetadata.getTotalNumberOfEntries(), 100000);
+      Assert.assertTrue(columnMetadata.isAutoGenerated());
+      Assert.assertEquals(columnMetadata.getDefaultNullValueString(), "null");
+
+      try (SegmentDirectory segmentDirectory = 
SegmentDirectory.createFromLocalFS(_indexDir, ReadMode.mmap);
+          SegmentDirectory.Reader reader = segmentDirectory.createReader()) {
+        Assert.assertTrue(reader.hasIndexFor(NEWLY_ADDED_STRING_COL, 
ColumnIndexType.TEXT_INDEX));
+      }
+    }
+  }
+
+  /**
+   * Test to check text index creation during segment load after text index
+   * creation is enabled on an existing column
+   * @throws Exception
+   */
+  @Test
+  public void testEnableTextIndexOnExistingColumn() throws Exception {
+    constructV3Segment();
+    Set<String> textIndexColumns = new HashSet<>();
+    textIndexColumns.add(EXISTING_STRING_COL_RAW);
+    _indexLoadingConfig.setTextIndexColumns(textIndexColumns);
+    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir);
+    ColumnMetadata columnMetadata = 
segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_RAW);
+    Assert.assertNotNull(columnMetadata);
+    Assert.assertFalse(columnMetadata.hasTextIndex());
+
+    try (SegmentPreProcessor processor = new SegmentPreProcessor(_indexDir, 
_indexLoadingConfig, _newColumnsSchemaWithText)) {
+      processor.process();
+      segmentMetadata = new SegmentMetadataImpl(_indexDir);
+      columnMetadata = 
segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_RAW);
+
+      Assert.assertEquals(columnMetadata.getCardinality(), 5);
+      Assert.assertEquals(columnMetadata.getTotalDocs(), 100000);
+      Assert.assertEquals(columnMetadata.getTotalRawDocs(), 100000);
+      Assert.assertEquals(columnMetadata.getTotalAggDocs(), 0);
+      Assert.assertEquals(columnMetadata.getDataType(), 
FieldSpec.DataType.STRING);
+      Assert.assertEquals(columnMetadata.getBitsPerElement(), 3);
+      Assert.assertEquals(columnMetadata.getColumnMaxLength(), 0);
+      Assert.assertEquals(columnMetadata.getFieldType(), 
FieldSpec.FieldType.DIMENSION);
+      Assert.assertFalse(columnMetadata.isSorted());
+      Assert.assertFalse(columnMetadata.hasNulls());
+      Assert.assertFalse(columnMetadata.hasDictionary());
+      Assert.assertTrue(columnMetadata.hasTextIndex());
+      Assert.assertTrue(columnMetadata.isSingleValue());
+      Assert.assertEquals(columnMetadata.getMaxNumberOfMultiValues(), 0);
+      Assert.assertEquals(columnMetadata.getTotalNumberOfEntries(), 100000);
+      Assert.assertFalse(columnMetadata.isAutoGenerated());
+      Assert.assertEquals(columnMetadata.getDefaultNullValueString(), "null");
+
+      try (SegmentDirectory segmentDirectory = 
SegmentDirectory.createFromLocalFS(_indexDir, ReadMode.mmap);
+          SegmentDirectory.Reader reader = segmentDirectory.createReader()) {
+        Assert.assertTrue(reader.hasIndexFor(EXISTING_STRING_COL_RAW, 
ColumnIndexType.TEXT_INDEX));
+      }
+    }
+  }
+
+  /**
+   * Test to check for failure case where text index is enabled on an existing
+   * column that is dictionary encoded. This is currently not supported.
+   * @throws Exception
+   */
+  @Test
+  public void testEnableTextIndexOnExistingColumnDictEncoded() throws 
Exception {
+    constructV3Segment();
+    Set<String> textIndexColumns = new HashSet<>();
+    textIndexColumns.add(EXISTING_STRING_COL_DICT);
+    _indexLoadingConfig.setTextIndexColumns(textIndexColumns);
+    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir);
+    ColumnMetadata columnMetadata = 
segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_DICT);
+    Assert.assertNotNull(columnMetadata);
+    Assert.assertFalse(columnMetadata.hasTextIndex());
+
+    try (SegmentPreProcessor processor = new SegmentPreProcessor(_indexDir, 
_indexLoadingConfig, _newColumnsSchemaWithText)) {
+      processor.process();
+      Assert.fail("operation should have failed");
+    } catch (Exception e) {
 
 Review comment:
   Assert.fail ensures that if an exception is not thrown by the line above, the test 
will fail. I am now checking for the full error message, including the column name.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to