This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new c62a1fb  make getColumnIndices accurate (#7370)
c62a1fb is described below

commit c62a1fb328f1b151fa4a7a4a6ef17875662d6059
Author: Xiaobing <[email protected]>
AuthorDate: Thu Aug 26 11:36:47 2021 -0700

    make getColumnIndices accurate (#7370)
    
    - make getColumnIndices accurate
    - makes V1's findIndex() aware of H3 index
---
 .../local/segment/store/FilePerIndexDirectory.java | 12 +++--
 .../segment/store/SingleFileIndexDirectory.java    |  9 ++++
 .../segment/store/FilePerIndexDirectoryTest.java   | 42 +++++++++++++--
 .../store/SingleFileIndexDirectoryTest.java        | 59 +++++++++++++++++-----
 4 files changed, 100 insertions(+), 22 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java
index 7969e58..aba55ca 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java
@@ -108,10 +108,13 @@ class FilePerIndexDirectory extends ColumnIndexDirectory {
 
   @Override
   public Set<String> getColumnsWithIndex(ColumnIndexType type) {
+    // _indexBuffers is just a cache of index files, thus not reliable as
+    // the source of truth about which indices exist in the directory.
+    // Call hasIndexFor() to check if a column-index exists for sure.
     Set<String> columns = new HashSet<>();
-    for (IndexKey indexKey : _indexBuffers.keySet()) {
-      if (indexKey._type == type) {
-        columns.add(indexKey._name);
+    for (String column : _segmentMetadata.getAllColumns()) {
+      if (hasIndexFor(column, type)) {
+        columns.add(column);
       }
     }
     return columns;
@@ -188,6 +191,9 @@ class FilePerIndexDirectory extends ColumnIndexDirectory {
       case JSON_INDEX:
         fileExtension = V1Constants.Indexes.JSON_INDEX_FILE_EXTENSION;
         break;
+      case H3_INDEX:
+        fileExtension = V1Constants.Indexes.H3_INDEX_FILE_EXTENSION;
+        break;
       default:
         throw new IllegalStateException("Unsupported index type: " + 
indexType);
     }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
index 80baf9f..744dd59 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
@@ -386,6 +386,15 @@ class SingleFileIndexDirectory extends 
ColumnIndexDirectory {
   @Override
   public Set<String> getColumnsWithIndex(ColumnIndexType type) {
     Set<String> columns = new HashSet<>();
+    // TEXT_INDEX is not tracked via _columnEntries, so handled separately.
+    if (type == ColumnIndexType.TEXT_INDEX) {
+      for (String column : _segmentMetadata.getAllColumns()) {
+        if (TextIndexUtils.hasTextIndex(_segmentDirectory, column)) {
+          columns.add(column);
+        }
+      }
+      return columns;
+    }
     for (IndexKey indexKey : _columnEntries.keySet()) {
       if (indexKey._type == type) {
         columns.add(indexKey._name);
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
index 0733449..3076cf6 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
@@ -40,6 +40,7 @@ import org.testng.annotations.AfterMethod;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import static org.mockito.Mockito.when;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
 import static org.testng.Assert.assertTrue;
@@ -230,26 +231,57 @@ public class FilePerIndexDirectoryTest {
   @Test
   public void testGetColumnIndices()
       throws IOException {
-    try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, 
_segmentMetadata, ReadMode.mmap)) {
-      fpi.newBuffer("col1", ColumnIndexType.FORWARD_INDEX, 1024);
-      fpi.newBuffer("col2", ColumnIndexType.DICTIONARY, 100);
-      fpi.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
-      fpi.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 100);
+    // Write sth to buffers and flush them to index files on disk
+    try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, 
_segmentMetadata, ReadMode.mmap);
+        LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", 
TEMP_DIR, true);
+        LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", 
TEMP_DIR, true)) {
+      PinotDataBuffer buf = fpi.newBuffer("col1", 
ColumnIndexType.FORWARD_INDEX, 1024);
+      buf.putInt(0, 111);
+      buf = fpi.newBuffer("col2", ColumnIndexType.DICTIONARY, 1024);
+      buf.putInt(0, 222);
+      buf = fpi.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
+      buf.putInt(0, 333);
+      buf = fpi.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 1024);
+      buf.putInt(0, 444);
+      buf = fpi.newBuffer("col5", ColumnIndexType.H3_INDEX, 1024);
+      buf.putInt(0, 555);
+
+      fooCreator.add("{\"clean\":\"this\"}");
+      fooCreator.seal();
+      barCreator.add("{\"retain\":\"this\"}");
+      barCreator.add("{\"keep\":\"this\"}");
+      barCreator.add("{\"hold\":\"this\"}");
+      barCreator.seal();
+    }
 
+    // Need segmentMetadata to tell the full set of columns in this segment.
+    when(_segmentMetadata.getAllColumns())
+        .thenReturn(new HashSet<>(Arrays.asList("col1", "col2", "col3", 
"col4", "col5", "foo", "bar")));
+    try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, 
_segmentMetadata, ReadMode.mmap)) {
       assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
           new HashSet<>(Arrays.asList("col1", "col3")));
       assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.DICTIONARY),
           new HashSet<>(Collections.singletonList("col2")));
       assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
           new HashSet<>(Collections.singletonList("col4")));
+      assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.H3_INDEX),
+          new HashSet<>(Collections.singletonList("col5")));
+      assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.TEXT_INDEX), new 
HashSet<>(Arrays.asList("foo", "bar")));
 
       fpi.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
       fpi.removeIndex("col2", ColumnIndexType.DICTIONARY);
+      fpi.removeIndex("col5", ColumnIndexType.H3_INDEX);
+      fpi.removeIndex("foo", ColumnIndexType.TEXT_INDEX);
       fpi.removeIndex("col111", ColumnIndexType.DICTIONARY);
+
       assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
           new HashSet<>(Collections.singletonList("col3")));
+      assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.DICTIONARY), new 
HashSet<>(Collections.emptySet()));
       assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
           new HashSet<>(Collections.singletonList("col4")));
+      assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.H3_INDEX), new 
HashSet<>(Collections.emptySet()));
+      assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.TEXT_INDEX),
+          new HashSet<>(Collections.singletonList("bar")));
     }
   }
 }
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
index fc595b6..3664a66 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
@@ -50,6 +50,7 @@ import org.testng.annotations.AfterMethod;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import static org.mockito.Mockito.when;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
 import static org.testng.Assert.assertTrue;
@@ -289,7 +290,7 @@ public class SingleFileIndexDirectoryTest {
   }
 
   @Test
-  public void testCopyIndicesTo()
+  public void testCopyIndices()
       throws IOException {
     File srcTmp = new File(TEMP_DIR, UUID.randomUUID().toString());
     if (!srcTmp.exists()) {
@@ -334,26 +335,56 @@ public class SingleFileIndexDirectoryTest {
   @Test
   public void testGetColumnIndices()
       throws Exception {
-    try (SingleFileIndexDirectory spi = new SingleFileIndexDirectory(TEMP_DIR, 
_segmentMetadata, ReadMode.mmap)) {
-      spi.newBuffer("col1", ColumnIndexType.FORWARD_INDEX, 1024);
-      spi.newBuffer("col2", ColumnIndexType.DICTIONARY, 100);
-      spi.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
-      spi.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 100);
+    try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, 
_segmentMetadata, ReadMode.mmap);
+        LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", 
TEMP_DIR, true);
+        LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", 
TEMP_DIR, true)) {
+      PinotDataBuffer buf = sfd.newBuffer("col1", 
ColumnIndexType.FORWARD_INDEX, 1024);
+      buf.putInt(0, 111);
+      buf = sfd.newBuffer("col2", ColumnIndexType.DICTIONARY, 1024);
+      buf.putInt(0, 222);
+      buf = sfd.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
+      buf.putInt(0, 333);
+      buf = sfd.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 1024);
+      buf.putInt(0, 444);
+      buf = sfd.newBuffer("col5", ColumnIndexType.H3_INDEX, 1024);
+      buf.putInt(0, 555);
 
-      assertEquals(spi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
+      fooCreator.add("{\"clean\":\"this\"}");
+      fooCreator.seal();
+      barCreator.add("{\"retain\":\"this\"}");
+      barCreator.add("{\"keep\":\"this\"}");
+      barCreator.add("{\"hold\":\"this\"}");
+      barCreator.seal();
+    }
+
+    // Need segmentMetadata to tell the full set of columns in this segment.
+    when(_segmentMetadata.getAllColumns())
+        .thenReturn(new HashSet<>(Arrays.asList("col1", "col2", "col3", 
"col4", "foo", "bar")));
+    try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, 
_segmentMetadata, ReadMode.mmap)) {
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
           new HashSet<>(Arrays.asList("col1", "col3")));
-      assertEquals(spi.getColumnsWithIndex(ColumnIndexType.DICTIONARY),
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.DICTIONARY),
           new HashSet<>(Collections.singletonList("col2")));
-      assertEquals(spi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
           new HashSet<>(Collections.singletonList("col4")));
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.H3_INDEX),
+          new HashSet<>(Collections.singletonList("col5")));
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.TEXT_INDEX), new 
HashSet<>(Arrays.asList("foo", "bar")));
+
+      sfd.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
+      sfd.removeIndex("col2", ColumnIndexType.DICTIONARY);
+      sfd.removeIndex("col5", ColumnIndexType.H3_INDEX);
+      sfd.removeIndex("foo", ColumnIndexType.TEXT_INDEX);
+      sfd.removeIndex("col111", ColumnIndexType.DICTIONARY);
 
-      spi.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
-      spi.removeIndex("col2", ColumnIndexType.DICTIONARY);
-      spi.removeIndex("col111", ColumnIndexType.DICTIONARY);
-      assertEquals(spi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
           new HashSet<>(Collections.singletonList("col3")));
-      assertEquals(spi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.DICTIONARY), new 
HashSet<>(Collections.emptySet()));
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
           new HashSet<>(Collections.singletonList("col4")));
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.H3_INDEX), new 
HashSet<>(Collections.emptySet()));
+      assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.TEXT_INDEX),
+          new HashSet<>(Collections.singletonList("bar")));
     }
   }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to