This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new c62a1fb make getColumnIndices accurate (#7370)
c62a1fb is described below
commit c62a1fb328f1b151fa4a7a4a6ef17875662d6059
Author: Xiaobing <[email protected]>
AuthorDate: Thu Aug 26 11:36:47 2021 -0700
make getColumnIndices accurate (#7370)
- make getColumnIndices accurate
- makes V1's findIndex() aware of H3 index
---
.../local/segment/store/FilePerIndexDirectory.java | 12 +++--
.../segment/store/SingleFileIndexDirectory.java | 9 ++++
.../segment/store/FilePerIndexDirectoryTest.java | 42 +++++++++++++--
.../store/SingleFileIndexDirectoryTest.java | 59 +++++++++++++++++-----
4 files changed, 100 insertions(+), 22 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java
index 7969e58..aba55ca 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java
@@ -108,10 +108,13 @@ class FilePerIndexDirectory extends ColumnIndexDirectory {
@Override
public Set<String> getColumnsWithIndex(ColumnIndexType type) {
+ // _indexBuffers is just a cache of index files, thus not reliable as
+ // the source of truth about which indices exist in the directory.
+ // Call hasIndexFor() to check if a column-index exists for sure.
Set<String> columns = new HashSet<>();
- for (IndexKey indexKey : _indexBuffers.keySet()) {
- if (indexKey._type == type) {
- columns.add(indexKey._name);
+ for (String column : _segmentMetadata.getAllColumns()) {
+ if (hasIndexFor(column, type)) {
+ columns.add(column);
}
}
return columns;
@@ -188,6 +191,9 @@ class FilePerIndexDirectory extends ColumnIndexDirectory {
case JSON_INDEX:
fileExtension = V1Constants.Indexes.JSON_INDEX_FILE_EXTENSION;
break;
+ case H3_INDEX:
+ fileExtension = V1Constants.Indexes.H3_INDEX_FILE_EXTENSION;
+ break;
default:
throw new IllegalStateException("Unsupported index type: " +
indexType);
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
index 80baf9f..744dd59 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
@@ -386,6 +386,15 @@ class SingleFileIndexDirectory extends
ColumnIndexDirectory {
@Override
public Set<String> getColumnsWithIndex(ColumnIndexType type) {
Set<String> columns = new HashSet<>();
+ // TEXT_INDEX is not tracked via _columnEntries, so handled separately.
+ if (type == ColumnIndexType.TEXT_INDEX) {
+ for (String column : _segmentMetadata.getAllColumns()) {
+ if (TextIndexUtils.hasTextIndex(_segmentDirectory, column)) {
+ columns.add(column);
+ }
+ }
+ return columns;
+ }
for (IndexKey indexKey : _columnEntries.keySet()) {
if (indexKey._type == type) {
columns.add(indexKey._name);
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
index 0733449..3076cf6 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
@@ -40,6 +40,7 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
@@ -230,26 +231,57 @@ public class FilePerIndexDirectoryTest {
@Test
public void testGetColumnIndices()
throws IOException {
- try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR,
_segmentMetadata, ReadMode.mmap)) {
- fpi.newBuffer("col1", ColumnIndexType.FORWARD_INDEX, 1024);
- fpi.newBuffer("col2", ColumnIndexType.DICTIONARY, 100);
- fpi.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
- fpi.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 100);
+ // Write sth to buffers and flush them to index files on disk
+ try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR,
_segmentMetadata, ReadMode.mmap);
+ LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo",
TEMP_DIR, true);
+ LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar",
TEMP_DIR, true)) {
+ PinotDataBuffer buf = fpi.newBuffer("col1",
ColumnIndexType.FORWARD_INDEX, 1024);
+ buf.putInt(0, 111);
+ buf = fpi.newBuffer("col2", ColumnIndexType.DICTIONARY, 1024);
+ buf.putInt(0, 222);
+ buf = fpi.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
+ buf.putInt(0, 333);
+ buf = fpi.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 1024);
+ buf.putInt(0, 444);
+ buf = fpi.newBuffer("col5", ColumnIndexType.H3_INDEX, 1024);
+ buf.putInt(0, 555);
+
+ fooCreator.add("{\"clean\":\"this\"}");
+ fooCreator.seal();
+ barCreator.add("{\"retain\":\"this\"}");
+ barCreator.add("{\"keep\":\"this\"}");
+ barCreator.add("{\"hold\":\"this\"}");
+ barCreator.seal();
+ }
+ // Need segmentMetadata to tell the full set of columns in this segment.
+ when(_segmentMetadata.getAllColumns())
+ .thenReturn(new HashSet<>(Arrays.asList("col1", "col2", "col3",
"col4", "col5", "foo", "bar")));
+ try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR,
_segmentMetadata, ReadMode.mmap)) {
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
new HashSet<>(Arrays.asList("col1", "col3")));
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.DICTIONARY),
new HashSet<>(Collections.singletonList("col2")));
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
new HashSet<>(Collections.singletonList("col4")));
+ assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.H3_INDEX),
+ new HashSet<>(Collections.singletonList("col5")));
+ assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.TEXT_INDEX), new
HashSet<>(Arrays.asList("foo", "bar")));
fpi.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
fpi.removeIndex("col2", ColumnIndexType.DICTIONARY);
+ fpi.removeIndex("col5", ColumnIndexType.H3_INDEX);
+ fpi.removeIndex("foo", ColumnIndexType.TEXT_INDEX);
fpi.removeIndex("col111", ColumnIndexType.DICTIONARY);
+
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
new HashSet<>(Collections.singletonList("col3")));
+ assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.DICTIONARY), new
HashSet<>(Collections.emptySet()));
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
new HashSet<>(Collections.singletonList("col4")));
+ assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.H3_INDEX), new
HashSet<>(Collections.emptySet()));
+ assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.TEXT_INDEX),
+ new HashSet<>(Collections.singletonList("bar")));
}
}
}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
index fc595b6..3664a66 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
@@ -50,6 +50,7 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
@@ -289,7 +290,7 @@ public class SingleFileIndexDirectoryTest {
}
@Test
- public void testCopyIndicesTo()
+ public void testCopyIndices()
throws IOException {
File srcTmp = new File(TEMP_DIR, UUID.randomUUID().toString());
if (!srcTmp.exists()) {
@@ -334,26 +335,56 @@ public class SingleFileIndexDirectoryTest {
@Test
public void testGetColumnIndices()
throws Exception {
- try (SingleFileIndexDirectory spi = new SingleFileIndexDirectory(TEMP_DIR,
_segmentMetadata, ReadMode.mmap)) {
- spi.newBuffer("col1", ColumnIndexType.FORWARD_INDEX, 1024);
- spi.newBuffer("col2", ColumnIndexType.DICTIONARY, 100);
- spi.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
- spi.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 100);
+ try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR,
_segmentMetadata, ReadMode.mmap);
+ LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo",
TEMP_DIR, true);
+ LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar",
TEMP_DIR, true)) {
+ PinotDataBuffer buf = sfd.newBuffer("col1",
ColumnIndexType.FORWARD_INDEX, 1024);
+ buf.putInt(0, 111);
+ buf = sfd.newBuffer("col2", ColumnIndexType.DICTIONARY, 1024);
+ buf.putInt(0, 222);
+ buf = sfd.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
+ buf.putInt(0, 333);
+ buf = sfd.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 1024);
+ buf.putInt(0, 444);
+ buf = sfd.newBuffer("col5", ColumnIndexType.H3_INDEX, 1024);
+ buf.putInt(0, 555);
- assertEquals(spi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
+ fooCreator.add("{\"clean\":\"this\"}");
+ fooCreator.seal();
+ barCreator.add("{\"retain\":\"this\"}");
+ barCreator.add("{\"keep\":\"this\"}");
+ barCreator.add("{\"hold\":\"this\"}");
+ barCreator.seal();
+ }
+
+ // Need segmentMetadata to tell the full set of columns in this segment.
+ when(_segmentMetadata.getAllColumns())
+ .thenReturn(new HashSet<>(Arrays.asList("col1", "col2", "col3",
"col4", "foo", "bar")));
+ try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR,
_segmentMetadata, ReadMode.mmap)) {
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
new HashSet<>(Arrays.asList("col1", "col3")));
- assertEquals(spi.getColumnsWithIndex(ColumnIndexType.DICTIONARY),
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.DICTIONARY),
new HashSet<>(Collections.singletonList("col2")));
- assertEquals(spi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
new HashSet<>(Collections.singletonList("col4")));
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.H3_INDEX),
+ new HashSet<>(Collections.singletonList("col5")));
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.TEXT_INDEX), new
HashSet<>(Arrays.asList("foo", "bar")));
+
+ sfd.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
+ sfd.removeIndex("col2", ColumnIndexType.DICTIONARY);
+ sfd.removeIndex("col5", ColumnIndexType.H3_INDEX);
+ sfd.removeIndex("foo", ColumnIndexType.TEXT_INDEX);
+ sfd.removeIndex("col111", ColumnIndexType.DICTIONARY);
- spi.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
- spi.removeIndex("col2", ColumnIndexType.DICTIONARY);
- spi.removeIndex("col111", ColumnIndexType.DICTIONARY);
- assertEquals(spi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
new HashSet<>(Collections.singletonList("col3")));
- assertEquals(spi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.DICTIONARY), new
HashSet<>(Collections.emptySet()));
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
new HashSet<>(Collections.singletonList("col4")));
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.H3_INDEX), new
HashSet<>(Collections.emptySet()));
+ assertEquals(sfd.getColumnsWithIndex(ColumnIndexType.TEXT_INDEX),
+ new HashSet<>(Collections.singletonList("bar")));
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]