This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 83ed397210 add multi-value support for native text index (#11204)
83ed397210 is described below
commit 83ed3972103379709d773d8ae96d479e126c19fd
Author: Christopher Peck <[email protected]>
AuthorDate: Tue Aug 1 22:30:44 2023 -0700
add multi-value support for native text index (#11204)
---
.../queries/NativeAndLuceneComparisonTest.java | 122 +++++++++++++++------
.../impl/invertedindex/NativeMutableTextIndex.java | 23 ++--
.../creator/impl/text/NativeTextIndexCreator.java | 41 ++++---
.../index/readers/text/NativeTextIndexReader.java | 2 +-
.../local/segment/index/text/TextIndexType.java | 3 -
.../NativeAndLuceneMutableTextIndexTest.java | 61 ++++++++---
6 files changed, 176 insertions(+), 76 deletions(-)
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
index 53db2f7abc..a066a45dc4 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
@@ -57,8 +57,10 @@ public class NativeAndLuceneComparisonTest extends BaseQueriesTest {
private static final String TABLE_NAME = "MyTable";
private static final String SEGMENT_NAME_LUCENE = "testSegmentLucene";
private static final String SEGMENT_NAME_NATIVE = "testSegmentNative";
- private static final String DOMAIN_NAMES_COL_LUCENE = "DOMAIN_NAMES_LUCENE";
- private static final String DOMAIN_NAMES_COL_NATIVE = "DOMAIN_NAMES_NATIVE";
+ private static final String QUOTES_COL_LUCENE = "QUOTES_LUCENE";
+ private static final String QUOTES_COL_NATIVE = "QUOTES_NATIVE";
+ private static final String QUOTES_COL_LUCENE_MV = "QUOTES_LUCENE_MV";
+ private static final String QUOTES_COL_NATIVE_MV = "QUOTES_NATIVE_MV";
private static final Integer NUM_ROWS = 1024;
private IndexSegment _indexSegment;
@@ -107,8 +109,8 @@ public class NativeAndLuceneComparisonTest extends BaseQueriesTest {
FileUtils.deleteQuietly(INDEX_DIR);
}
- private List<String> getDomainNames() {
- return Arrays.asList("Prince Andrew kept looking with an amused smile from
Pierre",
+ private String[] getTextData() {
+ return new String[]{"Prince Andrew kept looking with an amused smile from
Pierre",
"vicomte and from the vicomte to their hostess. In the first moment
of",
"Pierre’s outburst Anna Pávlovna, despite her social experience, was",
"horror-struck. But when she saw that Pierre’s sacrilegious words",
@@ -116,17 +118,34 @@ public class NativeAndLuceneComparisonTest extends BaseQueriesTest {
"impossible to stop him, she rallied her forces and joined the vicomte
in", "a vigorous attack on the orator",
"horror-struck. But when she", "she rallied her forces and joined",
"outburst Anna Pávlovna",
"she rallied her forces and", "despite her social experience", "had
not exasperated the vicomte",
- " despite her social experience", "impossible to stop him", "despite
her social experience");
+ " despite her social experience", "impossible to stop him", "despite
her social experience"};
+ }
+
+ private String[][] getMVTextData() {
+ return new String[][]{
+ {"Prince Andrew kept", "looking with an"}, {"amused smile", "from
Pierre"}, {"vicomte and from the"}, {
+ "vicomte to", "their hostess."}, {"In the first moment of"},
{"Pierre’s outburst Anna Pávlovna,"}, {
+ "despite her", "social", "experience, was"}, {"horror-struck.", "But
when she"}, {"saw that Pierre’s"}, {
+ "sacrilegious words"}, {"had not exasperated the vicomte, and had
convinced herself that it was"}, {
+ "impossible to stop him,", "she rallied her"}, {"forces and joined
the vicomte in", "a vigorous attack on "
+ + "the orator"}, {"horror-struck. But when she", "she rallied her
forces and joined", "outburst Anna "
+ + "Pávlovna"}, {"she rallied her forces and", "despite her social
experience", "had not exasperated the "
+ + "vicomte"}, {"despite her social experience", "impossible to stop
him", "despite her social experience"}
+ };
}
private List<GenericRow> createTestData(int numRows) {
List<GenericRow> rows = new ArrayList<>();
- List<String> domainNames = getDomainNames();
+ String[] textData = getTextData();
+ String[][] mvTextData = getMVTextData();
for (int i = 0; i < numRows; i++) {
- String domain = domainNames.get(i % domainNames.size());
+ String doc = textData[i % textData.length];
+ String[] mvDoc = mvTextData[i % mvTextData.length];
GenericRow row = new GenericRow();
- row.putField(DOMAIN_NAMES_COL_LUCENE, domain);
- row.putField(DOMAIN_NAMES_COL_NATIVE, domain);
+ row.putValue(QUOTES_COL_LUCENE, doc);
+ row.putValue(QUOTES_COL_NATIVE, doc);
+ row.putValue(QUOTES_COL_LUCENE_MV, mvDoc);
+ row.putValue(QUOTES_COL_NATIVE_MV, mvDoc);
rows.add(row);
}
@@ -139,13 +158,18 @@ public class NativeAndLuceneComparisonTest extends BaseQueriesTest {
List<FieldConfig> fieldConfigs = new ArrayList<>();
fieldConfigs.add(
- new FieldConfig(DOMAIN_NAMES_COL_LUCENE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ new FieldConfig(QUOTES_COL_LUCENE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ null));
+ fieldConfigs.add(
+ new FieldConfig(QUOTES_COL_LUCENE_MV,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
null));
TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
-
.setInvertedIndexColumns(Arrays.asList(DOMAIN_NAMES_COL_LUCENE)).setFieldConfigList(fieldConfigs).build();
+ .setInvertedIndexColumns(Arrays.asList(QUOTES_COL_LUCENE,
QUOTES_COL_LUCENE_MV))
+ .setFieldConfigList(fieldConfigs).build();
Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
- .addSingleValueDimension(DOMAIN_NAMES_COL_LUCENE,
FieldSpec.DataType.STRING).build();
+ .addSingleValueDimension(QUOTES_COL_LUCENE, FieldSpec.DataType.STRING)
+ .addMultiValueDimension(QUOTES_COL_LUCENE_MV,
FieldSpec.DataType.STRING).build();
SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig,
schema);
config.setOutDir(INDEX_DIR.getPath());
config.setTableName(TABLE_NAME);
@@ -168,13 +192,18 @@ public class NativeAndLuceneComparisonTest extends BaseQueriesTest {
propertiesMap.put(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL);
fieldConfigs.add(
- new FieldConfig(DOMAIN_NAMES_COL_NATIVE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ new FieldConfig(QUOTES_COL_NATIVE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ propertiesMap));
+ fieldConfigs.add(
+ new FieldConfig(QUOTES_COL_NATIVE_MV,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
propertiesMap));
TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
-
.setInvertedIndexColumns(Arrays.asList(DOMAIN_NAMES_COL_NATIVE)).setFieldConfigList(fieldConfigs).build();
+ .setInvertedIndexColumns(Arrays.asList(QUOTES_COL_NATIVE,
QUOTES_COL_NATIVE_MV))
+ .setFieldConfigList(fieldConfigs).build();
Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
- .addSingleValueDimension(DOMAIN_NAMES_COL_NATIVE,
FieldSpec.DataType.STRING).build();
+ .addSingleValueDimension(QUOTES_COL_NATIVE, FieldSpec.DataType.STRING)
+ .addMultiValueDimension(QUOTES_COL_NATIVE_MV,
FieldSpec.DataType.STRING).build();
SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig,
schema);
config.setOutDir(INDEX_DIR.getPath());
config.setTableName(TABLE_NAME);
@@ -192,10 +221,12 @@ public class NativeAndLuceneComparisonTest extends BaseQueriesTest {
throws Exception {
IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
Set<String> textIndexCols = new HashSet<>();
- textIndexCols.add(DOMAIN_NAMES_COL_LUCENE);
+ textIndexCols.add(QUOTES_COL_LUCENE);
+ textIndexCols.add(QUOTES_COL_LUCENE_MV);
indexLoadingConfig.setTextIndexColumns(textIndexCols);
Set<String> invertedIndexCols = new HashSet<>();
- invertedIndexCols.add(DOMAIN_NAMES_COL_LUCENE);
+ invertedIndexCols.add(QUOTES_COL_LUCENE);
+ invertedIndexCols.add(QUOTES_COL_LUCENE_MV);
indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
return ImmutableSegmentLoader.load(new File(INDEX_DIR,
SEGMENT_NAME_LUCENE), indexLoadingConfig);
}
@@ -209,13 +240,16 @@ public class NativeAndLuceneComparisonTest extends BaseQueriesTest {
Map<String, Map<String, String>> columnPropertiesParentMap = new
HashMap<>();
Set<String> textIndexCols = new HashSet<>();
- textIndexCols.add(DOMAIN_NAMES_COL_NATIVE);
+ textIndexCols.add(QUOTES_COL_NATIVE);
+ textIndexCols.add(QUOTES_COL_NATIVE_MV);
indexLoadingConfig.setTextIndexColumns(textIndexCols);
indexLoadingConfig.setFSTIndexType(fstType);
Set<String> invertedIndexCols = new HashSet<>();
- invertedIndexCols.add(DOMAIN_NAMES_COL_NATIVE);
+ invertedIndexCols.add(QUOTES_COL_NATIVE);
+ invertedIndexCols.add(QUOTES_COL_NATIVE_MV);
indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
- columnPropertiesParentMap.put(DOMAIN_NAMES_COL_NATIVE, propertiesMap);
+ columnPropertiesParentMap.put(QUOTES_COL_NATIVE, propertiesMap);
+ columnPropertiesParentMap.put(QUOTES_COL_NATIVE_MV, propertiesMap);
indexLoadingConfig.setColumnProperties(columnPropertiesParentMap);
return ImmutableSegmentLoader.load(new File(INDEX_DIR,
SEGMENT_NAME_NATIVE), indexLoadingConfig);
}
@@ -247,29 +281,51 @@ public class NativeAndLuceneComparisonTest extends BaseQueriesTest {
}
}
}
-
@Test
public void testQueries() {
- String nativeQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'vico.*') LIMIT 50000";
- String luceneQuery = "SELECT * FROM MyTable WHERE
TEXT_MATCH(DOMAIN_NAMES_LUCENE, 'vico*') LIMIT 50000";
+
+ String nativeQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(QUOTES_NATIVE, 'vico.*') LIMIT 50000";
+ String luceneQuery = "SELECT * FROM MyTable WHERE
TEXT_MATCH(QUOTES_LUCENE, 'vico*') LIMIT 50000";
testSelectionResults(nativeQuery, luceneQuery);
- nativeQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'convi.*ced') LIMIT 50000";
- luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_LUCENE,
'convi*ced') LIMIT 50000";
+ nativeQuery = "SELECT * FROM MyTable WHERE TEXT_CONTAINS(QUOTES_NATIVE,
'convi.*ced') LIMIT 50000";
+ luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE,
'convi*ced') LIMIT 50000";
testSelectionResults(nativeQuery, luceneQuery);
- nativeQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'vicomte') AND "
- + "TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'hos.*') LIMIT 50000";
- luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_LUCENE,
'vicomte AND hos*') LIMIT 50000";
+ nativeQuery = "SELECT * FROM MyTable WHERE TEXT_CONTAINS(QUOTES_NATIVE,
'vicomte') AND "
+ + "TEXT_CONTAINS(QUOTES_NATIVE, 'hos.*') LIMIT 50000";
+ luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE,
'vicomte AND hos*') LIMIT 50000";
testSelectionResults(nativeQuery, luceneQuery);
- nativeQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'sac.*') OR "
- + "TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'herself') LIMIT 50000";
- luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_LUCENE,
'sac* OR herself') LIMIT 50000";
+ nativeQuery = "SELECT * FROM MyTable WHERE TEXT_CONTAINS(QUOTES_NATIVE,
'sac.*') OR "
+ + "TEXT_CONTAINS(QUOTES_NATIVE, 'herself') LIMIT 50000";
+ luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE, 'sac*
OR herself') LIMIT 50000";
testSelectionResults(nativeQuery, luceneQuery);
- nativeQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(DOMAIN_NAMES_NATIVE, 'vicomte') LIMIT 50000";
- luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_LUCENE,
'vicomte') LIMIT 50000";
+ nativeQuery = "SELECT * FROM MyTable WHERE TEXT_CONTAINS(QUOTES_NATIVE,
'vicomte') LIMIT 50000";
+ luceneQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE,
'vicomte') LIMIT 50000";
testSelectionResults(nativeQuery, luceneQuery);
+
+ String nativeMVQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'vico.*') LIMIT 50000";
+ String luceneMVQuery = "SELECT * FROM MyTable WHERE
TEXT_MATCH(QUOTES_LUCENE_MV, 'vico*') LIMIT 50000";
+ testSelectionResults(nativeMVQuery, luceneMVQuery);
+
+ nativeMVQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'convi.*ced') LIMIT 50000";
+ luceneMVQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE_MV,
'convi*ced') LIMIT 50000";
+ testSelectionResults(nativeMVQuery, luceneMVQuery);
+
+ nativeMVQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'vicomte') AND "
+ + "TEXT_CONTAINS(QUOTES_NATIVE_MV, 'hos.*') LIMIT 50000";
+ luceneMVQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE_MV,
'vicomte AND hos*') LIMIT 50000";
+ testSelectionResults(nativeMVQuery, luceneMVQuery);
+
+ nativeMVQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'sac.*') OR "
+ + "TEXT_CONTAINS(QUOTES_NATIVE_MV, 'herself') LIMIT 50000";
+ luceneMVQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE_MV,
'sac* OR herself') LIMIT 50000";
+ testSelectionResults(nativeMVQuery, luceneMVQuery);
+
+ nativeMVQuery = "SELECT * FROM MyTable WHERE
TEXT_CONTAINS(QUOTES_NATIVE_MV, 'vicomte') LIMIT 50000";
+ luceneMVQuery = "SELECT * FROM MyTable WHERE TEXT_MATCH(QUOTES_LUCENE_MV,
'vicomte') LIMIT 50000";
+ testSelectionResults(nativeMVQuery, luceneMVQuery);
}
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeMutableTextIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeMutableTextIndex.java
index b78f2dc296..1e56c57c87 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeMutableTextIndex.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeMutableTextIndex.java
@@ -63,9 +63,20 @@ public class NativeMutableTextIndex implements MutableTextIndex {
@Override
public void add(String document) {
- Iterable<String> tokens;
+ addHelper(document);
+ _nextDocId++;
+ }
+
+ @Override
+ public void add(String[] documents) {
+ for (String document : documents) {
+ addHelper(document);
+ }
+ _nextDocId++;
+ }
- tokens = analyze(document);
+ private void addHelper(String document) {
+ Iterable<String> tokens = analyze(document);
_writeLock.lock();
try {
for (String token : tokens) {
@@ -76,17 +87,11 @@ public class NativeMutableTextIndex implements MutableTextIndex {
});
_invertedIndex.add(currentDictId, _nextDocId);
}
- _nextDocId++;
} finally {
_writeLock.unlock();
}
}
- @Override
- public void add(String[] documents) {
- throw new UnsupportedOperationException("Mutable native text indexes are
not supported for multi-valued columns");
- }
-
@Override
public ImmutableRoaringBitmap getDictIds(String searchQuery) {
throw new UnsupportedOperationException();
@@ -114,8 +119,8 @@ public class NativeMutableTextIndex implements MutableTextIndex {
private List<String> analyze(String document) {
List<String> tokens = new ArrayList<>();
try (TokenStream tokenStream = _analyzer.tokenStream(_column, document)) {
- tokenStream.reset();
CharTermAttribute attribute =
tokenStream.getAttribute(CharTermAttribute.class);
+ tokenStream.reset();
while (tokenStream.incrementToken()) {
tokens.add(attribute.toString());
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/NativeTextIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/NativeTextIndexCreator.java
index d455c1a789..832801883d 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/NativeTextIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/NativeTextIndexCreator.java
@@ -67,6 +67,7 @@ public class NativeTextIndexCreator extends AbstractTextIndexCreator {
private final File _tempDir;
private final File _fstIndexFile;
private final File _invertedIndexFile;
+ private final Analyzer _analyzer;
private final Map<String, RoaringBitmapWriter<RoaringBitmap>>
_postingListMap = new TreeMap<>();
private final RoaringBitmapWriter.Wizard<Container, RoaringBitmap>
_bitmapWriterWizard = RoaringBitmapWriter.writer();
private int _nextDocId = 0;
@@ -86,13 +87,27 @@ public class NativeTextIndexCreator extends AbstractTextIndexCreator {
}
_fstIndexFile = new File(_tempDir, FST_FILE_NAME);
_invertedIndexFile = new File(_tempDir, INVERTED_INDEX_FILE_NAME);
+ _analyzer = new
StandardAnalyzer(LuceneTextIndexCreator.ENGLISH_STOP_WORDS_SET);
}
@Override
public void add(String document) {
+ addHelper(document);
+ _nextDocId++;
+ }
+
+ @Override
+ public void add(String[] documents, int length) {
+ for (int i = 0; i < length; i++) {
+ addHelper(documents[i]);
+ }
+ _nextDocId++;
+ }
+
+ private void addHelper(String document) {
List<String> tokens;
try {
- tokens = analyze(document, new
StandardAnalyzer(LuceneTextIndexCreator.ENGLISH_STOP_WORDS_SET));
+ tokens = analyze(document);
} catch (IOException e) {
throw new RuntimeException(e.getMessage());
}
@@ -100,13 +115,6 @@ public class NativeTextIndexCreator extends AbstractTextIndexCreator {
for (String token : tokens) {
addToPostingList(token);
}
-
- _nextDocId++;
- }
-
- @Override
- public void add(String[] documents, int length) {
- throw new UnsupportedOperationException("Native text index is not
supported on MV column: " + _columnName);
}
@Override
@@ -132,17 +140,22 @@ public class NativeTextIndexCreator extends AbstractTextIndexCreator {
@Override
public void close()
throws IOException {
+ _analyzer.close();
FileUtils.deleteDirectory(_tempDir);
}
- public List<String> analyze(String text, Analyzer analyzer)
+ public List<String> analyze(String text)
throws IOException {
List<String> result = new ArrayList<>();
- TokenStream tokenStream = analyzer.tokenStream(_columnName, text);
- CharTermAttribute attr = tokenStream.addAttribute(CharTermAttribute.class);
- tokenStream.reset();
- while (tokenStream.incrementToken()) {
- result.add(attr.toString());
+ try (TokenStream tokenStream = _analyzer.tokenStream(_columnName, text)) {
+ CharTermAttribute attr =
tokenStream.addAttribute(CharTermAttribute.class);
+ tokenStream.reset();
+ while (tokenStream.incrementToken()) {
+ result.add(attr.toString());
+ }
+ tokenStream.end();
+ } catch (IOException e) {
+ throw new RuntimeException("Caught exception while tokenizing the
document for column: " + _columnName, e);
}
return result;
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java
index 3650e3531f..a9cd64c91b 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java
@@ -59,7 +59,7 @@ public class NativeTextIndexReader implements TextIndexReader {
PinotDataBuffer.mapFile(indexFile, /* readOnly */ true, 0,
indexFile.length(), ByteOrder.BIG_ENDIAN, desc);
populateIndexes();
} catch (Exception e) {
- LOGGER.error("Failed to instantiate Lucene text index reader for column
{}, exception {}", column,
+ LOGGER.error("Failed to instantiate native text index reader for column
{}, exception {}", column,
e.getMessage());
throw new RuntimeException(e);
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java
index a23d2f550e..54cd746708 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java
@@ -184,9 +184,6 @@ public class TextIndexType extends AbstractIndexType<TextIndexConfig, TextIndexR
return null;
}
if (config.getFstType() == FSTType.NATIVE) {
- if (!context.getFieldSpec().isSingleValueField()) {
- return null;
- }
return new NativeMutableTextIndex(context.getFieldSpec().getName());
}
if (context.getConsumerDir() == null) {
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
index 6345433d0c..2311943ef7 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
@@ -19,7 +19,7 @@
package org.apache.pinot.segment.local.realtime.impl.invertedindex;
import java.io.File;
-import java.util.Arrays;
+import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.search.SearcherManager;
@@ -33,38 +33,66 @@ import static org.testng.Assert.assertEquals;
public class NativeAndLuceneMutableTextIndexTest {
private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(),
"RealTimeNativeVsLuceneTest");
private static final String TEXT_COLUMN_NAME = "testColumnName";
+ private static final String MV_TEXT_COLUMN_NAME = "testMVColumnName";
private RealtimeLuceneTextIndex _realtimeLuceneTextIndex;
private NativeMutableTextIndex _nativeMutableTextIndex;
- private List<String> getTextData() {
- return Arrays.asList("Prince Andrew kept looking with an amused smile from
Pierre",
- "vicomte and from the vicomte to their hostess. In the first moment
of",
- "Pierre’s outburst Anna Pávlovna, despite her social experience, was",
- "horror-struck. But when she saw that Pierre’s sacrilegious words",
- "had not exasperated the vicomte, and had convinced herself that it
was",
- "impossible to stop him, she rallied her forces and joined the vicomte
in", "a vigorous attack on the orator",
- "horror-struck. But when she", "she rallied her forces and joined",
"outburst Anna Pávlovna",
- "she rallied her forces and", "despite her social experience", "had
not exasperated the vicomte",
- " despite her social experience", "impossible to stop him", "despite
her social experience");
+ private RealtimeLuceneTextIndex _realtimeLuceneMVTextIndex;
+ private NativeMutableTextIndex _nativeMutableMVTextIndex;
+
+ private String[] getTextData() {
+ return new String[]{"Prince Andrew kept looking with an amused smile from
Pierre",
+ "vicomte and from the vicomte to their hostess. In the first moment of",
+ "Pierre’s outburst Anna Pávlovna, despite her social experience, was",
+ "horror-struck. But when she saw that Pierre’s sacrilegious words",
+ "had not exasperated the vicomte, and had convinced herself that it was",
+ "impossible to stop him, she rallied her forces and joined the vicomte
in", "a vigorous attack on the orator",
+ "horror-struck. But when she", "she rallied her forces and joined",
"outburst Anna Pávlovna",
+ "she rallied her forces and", "despite her social experience", "had not
exasperated the vicomte",
+ " despite her social experience", "impossible to stop him", "despite her
social experience"};
+ }
+
+ private String[][] getMVTextData() {
+ return new String[][]{{"Prince Andrew kept looking with an amused smile
from Pierre",
+ "vicomte and from the vicomte to their hostess. In the first moment
of"}, {
+ "Pierre’s outburst Anna Pávlovna, despite her social experience, was",
+ "horror-struck. But when she saw that Pierre’s sacrilegious words"}, {
+ "had not exasperated the vicomte, and had convinced herself that it
was"}, {
+ "impossible to stop him, she rallied her forces and joined the vicomte
in", "a vigorous attack on the orator",
+ "horror-struck. But when she", "she rallied her forces and joined",
"outburst Anna Pávlovna"}, {
+ "she rallied her forces and", "despite her social experience", "had not
exasperated the vicomte",
+ " despite her social experience", "impossible to stop him", "despite
her social experience"}};
}
@BeforeClass
public void setUp()
throws Exception {
- _realtimeLuceneTextIndex = new RealtimeLuceneTextIndex(TEXT_COLUMN_NAME,
INDEX_DIR, "fooBar", null,
- null);
+ _realtimeLuceneTextIndex = new RealtimeLuceneTextIndex(TEXT_COLUMN_NAME,
INDEX_DIR, "fooBar", null, null);
_nativeMutableTextIndex = new NativeMutableTextIndex(TEXT_COLUMN_NAME);
- List<String> documents = getTextData();
+ _realtimeLuceneMVTextIndex = new
RealtimeLuceneTextIndex(MV_TEXT_COLUMN_NAME, INDEX_DIR, "fooBar", null, null);
+ _nativeMutableMVTextIndex = new
NativeMutableTextIndex(MV_TEXT_COLUMN_NAME);
+
+ String[] documents = getTextData();
for (String doc : documents) {
_realtimeLuceneTextIndex.add(doc);
_nativeMutableTextIndex.add(doc);
}
- SearcherManager searcherManager =
_realtimeLuceneTextIndex.getSearcherManager();
+ String[][] mvDocuments = getMVTextData();
+ for (String[] mvDoc : mvDocuments) {
+ _realtimeLuceneMVTextIndex.add(mvDoc);
+ _nativeMutableMVTextIndex.add(mvDoc);
+ }
+
+ List<SearcherManager> searcherManagers = new ArrayList<>();
+ searcherManagers.add(_realtimeLuceneTextIndex.getSearcherManager());
+ searcherManagers.add(_realtimeLuceneMVTextIndex.getSearcherManager());
try {
- searcherManager.maybeRefresh();
+ for (SearcherManager searcherManager : searcherManagers) {
+ searcherManager.maybeRefresh();
+ }
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -100,5 +128,6 @@ public class NativeAndLuceneMutableTextIndexTest {
private void testSelectionResults(String nativeQuery, String luceneQuery) {
assertEquals(_nativeMutableTextIndex.getDocIds(nativeQuery),
_realtimeLuceneTextIndex.getDocIds(luceneQuery));
+ assertEquals(_nativeMutableMVTextIndex.getDocIds(nativeQuery),
_realtimeLuceneMVTextIndex.getDocIds(luceneQuery));
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]