This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 970622eb9d Test Classes Refactoring for setters of _textIndexColumns
and ColumnProperties (IndexLoadingConfig) (#14153)
970622eb9d is described below
commit 970622eb9dfbb30d08caef851ccc6956391865ad
Author: Chaitanya Deepthi <[email protected]>
AuthorDate: Fri Oct 4 23:18:43 2024 -0700
Test Classes Refactoring for setters of _textIndexColumns and
ColumnProperties (IndexLoadingConfig) (#14153)
---
.../pinot/queries/FastFilteredCountTest.java | 23 +++---
.../queries/NativeAndLuceneComparisonTest.java | 85 ++++++++------------
.../NoDictionaryCompressionQueriesTest.java | 22 ++----
.../pinot/queries/TextSearchQueriesTest.java | 92 +++++++++++++---------
.../perf/BenchmarkNativeVsLuceneTextIndex.java | 54 ++++---------
5 files changed, 123 insertions(+), 153 deletions(-)
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FastFilteredCountTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FastFilteredCountTest.java
index 864461aa82..c5d4d62fca 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/FastFilteredCountTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/FastFilteredCountTest.java
@@ -22,8 +22,6 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
import java.util.List;
import java.util.stream.IntStream;
import org.apache.commons.io.FileUtils;
@@ -36,6 +34,7 @@ import
org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
import org.apache.pinot.segment.spi.ImmutableSegment;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.data.FieldSpec;
@@ -123,16 +122,20 @@ public class FastFilteredCountTest extends
BaseQueriesTest {
driver.init(segmentGeneratorConfig, new GenericRowRecordReader(records));
driver.build();
- IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
- indexLoadingConfig.setInvertedIndexColumns(new
HashSet<>(Arrays.asList(CLASSIFICATION_COLUMN, SORTED_COLUMN)));
- indexLoadingConfig.setTextIndexColumns(Collections.singleton(TEXT_COLUMN));
- indexLoadingConfig.setJsonIndexColumns(Collections.singleton(JSON_COLUMN));
-
indexLoadingConfig.setRangeIndexColumns(Collections.singleton(INT_RANGE_COLUMN));
+ List<FieldConfig> fieldConfigs = List.of(
+ new FieldConfig(TEXT_COLUMN, FieldConfig.EncodingType.DICTIONARY,
FieldConfig.IndexType.TEXT, null, null));
- ImmutableSegment immutableSegment = ImmutableSegmentLoader.load(new
File(INDEX_DIR, SEGMENT_NAME),
- indexLoadingConfig);
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME)
+ .setInvertedIndexColumns(List.of(CLASSIFICATION_COLUMN, SORTED_COLUMN))
+
.setJsonIndexColumns(List.of(JSON_COLUMN)).setRangeIndexColumns(List.of(INT_RANGE_COLUMN))
+ .setFieldConfigList(fieldConfigs).build();
+
+ IndexLoadingConfig indexLoadingConfig = new
IndexLoadingConfig(tableConfig, SCHEMA);
+
+ ImmutableSegment immutableSegment =
+ ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME),
indexLoadingConfig);
_indexSegment = immutableSegment;
- _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
+ _indexSegments = List.of(immutableSegment, immutableSegment);
}
@AfterClass
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
index a066a45dc4..041f315b9d 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/NativeAndLuceneComparisonTest.java
@@ -20,12 +20,8 @@ package org.apache.pinot.queries;
import java.io.File;
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.core.common.Operator;
@@ -155,17 +151,13 @@ public class NativeAndLuceneComparisonTest extends
BaseQueriesTest {
private void buildLuceneSegment()
throws Exception {
List<GenericRow> rows = createTestData(NUM_ROWS);
- List<FieldConfig> fieldConfigs = new ArrayList<>();
-
- fieldConfigs.add(
- new FieldConfig(QUOTES_COL_LUCENE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
- null));
- fieldConfigs.add(
+ List<FieldConfig> fieldConfigs = List.of(
+ new FieldConfig(QUOTES_COL_LUCENE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null, null),
new FieldConfig(QUOTES_COL_LUCENE_MV,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
null));
TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
- .setInvertedIndexColumns(Arrays.asList(QUOTES_COL_LUCENE,
QUOTES_COL_LUCENE_MV))
+ .setInvertedIndexColumns(List.of(QUOTES_COL_LUCENE,
QUOTES_COL_LUCENE_MV))
.setFieldConfigList(fieldConfigs).build();
Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
.addSingleValueDimension(QUOTES_COL_LUCENE, FieldSpec.DataType.STRING)
@@ -185,21 +177,14 @@ public class NativeAndLuceneComparisonTest extends
BaseQueriesTest {
private void buildNativeTextIndexSegment()
throws Exception {
List<GenericRow> rows = createTestData(NUM_ROWS);
- List<FieldConfig> fieldConfigs = new ArrayList<>();
- Map<String, String> propertiesMap = new HashMap<>();
- FSTType fstType = FSTType.NATIVE;
-
- propertiesMap.put(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL);
-
- fieldConfigs.add(
+ List<FieldConfig> fieldConfigs = List.of(
new FieldConfig(QUOTES_COL_NATIVE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
- propertiesMap));
- fieldConfigs.add(
+ Map.of(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL)),
new FieldConfig(QUOTES_COL_NATIVE_MV,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
- propertiesMap));
+ Map.of(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL)));
TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
- .setInvertedIndexColumns(Arrays.asList(QUOTES_COL_NATIVE,
QUOTES_COL_NATIVE_MV))
+ .setInvertedIndexColumns(List.of(QUOTES_COL_NATIVE,
QUOTES_COL_NATIVE_MV))
.setFieldConfigList(fieldConfigs).build();
Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
.addSingleValueDimension(QUOTES_COL_NATIVE, FieldSpec.DataType.STRING)
@@ -208,7 +193,7 @@ public class NativeAndLuceneComparisonTest extends
BaseQueriesTest {
config.setOutDir(INDEX_DIR.getPath());
config.setTableName(TABLE_NAME);
config.setSegmentName(SEGMENT_NAME_NATIVE);
- config.setFSTIndexType(fstType);
+ config.setFSTIndexType(FSTType.NATIVE);
SegmentIndexCreationDriverImpl driver = new
SegmentIndexCreationDriverImpl();
try (RecordReader recordReader = new GenericRowRecordReader(rows)) {
@@ -219,51 +204,47 @@ public class NativeAndLuceneComparisonTest extends
BaseQueriesTest {
private ImmutableSegment loadLuceneSegment()
throws Exception {
- IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
- Set<String> textIndexCols = new HashSet<>();
- textIndexCols.add(QUOTES_COL_LUCENE);
- textIndexCols.add(QUOTES_COL_LUCENE_MV);
- indexLoadingConfig.setTextIndexColumns(textIndexCols);
- Set<String> invertedIndexCols = new HashSet<>();
- invertedIndexCols.add(QUOTES_COL_LUCENE);
- invertedIndexCols.add(QUOTES_COL_LUCENE_MV);
- indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
+ List<FieldConfig> fieldConfigs = List.of(
+ new FieldConfig(QUOTES_COL_LUCENE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null, null),
+ new FieldConfig(QUOTES_COL_LUCENE_MV,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ null));
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setInvertedIndexColumns(List.of(QUOTES_COL_LUCENE,
QUOTES_COL_LUCENE_MV))
+ .setFieldConfigList(fieldConfigs).build();
+ Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension(QUOTES_COL_LUCENE, FieldSpec.DataType.STRING)
+ .addMultiValueDimension(QUOTES_COL_LUCENE_MV,
FieldSpec.DataType.STRING).build();
+ IndexLoadingConfig indexLoadingConfig = new
IndexLoadingConfig(tableConfig, schema);
return ImmutableSegmentLoader.load(new File(INDEX_DIR,
SEGMENT_NAME_LUCENE), indexLoadingConfig);
}
private ImmutableSegment loadNativeIndexSegment()
throws Exception {
- IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
- Map<String, String> propertiesMap = new HashMap<>();
- FSTType fstType = FSTType.NATIVE;
- propertiesMap.put(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL);
-
- Map<String, Map<String, String>> columnPropertiesParentMap = new
HashMap<>();
- Set<String> textIndexCols = new HashSet<>();
- textIndexCols.add(QUOTES_COL_NATIVE);
- textIndexCols.add(QUOTES_COL_NATIVE_MV);
- indexLoadingConfig.setTextIndexColumns(textIndexCols);
- indexLoadingConfig.setFSTIndexType(fstType);
- Set<String> invertedIndexCols = new HashSet<>();
- invertedIndexCols.add(QUOTES_COL_NATIVE);
- invertedIndexCols.add(QUOTES_COL_NATIVE_MV);
- indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
- columnPropertiesParentMap.put(QUOTES_COL_NATIVE, propertiesMap);
- columnPropertiesParentMap.put(QUOTES_COL_NATIVE_MV, propertiesMap);
- indexLoadingConfig.setColumnProperties(columnPropertiesParentMap);
+ List<FieldConfig> fieldConfigs = List.of(
+ new FieldConfig(QUOTES_COL_NATIVE,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ Map.of(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL)),
+ new FieldConfig(QUOTES_COL_NATIVE_MV,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ Map.of(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL)));
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setInvertedIndexColumns(List.of(QUOTES_COL_NATIVE,
QUOTES_COL_NATIVE_MV)).setFieldConfigList(fieldConfigs)
+ .build();
+ Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension(QUOTES_COL_NATIVE, FieldSpec.DataType.STRING)
+ .addMultiValueDimension(QUOTES_COL_NATIVE_MV,
FieldSpec.DataType.STRING).build();
+ IndexLoadingConfig indexLoadingConfig = new
IndexLoadingConfig(tableConfig, schema);
return ImmutableSegmentLoader.load(new File(INDEX_DIR,
SEGMENT_NAME_NATIVE), indexLoadingConfig);
}
private void testSelectionResults(String nativeQuery, String luceneQuery) {
_indexSegment = _nativeIndexSegment;
- _indexSegments = Arrays.asList(_nativeIndexSegment);
+ _indexSegments = List.of(_nativeIndexSegment);
Operator<SelectionResultsBlock> operator = getOperator(nativeQuery);
SelectionResultsBlock operatorResult = operator.nextBlock();
List<Object[]> resultset = (List<Object[]>) operatorResult.getRows();
Assert.assertNotNull(resultset);
_indexSegment = _luceneSegment;
- _indexSegments = Arrays.asList(_luceneSegment);
+ _indexSegments = List.of(_luceneSegment);
operator = getOperator(luceneQuery);
operatorResult = operator.nextBlock();
List<Object[]> resultset2 = (List<Object[]>) operatorResult.getRows();
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/NoDictionaryCompressionQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/NoDictionaryCompressionQueriesTest.java
index c9ec8fa948..d1f09d9ad3 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/NoDictionaryCompressionQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/NoDictionaryCompressionQueriesTest.java
@@ -23,10 +23,8 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.HashSet;
import java.util.List;
import java.util.Random;
-import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.pinot.core.operator.blocks.results.SelectionResultsBlock;
@@ -96,6 +94,8 @@ public class NoDictionaryCompressionQueriesTest extends
BaseQueriesTest {
private IndexSegment _indexSegment;
private List<IndexSegment> _indexSegments;
private List<GenericRow> _rows;
+ private TableConfig _tableConfig;
+ private Schema _schema;
@Override
protected String getFilter() {
@@ -116,18 +116,8 @@ public class NoDictionaryCompressionQueriesTest extends
BaseQueriesTest {
public void setUp()
throws Exception {
FileUtils.deleteQuietly(INDEX_DIR);
-
buildSegment();
-
- IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
- Set<String> indexColumns = new HashSet<>();
- indexColumns.addAll(RAW_SNAPPY_INDEX_COLUMNS);
- indexColumns.addAll(RAW_PASS_THROUGH_INDEX_COLUMNS);
- indexColumns.addAll(RAW_ZSTANDARD_INDEX_COLUMNS);
- indexColumns.addAll(RAW_LZ4_INDEX_COLUMNS);
- indexColumns.addAll(RAW_GZIP_INDEX_COLUMNS);
-
- indexLoadingConfig.addNoDictionaryColumns(indexColumns);
+ IndexLoadingConfig indexLoadingConfig = new
IndexLoadingConfig(_tableConfig, _schema);
ImmutableSegment immutableSegment =
ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME),
indexLoadingConfig);
_indexSegment = immutableSegment;
@@ -180,10 +170,10 @@ public class NoDictionaryCompressionQueriesTest extends
BaseQueriesTest {
noDictionaryColumns.addAll(RAW_LZ4_INDEX_COLUMNS);
noDictionaryColumns.addAll(RAW_GZIP_INDEX_COLUMNS);
- TableConfig tableConfig =
+ _tableConfig =
new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(noDictionaryColumns)
.setFieldConfigList(fieldConfigs).build();
- Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ _schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
.addSingleValueDimension(SNAPPY_STRING, FieldSpec.DataType.STRING)
.addSingleValueDimension(PASS_THROUGH_STRING,
FieldSpec.DataType.STRING)
.addSingleValueDimension(ZSTANDARD_STRING, FieldSpec.DataType.STRING)
@@ -199,7 +189,7 @@ public class NoDictionaryCompressionQueriesTest extends
BaseQueriesTest {
.addSingleValueDimension(PASS_THROUGH_LONG, FieldSpec.DataType.LONG)
.addSingleValueDimension(LZ4_LONG, FieldSpec.DataType.LONG)
.addSingleValueDimension(GZIP_LONG, FieldSpec.DataType.LONG).build();
- SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig,
schema);
+ SegmentGeneratorConfig config = new SegmentGeneratorConfig(_tableConfig,
_schema);
config.setOutDir(INDEX_DIR.getPath());
config.setTableName(TABLE_NAME);
config.setSegmentName(SEGMENT_NAME);
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
index 217e099003..4fdc2df8de 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
@@ -29,12 +29,10 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
-import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -78,6 +76,7 @@ import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.data.readers.RecordReader;
import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.jetbrains.annotations.NotNull;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@@ -115,9 +114,19 @@ public class TextSearchQueriesTest extends BaseQueriesTest
{
private static final List<String> DICT_TEXT_INDEX_COLUMNS =
Arrays.asList(SKILLS_TEXT_COL_DICT_NAME, SKILLS_TEXT_MV_COL_DICT_NAME);
private static final int INT_BASE_VALUE = 1000;
+ private static final Schema SCHEMA = new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension(QUERY_LOG_TEXT_COL_NAME,
FieldSpec.DataType.STRING)
+ .addSingleValueDimension(SKILLS_TEXT_COL_NAME, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(SKILLS_TEXT_COL_DICT_NAME,
FieldSpec.DataType.STRING)
+ .addSingleValueDimension(SKILLS_TEXT_COL_MULTI_TERM_NAME,
FieldSpec.DataType.STRING)
+ .addSingleValueDimension(SKILLS_TEXT_NO_RAW_NAME,
FieldSpec.DataType.STRING)
+ .addMultiValueDimension(SKILLS_TEXT_MV_COL_NAME,
FieldSpec.DataType.STRING)
+ .addMultiValueDimension(SKILLS_TEXT_MV_COL_DICT_NAME,
FieldSpec.DataType.STRING)
+ .addMetric(INT_COL_NAME, FieldSpec.DataType.INT).build();
private IndexSegment _indexSegment;
private List<IndexSegment> _indexSegments;
+ private TableConfig _tableConfig;
@Override
protected String getFilter() {
@@ -138,31 +147,49 @@ public class TextSearchQueriesTest extends
BaseQueriesTest {
public void setUp()
throws Exception {
FileUtils.deleteQuietly(INDEX_DIR);
-
buildSegment();
- IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
- Set<String> textIndexColumns = new HashSet<>();
- textIndexColumns.addAll(RAW_TEXT_INDEX_COLUMNS);
- textIndexColumns.addAll(DICT_TEXT_INDEX_COLUMNS);
- indexLoadingConfig.setTextIndexColumns(textIndexColumns);
- indexLoadingConfig.setInvertedIndexColumns(new
HashSet<>(DICT_TEXT_INDEX_COLUMNS));
- Map<String, Map<String, String>> columnProperties = new HashMap<>();
+ List<FieldConfig> fieldConfigs = createFieldConfigs();
+ _tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+
.setNoDictionaryColumns(RAW_TEXT_INDEX_COLUMNS).setInvertedIndexColumns(DICT_TEXT_INDEX_COLUMNS)
+ .setFieldConfigList(fieldConfigs).build();
+ IndexLoadingConfig indexLoadingConfig = new
IndexLoadingConfig(_tableConfig, SCHEMA);
+ ImmutableSegment immutableSegment =
+ ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME),
indexLoadingConfig);
+ _indexSegment = immutableSegment;
+ _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
+ }
+
+ private static @NotNull List<FieldConfig> createFieldConfigs() {
+ List<FieldConfig> fieldConfigs = new ArrayList<>();
Map<String, String> props = new HashMap<>();
props.put(FieldConfig.TEXT_INDEX_USE_AND_FOR_MULTI_TERM_QUERIES, "true");
- columnProperties.put(SKILLS_TEXT_COL_MULTI_TERM_NAME, props);
+ fieldConfigs.add(new FieldConfig(SKILLS_TEXT_COL_MULTI_TERM_NAME,
FieldConfig.EncodingType.DICTIONARY,
+ FieldConfig.IndexType.TEXT, null, props));
+ fieldConfigs.add(
+ new FieldConfig(QUERY_LOG_TEXT_COL_NAME,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ null));
props = new HashMap<>();
props.put(FieldConfig.TEXT_INDEX_STOP_WORD_INCLUDE_KEY, "coordinator");
props.put(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY, "it, those");
props.put(FieldConfig.TEXT_INDEX_ENABLE_PREFIX_SUFFIX_PHRASE_QUERIES,
"true");
- columnProperties.put(SKILLS_TEXT_COL_NAME, props);
+ fieldConfigs.add(
+ new FieldConfig(SKILLS_TEXT_COL_NAME,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ props));
+ fieldConfigs.add(
+ new FieldConfig(SKILLS_TEXT_NO_RAW_NAME,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ null));
+ fieldConfigs.add(
+ new FieldConfig(SKILLS_TEXT_MV_COL_NAME,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
+ null));
props = new HashMap<>();
props.put(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY, "");
- columnProperties.put(SKILLS_TEXT_COL_DICT_NAME, props);
- indexLoadingConfig.setColumnProperties(columnProperties);
- ImmutableSegment immutableSegment =
- ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME),
indexLoadingConfig);
- _indexSegment = immutableSegment;
- _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
+ fieldConfigs.add(
+ new FieldConfig(SKILLS_TEXT_COL_DICT_NAME,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT,
+ null, props));
+ fieldConfigs.add(
+ new FieldConfig(SKILLS_TEXT_MV_COL_DICT_NAME,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT,
+ null, null));
+ return fieldConfigs;
}
@AfterClass
@@ -185,31 +212,20 @@ public class TextSearchQueriesTest extends
BaseQueriesTest {
new FieldConfig(textIndexColumn,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT, null,
null));
}
- TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ _tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
.setNoDictionaryColumns(RAW_TEXT_INDEX_COLUMNS).setInvertedIndexColumns(DICT_TEXT_INDEX_COLUMNS)
.setFieldConfigList(fieldConfigs).build();
- Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
- .addSingleValueDimension(QUERY_LOG_TEXT_COL_NAME,
FieldSpec.DataType.STRING)
- .addSingleValueDimension(SKILLS_TEXT_COL_NAME,
FieldSpec.DataType.STRING)
- .addSingleValueDimension(SKILLS_TEXT_COL_DICT_NAME,
FieldSpec.DataType.STRING)
- .addSingleValueDimension(SKILLS_TEXT_COL_MULTI_TERM_NAME,
FieldSpec.DataType.STRING)
- .addSingleValueDimension(SKILLS_TEXT_NO_RAW_NAME,
FieldSpec.DataType.STRING)
- .addMultiValueDimension(SKILLS_TEXT_MV_COL_NAME,
FieldSpec.DataType.STRING)
- .addMultiValueDimension(SKILLS_TEXT_MV_COL_DICT_NAME,
FieldSpec.DataType.STRING)
- .addMetric(INT_COL_NAME, FieldSpec.DataType.INT).build();
- SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig,
schema);
+ SegmentGeneratorConfig config = new SegmentGeneratorConfig(_tableConfig,
SCHEMA);
config.setOutDir(INDEX_DIR.getPath());
config.setTableName(TABLE_NAME);
config.setSegmentName(SEGMENT_NAME);
- addTextIndexProp(config, SKILLS_TEXT_NO_RAW_NAME, ImmutableMap.<String,
String>builder()
- .put(FieldConfig.TEXT_INDEX_NO_RAW_DATA, "true")
- .put(FieldConfig.TEXT_INDEX_RAW_VALUE, "ILoveCoding")
- .build());
- addTextIndexProp(config, SKILLS_TEXT_COL_NAME, ImmutableMap.<String,
String>builder()
- .put(FieldConfig.TEXT_INDEX_STOP_WORD_INCLUDE_KEY, "coordinator")
- .put(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY, "it, those")
- .put(FieldConfig.TEXT_INDEX_ENABLE_PREFIX_SUFFIX_PHRASE_QUERIES,
"true")
- .build());
+ addTextIndexProp(config, SKILLS_TEXT_NO_RAW_NAME,
+ ImmutableMap.<String,
String>builder().put(FieldConfig.TEXT_INDEX_NO_RAW_DATA, "true")
+ .put(FieldConfig.TEXT_INDEX_RAW_VALUE, "ILoveCoding").build());
+ addTextIndexProp(config, SKILLS_TEXT_COL_NAME,
+ ImmutableMap.<String,
String>builder().put(FieldConfig.TEXT_INDEX_STOP_WORD_INCLUDE_KEY,
"coordinator")
+ .put(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY, "it, those")
+ .put(FieldConfig.TEXT_INDEX_ENABLE_PREFIX_SUFFIX_PHRASE_QUERIES,
"true").build());
addTextIndexProp(config, SKILLS_TEXT_COL_DICT_NAME,
Collections.singletonMap(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY,
""));
SegmentIndexCreationDriverImpl driver = new
SegmentIndexCreationDriverImpl();
diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkNativeVsLuceneTextIndex.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkNativeVsLuceneTextIndex.java
index 649cc8ebb4..23e481a10e 100644
--- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkNativeVsLuceneTextIndex.java
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkNativeVsLuceneTextIndex.java
@@ -22,10 +22,8 @@ import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.core.common.Operator;
@@ -85,6 +83,9 @@ public class BenchmarkNativeVsLuceneTextIndex {
+ "WHERE TEXT_CONTAINS(DOMAIN_NAMES_COL, 'sac.*') OR
TEXT_CONTAINS(DOMAIN_NAMES_COL, 'vic.*')";
private static final String LUCENE_QUERY =
"SELECT SUM(INT_COL) FROM MyTable WHERE TEXT_MATCH(DOMAIN_NAMES_COL,
'sac* OR vic*')";
+ private static final Schema SCHEMA = new
Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension(DOMAIN_NAMES_COL, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(INT_COL, FieldSpec.DataType.INT).build();
private IndexSegment _indexSegment;
@@ -147,23 +148,8 @@ public class BenchmarkNativeVsLuceneTextIndex {
private void buildSegment(FSTType fstType)
throws Exception {
List<GenericRow> rows = createTestData(_numRows);
- List<FieldConfig> fieldConfigs = new ArrayList<>();
- Map<String, String> propertiesMap = new HashMap<>();
-
- if (fstType == FSTType.NATIVE) {
- propertiesMap.put(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL);
- }
-
- fieldConfigs.add(
- new FieldConfig(DOMAIN_NAMES_COL, FieldConfig.EncodingType.DICTIONARY,
FieldConfig.IndexType.TEXT, null,
- propertiesMap));
-
- TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
-
.setInvertedIndexColumns(Arrays.asList(DOMAIN_NAMES_COL)).setFieldConfigList(fieldConfigs).build();
- Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
- .addSingleValueDimension(DOMAIN_NAMES_COL, FieldSpec.DataType.STRING)
- .addSingleValueDimension(INT_COL, FieldSpec.DataType.INT).build();
- SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig,
schema);
+ TableConfig tableConfig = getTableConfig(fstType);
+ SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig,
SCHEMA);
config.setOutDir(INDEX_DIR.getPath());
config.setTableName(TABLE_NAME);
config.setSegmentName(SEGMENT_NAME_NATIVE);
@@ -178,28 +164,22 @@ public class BenchmarkNativeVsLuceneTextIndex {
private ImmutableSegment loadSegment(FSTType fstType)
throws Exception {
- IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
- Map<String, String> propertiesMap = new HashMap<>();
+ TableConfig tableConfig = getTableConfig(fstType);
+ IndexLoadingConfig indexLoadingConfig = new
IndexLoadingConfig(tableConfig, SCHEMA);
+ return ImmutableSegmentLoader.load(new File(INDEX_DIR,
SEGMENT_NAME_NATIVE), indexLoadingConfig);
+ }
+ private static TableConfig getTableConfig(FSTType fstType) {
+ Map<String, String> propertiesMap = new HashMap<>();
if (fstType == FSTType.NATIVE) {
propertiesMap.put(FieldConfig.TEXT_FST_TYPE,
FieldConfig.TEXT_NATIVE_FST_LITERAL);
}
-
- Set<String> textIndexCols = new HashSet<>();
- textIndexCols.add(DOMAIN_NAMES_COL);
- indexLoadingConfig.setTextIndexColumns(textIndexCols);
- indexLoadingConfig.setFSTIndexType(fstType);
- Set<String> invertedIndexCols = new HashSet<>();
- invertedIndexCols.add(DOMAIN_NAMES_COL);
- indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
-
- if (fstType == FSTType.NATIVE) {
- Map<String, Map<String, String>> columnPropertiesParentMap = new
HashMap<>();
- columnPropertiesParentMap.put(DOMAIN_NAMES_COL, propertiesMap);
- indexLoadingConfig.setColumnProperties(columnPropertiesParentMap);
- }
-
- return ImmutableSegmentLoader.load(new File(INDEX_DIR,
SEGMENT_NAME_NATIVE), indexLoadingConfig);
+ List<FieldConfig> fieldConfigs = List.of(
+ new FieldConfig(DOMAIN_NAMES_COL, FieldConfig.EncodingType.DICTIONARY,
FieldConfig.IndexType.TEXT, null,
+ fstType == FSTType.NATIVE ? propertiesMap : null));
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+
.setInvertedIndexColumns(List.of(DOMAIN_NAMES_COL)).setFieldConfigList(fieldConfigs).build();
+ return tableConfig;
}
@Benchmark
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]