This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new ec1d187475 Exclude dimensions from star-tree index stored type check
(#13355)
ec1d187475 is described below
commit ec1d1874758a7a3ef30402c12de62e789223f8a0
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Tue Jun 11 10:16:58 2024 -0700
Exclude dimensions from star-tree index stored type check (#13355)
---
.../pinot/core/startree/v2/BaseStarTreeV2Test.java | 58 +++++++-------
.../segment/store/SegmentLocalFSDirectory.java | 3 +-
.../local/segment/store/StarTreeIndexReader.java | 22 ++----
.../startree/v2/builder/MultipleTreesBuilder.java | 16 ++--
.../v2/builder/StarTreeIndexSeparator.java | 55 ++++----------
.../startree/v2/store/StarTreeIndexMapUtils.java | 32 ++++----
.../startree/v2/store/StarTreeLoaderUtils.java | 1 +
.../store/ColumnIndexDirectoryTestHelper.java | 1 +
.../v2/builder/StarTreeIndexSeparatorTest.java | 88 ++++++++--------------
.../apache/pinot/segment/spi/SegmentMetadata.java | 1 +
.../spi/index/metadata/SegmentMetadataImpl.java | 1 +
11 files changed, 115 insertions(+), 163 deletions(-)
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java
b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java
index 847d9e1568..732b8bc570 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java
@@ -95,40 +95,43 @@ abstract class BaseStarTreeV2Test<R, A> {
private static final int MAX_LEAF_RECORDS = RANDOM.nextInt(100) + 1;
// Using column names with '__' to make sure regular table columns with '__'
in the name aren't wrongly interpreted
// as AggregationFunctionColumnPair
- private static final String DIMENSION_D1 = "d1__COLUMN_NAME";
- private static final String DIMENSION_D2 = "__d2";
+ private static final String DIMENSION1 = "d1__COLUMN_NAME";
+ private static final String DIMENSION2 = "DISTINCTCOUNTRAWHLL__d2";
private static final int DIMENSION_CARDINALITY = 100;
private static final String METRIC = "m";
// Supported filters
- private static final String QUERY_FILTER_AND = " WHERE d1__COLUMN_NAME = 0
AND __d2 < 10";
+ private static final String QUERY_FILTER_AND = String.format(" WHERE %1$s =
0 AND %2$s < 10", DIMENSION1, DIMENSION2);
// StarTree supports OR predicates only on a single dimension
- private static final String QUERY_FILTER_OR = " WHERE d1__COLUMN_NAME > 10
OR d1__COLUMN_NAME < 50";
- private static final String QUERY_FILTER_NOT = " WHERE NOT d1__COLUMN_NAME >
10";
- private static final String QUERY_FILTER_AND_NOT = " WHERE d1__COLUMN_NAME >
10 AND NOT __d2 < 10";
- private static final String QUERY_FILTER_OR_NOT = " WHERE d1__COLUMN_NAME >
50 OR NOT d1__COLUMN_NAME > 10";
- private static final String QUERY_NOT_NOT = " WHERE NOT NOT d1__COLUMN_NAME
> 10";
+ private static final String QUERY_FILTER_OR = String.format(" WHERE %1$s >
10 OR %1$s < 50", DIMENSION1);
+ private static final String QUERY_FILTER_NOT = String.format(" WHERE NOT %s
> 10", DIMENSION1);
+ private static final String QUERY_FILTER_AND_NOT =
+ String.format(" WHERE %1$s > 10 AND NOT %2$s < 10", DIMENSION1,
DIMENSION2);
+ private static final String QUERY_FILTER_OR_NOT = String.format(" WHERE %1$s
> 50 OR NOT %1$s > 10", DIMENSION1);
+ private static final String QUERY_NOT_NOT = String.format(" WHERE NOT NOT %s
> 10", DIMENSION1);
private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS =
- " WHERE __d2 < 95 AND (NOT d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME >
50)";
+ String.format(" WHERE %2$s < 95 AND (NOT %1$s > 10 OR %1$s > 50)",
DIMENSION1, DIMENSION2);
private static final String
QUERY_FILTER_COMPLEX_AND_MULTIPLE_DIMENSIONS_THREE_PREDICATES =
- " WHERE __d2 < 95 AND NOT __d2 < 25 AND (d1__COLUMN_NAME > 10 OR
d1__COLUMN_NAME < 50)";
+ String.format(" WHERE %2$s < 95 AND NOT %2$s < 25 AND (%1$s > 10 OR %1$s
< 50)", DIMENSION1, DIMENSION2);
private static final String
QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS_THREE_PREDICATES =
- " WHERE (__d2 > 95 OR __d2 < 25) AND (d1__COLUMN_NAME > 10 OR
d1__COLUMN_NAME < 50)";
+ String.format(" WHERE (%2$s > 95 OR %2$s < 25) AND (%1$s > 10 OR %1$s <
50)", DIMENSION1, DIMENSION2);
private static final String QUERY_FILTER_COMPLEX_OR_SINGLE_DIMENSION =
- " WHERE NOT d1__COLUMN_NAME = 95 AND (d1__COLUMN_NAME > 90 OR
d1__COLUMN_NAME < 100)";
+ String.format(" WHERE NOT %1$s = 95 AND (%1$s > 90 OR %1$s < 100)",
DIMENSION1);
// Unsupported filters
- private static final String QUERY_FILTER_OR_MULTIPLE_DIMENSIONS = " WHERE
d1__COLUMN_NAME > 10 OR __d2 < 50";
+ private static final String QUERY_FILTER_OR_MULTIPLE_DIMENSIONS =
+ String.format(" WHERE %1$s > 10 OR %2$s < 50", DIMENSION1, DIMENSION2);
private static final String QUERY_FILTER_OR_ON_AND =
- " WHERE (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50) OR
d1__COLUMN_NAME < 50";
- private static final String QUERY_FILTER_NOT_ON_AND = " WHERE NOT
(d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50)";
- private static final String QUERY_FILTER_NOT_ON_OR = " WHERE NOT
(d1__COLUMN_NAME < 10 OR d1__COLUMN_NAME > 50)";
+ String.format(" WHERE (%1$s > 10 AND %1$s < 50) OR %1$s < 50",
DIMENSION1);
+ private static final String QUERY_FILTER_NOT_ON_AND =
+ String.format(" WHERE NOT (%1$s > 10 AND %1$s < 50)", DIMENSION1);
+ private static final String QUERY_FILTER_NOT_ON_OR = String.format(" WHERE
NOT (%1$s < 10 OR %1$s > 50)", DIMENSION1);
// Always false filters
- private static final String QUERY_FILTER_ALWAYS_FALSE = " WHERE
d1__COLUMN_NAME > 100";
- private static final String QUERY_FILTER_OR_ALWAYS_FALSE = " WHERE
d1__COLUMN_NAME > 100 OR d1__COLUMN_NAME < 0";
+ private static final String QUERY_FILTER_ALWAYS_FALSE = String.format("
WHERE %s > 100", DIMENSION1);
+ private static final String QUERY_FILTER_OR_ALWAYS_FALSE = String.format("
WHERE %1$s > 100 OR %1$s < 0", DIMENSION1);
- private static final String QUERY_GROUP_BY = " GROUP BY __d2";
- private static final String FILTER_AGG_CLAUSE = " FILTER(WHERE
d1__COLUMN_NAME > 10)";
+ private static final String QUERY_GROUP_BY = " GROUP BY " + DIMENSION2;
+ private static final String FILTER_AGG_CLAUSE = String.format(" FILTER(WHERE
%s > 10)", DIMENSION1);
private ValueAggregator _valueAggregator;
private DataType _aggregatedValueType;
@@ -143,8 +146,8 @@ abstract class BaseStarTreeV2Test<R, A> {
_aggregatedValueType = _valueAggregator.getAggregatedValueType();
_aggregation = getAggregation(_valueAggregator.getAggregationType());
- Schema.SchemaBuilder schemaBuilder = new
Schema.SchemaBuilder().addSingleValueDimension(DIMENSION_D1, DataType.INT)
- .addSingleValueDimension(DIMENSION_D2, DataType.INT);
+ Schema.SchemaBuilder schemaBuilder = new
Schema.SchemaBuilder().addSingleValueDimension(DIMENSION1, DataType.INT)
+ .addSingleValueDimension(DIMENSION2, DataType.INT);
DataType rawValueType = getRawValueType();
// Raw value type will be null for COUNT aggregation function
if (rawValueType != null) {
@@ -156,8 +159,8 @@ abstract class BaseStarTreeV2Test<R, A> {
List<GenericRow> segmentRecords = new ArrayList<>(NUM_SEGMENT_RECORDS);
for (int i = 0; i < NUM_SEGMENT_RECORDS; i++) {
GenericRow segmentRecord = new GenericRow();
- segmentRecord.putValue(DIMENSION_D1,
RANDOM.nextInt(DIMENSION_CARDINALITY));
- segmentRecord.putValue(DIMENSION_D2,
RANDOM.nextInt(DIMENSION_CARDINALITY));
+ segmentRecord.putValue(DIMENSION1,
RANDOM.nextInt(DIMENSION_CARDINALITY));
+ segmentRecord.putValue(DIMENSION2,
RANDOM.nextInt(DIMENSION_CARDINALITY));
if (rawValueType != null) {
segmentRecord.putValue(METRIC, getRandomRawValue(RANDOM));
}
@@ -171,10 +174,9 @@ abstract class BaseStarTreeV2Test<R, A> {
driver.init(segmentGeneratorConfig, new
GenericRowRecordReader(segmentRecords));
driver.build();
- StarTreeIndexConfig starTreeIndexConfig =
- new StarTreeIndexConfig(Arrays.asList(DIMENSION_D1, DIMENSION_D2),
null, null, Collections.singletonList(
- new StarTreeAggregationConfig(METRIC,
_valueAggregator.getAggregationType().getName(),
- getCompressionCodec(), true, getIndexVersion(), null, null)),
MAX_LEAF_RECORDS);
+ StarTreeIndexConfig starTreeIndexConfig = new
StarTreeIndexConfig(Arrays.asList(DIMENSION1, DIMENSION2), null, null,
+ Collections.singletonList(new StarTreeAggregationConfig(METRIC,
_valueAggregator.getAggregationType().getName(),
+ getCompressionCodec(), true, getIndexVersion(), null, null)),
MAX_LEAF_RECORDS);
File indexDir = new File(TEMP_DIR, SEGMENT_NAME);
// Randomly build star-tree using on-heap or off-heap mode
MultipleTreesBuilder.BuildMode buildMode =
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java
index e158219a08..297bf036f1 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java
@@ -28,7 +28,6 @@ import java.util.Collections;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import javax.annotation.Nullable;
-import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.configuration2.ex.ConfigurationException;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.segment.spi.creator.SegmentVersion;
@@ -268,7 +267,7 @@ public class SegmentLocalFSDirectory extends
SegmentDirectory {
default:
break;
}
- if
(CollectionUtils.isNotEmpty(_segmentMetadata.getStarTreeV2MetadataList())) {
+ if (_segmentMetadata.getStarTreeV2MetadataList() != null) {
try {
_starTreeIndexReader = new StarTreeIndexReader(_segmentDirectory,
_segmentMetadata, _readMode);
} catch (ConfigurationException e) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReader.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReader.java
index 4c10f46143..537a16d835 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReader.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReader.java
@@ -18,7 +18,6 @@
*/
package org.apache.pinot.segment.local.segment.store;
-import com.google.common.base.Preconditions;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
@@ -51,10 +50,10 @@ public class StarTreeIndexReader implements Closeable {
private static final Logger LOGGER =
LoggerFactory.getLogger(StarTreeIndexReader.class);
private final File _segmentDirectory;
- private final SegmentMetadataImpl _segmentMetadata;
+ private final List<StarTreeV2Metadata> _starTreeMetadataList;
+ private final int _numStarTrees;
private final ReadMode _readMode;
private final File _indexFile;
- private final int _numStarTrees;
// StarTree index can contain multiple index instances, identified by ids
like 0, 1, etc.
private final List<Map<IndexKey, StarTreeIndexEntry>> _indexColumnEntries;
@@ -67,17 +66,11 @@ public class StarTreeIndexReader implements Closeable {
*/
public StarTreeIndexReader(File segmentDirectory, SegmentMetadataImpl
segmentMetadata, ReadMode readMode)
throws IOException, ConfigurationException {
- Preconditions.checkNotNull(segmentDirectory);
- Preconditions.checkArgument(segmentDirectory.exists(), "SegmentDirectory:
" + segmentDirectory + " does not exist");
- Preconditions.checkArgument(segmentDirectory.isDirectory(),
- "SegmentDirectory: " + segmentDirectory + " is not a directory");
- Preconditions.checkNotNull(segmentMetadata);
- Preconditions.checkNotNull(readMode);
-
_segmentDirectory = segmentDirectory;
- _segmentMetadata = segmentMetadata;
+ _starTreeMetadataList = segmentMetadata.getStarTreeV2MetadataList();
+ assert _starTreeMetadataList != null;
+ _numStarTrees = _starTreeMetadataList.size();
_readMode = readMode;
- _numStarTrees = _segmentMetadata.getStarTreeV2MetadataList().size();
_indexFile = new File(_segmentDirectory,
StarTreeV2Constants.INDEX_FILE_NAME);
_indexColumnEntries = new ArrayList<>(_numStarTrees);
load();
@@ -88,7 +81,7 @@ public class StarTreeIndexReader implements Closeable {
List<Map<StarTreeIndexMapUtils.IndexKey,
StarTreeIndexMapUtils.IndexValue>> indexMapList;
try (InputStream inputStream = new FileInputStream(
new File(_segmentDirectory, StarTreeV2Constants.INDEX_MAP_FILE_NAME)))
{
- indexMapList = StarTreeIndexMapUtils.loadFromInputStream(inputStream,
_numStarTrees);
+ indexMapList = StarTreeIndexMapUtils.loadFromInputStream(inputStream,
_starTreeMetadataList);
}
if (_readMode == ReadMode.heap) {
_dataBuffer = PinotDataBuffer.loadFile(_indexFile, 0,
_indexFile.length(), ByteOrder.LITTLE_ENDIAN,
@@ -112,8 +105,7 @@ public class StarTreeIndexReader implements Closeable {
columnEntries.put(new IndexKey(String.valueOf(starTreeId),
StandardIndexes.inverted()),
new
StarTreeIndexEntry(indexMap.get(StarTreeIndexMapUtils.STAR_TREE_INDEX_KEY),
_dataBuffer,
ByteOrder.LITTLE_ENDIAN));
- List<StarTreeV2Metadata> starTreeMetadataList =
_segmentMetadata.getStarTreeV2MetadataList();
- StarTreeV2Metadata starTreeMetadata = starTreeMetadataList.get(starTreeId);
+ StarTreeV2Metadata starTreeMetadata =
_starTreeMetadataList.get(starTreeId);
// Load dimension forward indexes
for (String dimension : starTreeMetadata.getDimensionsSplitOrder()) {
columnEntries.put(new IndexKey(dimension, StandardIndexes.forward()),
new StarTreeIndexEntry(
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
index ee060d946b..50dfc8b56e 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
@@ -38,8 +38,10 @@ import
org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils.In
import
org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils.IndexValue;
import org.apache.pinot.segment.spi.ImmutableSegment;
import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants;
import
org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants.MetadataKey;
+import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
import org.apache.pinot.spi.env.CommonsConfigurationUtils;
@@ -60,6 +62,7 @@ public class MultipleTreesBuilder implements Closeable {
private final List<StarTreeV2BuilderConfig> _builderConfigs;
private final BuildMode _buildMode;
+ private final File _indexDir;
private final File _segmentDirectory;
private final PropertiesConfiguration _metadataProperties;
private final ImmutableSegment _segment;
@@ -82,10 +85,10 @@ public class MultipleTreesBuilder implements Closeable {
Preconditions.checkArgument(CollectionUtils.isNotEmpty(builderConfigs),
"Must provide star-tree builder configs");
_builderConfigs = builderConfigs;
_buildMode = buildMode;
+ _indexDir = indexDir;
_segmentDirectory = SegmentDirectoryPaths.findSegmentDirectory(indexDir);
_metadataProperties =
- CommonsConfigurationUtils.fromFile(
- new File(_segmentDirectory,
V1Constants.MetadataKeys.METADATA_FILE_NAME));
+ CommonsConfigurationUtils.fromFile(new File(_segmentDirectory,
V1Constants.MetadataKeys.METADATA_FILE_NAME));
_separator = getSeparator();
_segment = ImmutableSegmentLoader.load(indexDir, ReadMode.mmap);
}
@@ -104,10 +107,10 @@ public class MultipleTreesBuilder implements Closeable {
Preconditions.checkArgument(CollectionUtils.isNotEmpty(indexConfigs) ||
enableDefaultStarTree,
"Must provide star-tree index configs or enable default star-tree");
_buildMode = buildMode;
+ _indexDir = indexDir;
_segmentDirectory = SegmentDirectoryPaths.findSegmentDirectory(indexDir);
_metadataProperties =
- CommonsConfigurationUtils.fromFile(
- new File(_segmentDirectory,
V1Constants.MetadataKeys.METADATA_FILE_NAME));
+ CommonsConfigurationUtils.fromFile(new File(_segmentDirectory,
V1Constants.MetadataKeys.METADATA_FILE_NAME));
Preconditions.checkState(!_metadataProperties.containsKey(MetadataKey.STAR_TREE_COUNT),
"Star-tree already exists");
_segment = ImmutableSegmentLoader.load(indexDir, ReadMode.mmap);
try {
@@ -122,7 +125,8 @@ public class MultipleTreesBuilder implements Closeable {
@Nullable
private StarTreeIndexSeparator getSeparator()
throws Exception {
- if (!_metadataProperties.containsKey(MetadataKey.STAR_TREE_COUNT)) {
+ List<StarTreeV2Metadata> starTreeMetadataList = new
SegmentMetadataImpl(_indexDir).getStarTreeV2MetadataList();
+ if (starTreeMetadataList == null) {
return null;
}
try {
@@ -134,7 +138,7 @@ public class MultipleTreesBuilder implements Closeable {
_separatorTempDir, false);
StarTreeIndexSeparator separator =
new StarTreeIndexSeparator(new File(_separatorTempDir,
StarTreeV2Constants.INDEX_MAP_FILE_NAME),
- new File(_separatorTempDir,
StarTreeV2Constants.INDEX_FILE_NAME), _metadataProperties);
+ new File(_separatorTempDir,
StarTreeV2Constants.INDEX_FILE_NAME), starTreeMetadataList);
_metadataProperties.subset(StarTreeV2Constants.MetadataKey.STAR_TREE_SUBSET).clear();
CommonsConfigurationUtils.saveToFile(_metadataProperties,
new File(_segmentDirectory,
V1Constants.MetadataKeys.METADATA_FILE_NAME));
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java
index c017ea4b6a..29e677f5d5 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparator.java
@@ -29,8 +29,6 @@ import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import org.apache.commons.configuration2.Configuration;
-import org.apache.commons.configuration2.PropertiesConfiguration;
import org.apache.commons.configuration2.ex.ConfigurationException;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils;
@@ -44,53 +42,26 @@ import
org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
* The {@code StarTreeIndexSeparator} pulls out the individual star-trees from
the common star-tree index file
*/
public class StarTreeIndexSeparator implements Closeable {
-
- private final FileChannel _indexFileChannel;
private final List<Map<StarTreeIndexMapUtils.IndexKey,
StarTreeIndexMapUtils.IndexValue>> _indexMapList;
private final List<StarTreeV2BuilderConfig> _builderConfigList;
- private final List<Integer> _totalDocsList;
+ private final List<Integer> _numDocsList;
+ private final FileChannel _indexFileChannel;
- public StarTreeIndexSeparator(File indexMapFile, File indexFile,
PropertiesConfiguration metadataProperties)
+ public StarTreeIndexSeparator(File indexMapFile, File indexFile,
List<StarTreeV2Metadata> starTreeMetadataList)
throws IOException {
- _indexMapList =
- extractIndexMap(indexMapFile,
metadataProperties.getInt(StarTreeV2Constants.MetadataKey.STAR_TREE_COUNT));
- _indexFileChannel = new RandomAccessFile(indexFile, "r").getChannel();
- _builderConfigList = extractBuilderConfigs(metadataProperties);
- _totalDocsList = extractTotalDocsList(metadataProperties);
- }
-
- private List<Map<StarTreeIndexMapUtils.IndexKey,
StarTreeIndexMapUtils.IndexValue>> extractIndexMap(File indexMapFile,
- int numStarTrees) {
try (InputStream inputStream = new FileInputStream(indexMapFile)) {
- return StarTreeIndexMapUtils.loadFromInputStream(inputStream,
numStarTrees);
- } catch (IOException | ConfigurationException e) {
+ _indexMapList = StarTreeIndexMapUtils.loadFromInputStream(inputStream,
starTreeMetadataList);
+ } catch (ConfigurationException e) {
throw new RuntimeException(e);
}
- }
-
- public List<Integer> extractTotalDocsList(PropertiesConfiguration
metadataProperties) {
- List<Integer> totalDocsList = new ArrayList<>(_indexMapList.size());
- for (int i = 0; i < _indexMapList.size(); i++) {
- Configuration metadata =
metadataProperties.subset(StarTreeV2Constants.MetadataKey.getStarTreePrefix(i));
- totalDocsList.add(i,
metadata.getInt(StarTreeV2Constants.MetadataKey.TOTAL_DOCS));
- }
- return totalDocsList;
- }
-
- /**
- * Extract the list of {@link StarTreeV2BuilderConfig} for each of the
star-tree present in the given metadata
- * properties.
- * @param metadataProperties index metadata properties
- * @return List of {@link StarTreeV2BuilderConfig}
- */
- public List<StarTreeV2BuilderConfig>
extractBuilderConfigs(PropertiesConfiguration metadataProperties) {
- List<StarTreeV2BuilderConfig> builderConfigList = new
ArrayList<>(_indexMapList.size());
- for (int i = 0; i < _indexMapList.size(); i++) {
- Configuration metadata =
metadataProperties.subset(StarTreeV2Constants.MetadataKey.getStarTreePrefix(i));
- StarTreeV2Metadata starTreeV2Metadata = new StarTreeV2Metadata(metadata);
- builderConfigList.add(i,
StarTreeV2BuilderConfig.fromMetadata(starTreeV2Metadata));
+ int numStarTrees = starTreeMetadataList.size();
+ _builderConfigList = new ArrayList<>(numStarTrees);
+ _numDocsList = new ArrayList<>(numStarTrees);
+ for (StarTreeV2Metadata starTreeMetadata : starTreeMetadataList) {
+
_builderConfigList.add(StarTreeV2BuilderConfig.fromMetadata(starTreeMetadata));
+ _numDocsList.add(starTreeMetadata.getNumDocs());
}
- return builderConfigList;
+ _indexFileChannel = new RandomAccessFile(indexFile, "r").getChannel();
}
/**
@@ -109,7 +80,7 @@ public class StarTreeIndexSeparator implements Closeable {
return -1;
}
separate(starTreeOutputDir, treeIndex);
- return _totalDocsList.get(treeIndex);
+ return _numDocsList.get(treeIndex);
}
private void separate(File starTreeOutputDir, int treeIndex)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
index 7ac45d3b18..fdc6e3703b 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
@@ -24,16 +24,18 @@ import java.io.InputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
-import javax.annotation.Nonnull;
+import java.util.Set;
import javax.annotation.Nullable;
import org.apache.commons.configuration2.PropertiesConfiguration;
import org.apache.commons.configuration2.ex.ConfigurationException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import
org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
+import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
import org.apache.pinot.spi.env.CommonsConfigurationUtils;
@@ -119,10 +121,8 @@ public class StarTreeIndexMapUtils {
@Override
public int compareTo(IndexKey other) {
- return Comparator
- .comparing((IndexKey i) -> i._column,
Comparator.nullsLast(Comparator.naturalOrder()))
- .thenComparing((IndexKey i) -> i._indexType)
- .compare(this, other);
+ return Comparator.comparing((IndexKey i) -> i._column,
Comparator.nullsLast(Comparator.naturalOrder()))
+ .thenComparing((IndexKey i) -> i._indexType).compare(this, other);
}
}
@@ -142,7 +142,7 @@ public class StarTreeIndexMapUtils {
}
@Override
- public int compareTo(@Nonnull IndexValue o) {
+ public int compareTo(IndexValue o) {
return Long.compare(_offset, o._offset);
}
}
@@ -171,10 +171,15 @@ public class StarTreeIndexMapUtils {
/**
* Loads the index maps for multiple star-trees from an input stream.
*/
- public static List<Map<IndexKey, IndexValue>>
loadFromInputStream(InputStream indexMapInputStream, int numStarTrees)
+ public static List<Map<IndexKey, IndexValue>>
loadFromInputStream(InputStream indexMapInputStream,
+ List<StarTreeV2Metadata> starTreeMetadataList)
throws ConfigurationException {
+ assert starTreeMetadataList != null;
+ int numStarTrees = starTreeMetadataList.size();
+ List<Set<String>> dimensionSets = new ArrayList<>(numStarTrees);
List<Map<IndexKey, IndexValue>> indexMaps = new ArrayList<>(numStarTrees);
- for (int i = 0; i < numStarTrees; i++) {
+ for (StarTreeV2Metadata starTreeMetadata : starTreeMetadataList) {
+ dimensionSets.add(new
HashSet<>(starTreeMetadata.getDimensionsSplitOrder()));
indexMaps.add(new HashMap<>());
}
@@ -182,6 +187,7 @@ public class StarTreeIndexMapUtils {
for (String key : CommonsConfigurationUtils.getKeys(configuration)) {
String[] split = StringUtils.split(key, KEY_SEPARATOR);
int starTreeId = Integer.parseInt(split[0]);
+ Set<String> dimensionSet = dimensionSets.get(starTreeId);
Map<IndexKey, IndexValue> indexMap = indexMaps.get(starTreeId);
int columnSplitEndIndex = split.length - 2;
@@ -198,13 +204,9 @@ public class StarTreeIndexMapUtils {
column = StringUtils.join(split, KEY_SEPARATOR, 1,
columnSplitEndIndex);
}
// Convert metric (function-column pair) to stored name for
backward-compatibility
- if (column.contains(AggregationFunctionColumnPair.DELIMITER)) {
- try {
- AggregationFunctionColumnPair functionColumnPair =
AggregationFunctionColumnPair.fromColumnName(column);
- column =
AggregationFunctionColumnPair.resolveToStoredType(functionColumnPair).toColumnName();
- } catch (Exception e) {
- // Ignoring this exception for columns that are not metric
(function-column pair)
- }
+ if (!dimensionSet.contains(column)) {
+ AggregationFunctionColumnPair functionColumnPair =
AggregationFunctionColumnPair.fromColumnName(column);
+ column =
AggregationFunctionColumnPair.resolveToStoredType(functionColumnPair).toColumnName();
}
indexKey = new IndexKey(IndexType.FORWARD_INDEX, column);
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeLoaderUtils.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeLoaderUtils.java
index e798f249d0..f7adc1a07d 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeLoaderUtils.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeLoaderUtils.java
@@ -55,6 +55,7 @@ public class StarTreeLoaderUtils {
SegmentMetadataImpl segmentMetadata, Map<String, ColumnIndexContainer>
indexContainerMap)
throws IOException {
List<StarTreeV2Metadata> starTreeMetadataList =
segmentMetadata.getStarTreeV2MetadataList();
+ assert starTreeMetadataList != null;
int numStarTrees = starTreeMetadataList.size();
List<StarTreeV2> starTrees = new ArrayList<>(numStarTrees);
for (int i = 0; i < numStarTrees; i++) {
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/ColumnIndexDirectoryTestHelper.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/ColumnIndexDirectoryTestHelper.java
index a18b24f591..b6718ee460 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/ColumnIndexDirectoryTestHelper.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/ColumnIndexDirectoryTestHelper.java
@@ -89,6 +89,7 @@ public class ColumnIndexDirectoryTestHelper {
static SegmentMetadataImpl writeMetadata(SegmentVersion version) {
SegmentMetadataImpl segmentMetadata =
Mockito.mock(SegmentMetadataImpl.class);
when(segmentMetadata.getVersion()).thenReturn(version);
+ when(segmentMetadata.getStarTreeV2MetadataList()).thenReturn(null);
ColumnMetadata columnMetadata = Mockito.mock(ColumnMetadata.class);
when(columnMetadata.isSingleValue()).thenReturn(true);
when(columnMetadata.isSorted()).thenReturn(false);
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparatorTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparatorTest.java
index b830493b8d..2e87c58e5d 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparatorTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexSeparatorTest.java
@@ -18,85 +18,63 @@
*/
package org.apache.pinot.segment.local.startree.v2.builder;
-import com.google.common.collect.Lists;
import java.io.File;
-import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
-import java.util.Objects;
-import org.apache.commons.configuration2.PropertiesConfiguration;
-import org.apache.commons.configuration2.ex.ConfigurationException;
+import java.util.Set;
import org.apache.commons.io.FileUtils;
-import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants;
import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
-import org.apache.pinot.spi.env.CommonsConfigurationUtils;
+import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import static
org.apache.pinot.segment.spi.V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION;
import static
org.apache.pinot.segment.spi.V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION;
import static
org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants.STAR_TREE_INDEX_FILE_NAME;
-import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.Assert.assertNotNull;
import static org.testng.AssertJUnit.assertTrue;
public class StarTreeIndexSeparatorTest {
-
private static final String SEGMENT_PATH = "data/startree/segment";
- private static final String TOTAL_DOCS_KEY = "startree.v2.0.total.docs";
-
- private StarTreeIndexSeparator _separator;
- private PropertiesConfiguration _metadataProperties;
- private final StarTreeV2BuilderConfig _builderConfig =
StarTreeV2BuilderConfig.fromIndexConfig(
- new StarTreeIndexConfig(
- Lists.newArrayList("AirlineID", "Origin", "Dest"),
- Lists.newArrayList(),
- Lists.newArrayList("count__*", "max__ArrDelay"),
- null,
- 10));
+ private static final StarTreeV2BuilderConfig BUILDER_CONFIG =
StarTreeV2BuilderConfig.fromIndexConfig(
+ new StarTreeIndexConfig(List.of("AirlineID", "Origin", "Dest"),
List.of(), List.of("count__*", "max__ArrDelay"),
+ null, 10));
+ private static final File TEMP_DIR = new File(FileUtils.getTempDirectory(),
"StarTreeIndexSeparatorTest");
@BeforeClass
- public void setup()
- throws IOException, ConfigurationException {
- ClassLoader classLoader = getClass().getClassLoader();
- URL segmentUrl = classLoader.getResource(SEGMENT_PATH);
- File segmentDir = new File(segmentUrl.getFile());
- _metadataProperties = CommonsConfigurationUtils.fromFile(
- new File(segmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME));
- _separator = new StarTreeIndexSeparator(
- new File(segmentDir, StarTreeV2Constants.INDEX_MAP_FILE_NAME),
- new File(segmentDir, StarTreeV2Constants.INDEX_FILE_NAME),
- _metadataProperties);
- }
-
- @Test
- public void extractTotalDocsListTest() {
- assertNotNull(_separator);
- List<Integer> docsList =
_separator.extractTotalDocsList(_metadataProperties);
- assertNotNull(docsList);
- assertEquals(docsList,
Lists.newArrayList(_metadataProperties.getInt(TOTAL_DOCS_KEY)));
+ public void setUp() {
+ FileUtils.deleteQuietly(TEMP_DIR);
}
- @Test
- public void extractBuilderConfigsTest() {
- List<StarTreeV2BuilderConfig> builderConfigList =
_separator.extractBuilderConfigs(_metadataProperties);
- assertEquals(builderConfigList, Lists.newArrayList(_builderConfig));
+ @AfterClass
+ public void tearDown() {
+ FileUtils.deleteQuietly(TEMP_DIR);
}
@Test
- public void separateTest()
- throws IOException {
- File tempDir = new File(FileUtils.getTempDirectory(), "separateTest");
- _separator.separate(tempDir, _builderConfig);
- List<String> files = Arrays.asList(Objects.requireNonNull(tempDir.list()));
- assertTrue(files.contains(STAR_TREE_INDEX_FILE_NAME));
- _builderConfig.getDimensionsSplitOrder()
- .forEach(dimension -> assertTrue(files.contains(dimension +
UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION)));
- _builderConfig.getFunctionColumnPairs()
- .forEach(dimension -> assertTrue(files.contains(dimension +
RAW_SV_FORWARD_INDEX_FILE_EXTENSION)));
- FileUtils.forceDelete(tempDir);
+ public void testSeparate()
+ throws Exception {
+ URL segmentUrl = getClass().getClassLoader().getResource(SEGMENT_PATH);
+ assertNotNull(segmentUrl);
+ File segmentDir = new File(segmentUrl.getFile());
+ try (StarTreeIndexSeparator separator = new StarTreeIndexSeparator(
+ new File(segmentDir, StarTreeV2Constants.INDEX_MAP_FILE_NAME),
+ new File(segmentDir, StarTreeV2Constants.INDEX_FILE_NAME),
+ new SegmentMetadataImpl(segmentDir).getStarTreeV2MetadataList())) {
+ separator.separate(TEMP_DIR, BUILDER_CONFIG);
+ }
+ String[] fileNames = TEMP_DIR.list();
+ assertNotNull(fileNames);
+ Set<String> fileNameSet = new HashSet<>(Arrays.asList(fileNames));
+ assertTrue(fileNameSet.contains(STAR_TREE_INDEX_FILE_NAME));
+ BUILDER_CONFIG.getDimensionsSplitOrder()
+ .forEach(dimension -> assertTrue(fileNameSet.contains(dimension +
UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION)));
+ BUILDER_CONFIG.getFunctionColumnPairs()
+ .forEach(dimension -> assertTrue(fileNameSet.contains(dimension +
RAW_SV_FORWARD_INDEX_FILE_EXTENSION)));
}
}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/SegmentMetadata.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/SegmentMetadata.java
index 63f897e0a7..b9a1d23700 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/SegmentMetadata.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/SegmentMetadata.java
@@ -92,6 +92,7 @@ public interface SegmentMetadata {
*/
long getLatestIngestionTimestamp();
+ @Nullable
List<StarTreeV2Metadata> getStarTreeV2MetadataList();
Map<String, String> getCustomMap();
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
index b0bb5d04f0..d0b7c84d34 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
@@ -388,6 +388,7 @@ public class SegmentMetadataImpl implements SegmentMetadata
{
return Long.MIN_VALUE;
}
+ @Nullable
@Override
public List<StarTreeV2Metadata> getStarTreeV2MetadataList() {
return _starTreeV2MetadataList;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]