siddharthteotia commented on a change in pull request #7885:
URL: https://github.com/apache/pinot/pull/7885#discussion_r770163310
##########
File path:
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
##########
@@ -179,126 +167,71 @@ public void init(SegmentGeneratorConfig
segmentCreationSpec, SegmentIndexCreatio
}
String columnName = fieldSpec.getName();
- DataType storedType = fieldSpec.getDataType().getStoredType();
- ColumnIndexCreationInfo indexCreationInfo =
indexCreationInfoMap.get(columnName);
- Preconditions.checkNotNull(indexCreationInfo, "Missing index creation
info for column: %s", columnName);
- boolean dictEnabledColumn = createDictionaryForColumn(indexCreationInfo,
segmentCreationSpec, fieldSpec);
-
+ ColumnIndexCreationInfo columnIndexCreationInfo =
indexCreationInfoMap.get(columnName);
+ Preconditions.checkNotNull(columnIndexCreationInfo, "Missing index
creation info for column: %s", columnName);
+ boolean dictEnabledColumn =
createDictionaryForColumn(columnIndexCreationInfo, segmentCreationSpec,
fieldSpec);
+ Preconditions.checkState(dictEnabledColumn ||
!invertedIndexColumns.contains(columnName),
+ "Cannot create inverted index for raw index column: %s", columnName);
+ ColumnMetadata columnMetadata = ColumnMetadataImpl.builder()
+ .setCardinality(columnIndexCreationInfo.getDistinctValueCount())
+ .setHasDictionary(dictEnabledColumn)
+ .setFieldSpec(fieldSpec)
+ .setTotalDocs(segmentIndexCreationInfo.getTotalDocs())
+
.setTotalNumberOfEntries(columnIndexCreationInfo.getTotalNumberOfEntries())
+ .setSorted(columnIndexCreationInfo.isSorted())
+ .build();
+ IndexCreationContext context = IndexCreationContext.builder()
+ .withIndexDir(_indexDir)
+ .withColumnMetadata(columnMetadata)
+ .withSegmentGeneratorConfig(segmentCreationSpec)
+ .withColumnIndexCreationInfo(columnIndexCreationInfo)
+ .withFSTType(_config.getFSTIndexType())
+ .withH3IndexConfig(h3IndexConfigs.get(columnName))
+ .withCompressionType(dictEnabledColumn ? null :
getColumnCompressionType(segmentCreationSpec, fieldSpec))
+ .build();
+ // Initialize forward index creator
+ _forwardIndexCreatorMap.put(columnName,
_indexCreatorProvider.newForwardIndexCreator(context));
+
+ // Initialize inverted index creator; skip creating inverted index if
sorted
+ if (invertedIndexColumns.contains(columnName) &&
!columnIndexCreationInfo.isSorted()) {
+ _invertedIndexCreatorMap.put(columnName,
_indexCreatorProvider.newInvertedIndexCreator(context));
+ }
if (dictEnabledColumn) {
// Create dictionary-encoded index
-
// Initialize dictionary creator
SegmentDictionaryCreator dictionaryCreator =
- new
SegmentDictionaryCreator(indexCreationInfo.getSortedUniqueElementsArray(),
fieldSpec, _indexDir,
- indexCreationInfo.isUseVarLengthDictionary());
+ new
SegmentDictionaryCreator(columnIndexCreationInfo.getSortedUniqueElementsArray(),
fieldSpec, _indexDir,
+ columnIndexCreationInfo.isUseVarLengthDictionary());
_dictionaryCreatorMap.put(columnName, dictionaryCreator);
-
// Create dictionary
try {
dictionaryCreator.build();
} catch (Exception e) {
LOGGER.error("Error building dictionary for field: {}, cardinality:
{}, number of bytes per entry: {}",
- fieldSpec.getName(), indexCreationInfo.getDistinctValueCount(),
dictionaryCreator.getNumBytesPerEntry());
+ fieldSpec.getName(),
columnIndexCreationInfo.getDistinctValueCount(),
+ dictionaryCreator.getNumBytesPerEntry());
throw e;
}
-
- // Initialize forward index creator
- int cardinality = indexCreationInfo.getDistinctValueCount();
- if (fieldSpec.isSingleValueField()) {
- if (indexCreationInfo.isSorted()) {
- _forwardIndexCreatorMap.put(columnName,
- new SingleValueSortedForwardIndexCreator(_indexDir,
columnName, cardinality));
- } else {
- _forwardIndexCreatorMap.put(columnName,
- new SingleValueUnsortedForwardIndexCreator(_indexDir,
columnName, cardinality, _totalDocs));
- }
- } else {
- _forwardIndexCreatorMap.put(columnName,
- new MultiValueUnsortedForwardIndexCreator(_indexDir, columnName,
cardinality, _totalDocs,
- indexCreationInfo.getTotalNumberOfEntries()));
- }
-
- // Initialize inverted index creator; skip creating inverted index if
sorted
- if (invertedIndexColumns.contains(columnName) &&
!indexCreationInfo.isSorted()) {
- if (segmentCreationSpec.isOnHeap()) {
- _invertedIndexCreatorMap.put(columnName,
- new OnHeapBitmapInvertedIndexCreator(_indexDir, columnName,
cardinality));
- } else {
- _invertedIndexCreatorMap.put(columnName,
- new OffHeapBitmapInvertedIndexCreator(_indexDir, fieldSpec,
cardinality, _totalDocs,
- indexCreationInfo.getTotalNumberOfEntries()));
- }
- }
- } else {
- // Create raw index
- Preconditions.checkState(!invertedIndexColumns.contains(columnName),
- "Cannot create inverted index for raw index column: %s",
columnName);
-
- ChunkCompressionType compressionType =
getColumnCompressionType(segmentCreationSpec, fieldSpec);
-
- // Initialize forward index creator
- boolean deriveNumDocsPerChunk =
- shouldDeriveNumDocsPerChunk(columnName,
segmentCreationSpec.getColumnProperties());
- int writerVersion = rawIndexWriterVersion(columnName,
segmentCreationSpec.getColumnProperties());
- if (fieldSpec.isSingleValueField()) {
- _forwardIndexCreatorMap.put(columnName,
- getRawIndexCreatorForSVColumn(_indexDir, compressionType,
columnName, storedType, _totalDocs,
- indexCreationInfo.getLengthOfLongestEntry(),
deriveNumDocsPerChunk, writerVersion));
- } else {
- _forwardIndexCreatorMap.put(columnName,
- getRawIndexCreatorForMVColumn(_indexDir, compressionType,
columnName, storedType, _totalDocs,
- indexCreationInfo.getMaxNumberOfMultiValueElements(),
deriveNumDocsPerChunk, writerVersion,
- indexCreationInfo.getMaxRowLengthInBytes()));
- }
}
if (textIndexColumns.contains(columnName)) {
// Initialize text index creator
- Preconditions.checkState(storedType == DataType.STRING,
+ Preconditions.checkState(fieldSpec.getDataType().getStoredType() ==
FieldSpec.DataType.STRING,
"Text index is currently only supported on STRING type columns");
_textIndexCreatorMap.put(columnName,
new LuceneTextIndexCreator(columnName, _indexDir, true /*
commitOnClose */));
Review comment:
I don't follow this. When I reviewed it last time,
TextIndexCreatorProvider supported both Lucene text and FST -
https://github.com/apache/pinot/pull/7885#discussion_r768304406
Why do we have to exclude it? The same distinction that we have today
between the Lucene text index, Lucene FST and native FST should be carried into
this interface. When native FST was designed, we had agreed that the other 2
are not going to go away from the code (although users might be provided a
migration path if need be). So, since they will continue to exist,
TextIndexCreatorProvider should support all flavors, IMO.
##########
File path:
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
##########
@@ -179,126 +167,71 @@ public void init(SegmentGeneratorConfig
segmentCreationSpec, SegmentIndexCreatio
}
String columnName = fieldSpec.getName();
- DataType storedType = fieldSpec.getDataType().getStoredType();
- ColumnIndexCreationInfo indexCreationInfo =
indexCreationInfoMap.get(columnName);
- Preconditions.checkNotNull(indexCreationInfo, "Missing index creation
info for column: %s", columnName);
- boolean dictEnabledColumn = createDictionaryForColumn(indexCreationInfo,
segmentCreationSpec, fieldSpec);
-
+ ColumnIndexCreationInfo columnIndexCreationInfo =
indexCreationInfoMap.get(columnName);
+ Preconditions.checkNotNull(columnIndexCreationInfo, "Missing index
creation info for column: %s", columnName);
+ boolean dictEnabledColumn =
createDictionaryForColumn(columnIndexCreationInfo, segmentCreationSpec,
fieldSpec);
+ Preconditions.checkState(dictEnabledColumn ||
!invertedIndexColumns.contains(columnName),
+ "Cannot create inverted index for raw index column: %s", columnName);
+ ColumnMetadata columnMetadata = ColumnMetadataImpl.builder()
+ .setCardinality(columnIndexCreationInfo.getDistinctValueCount())
+ .setHasDictionary(dictEnabledColumn)
+ .setFieldSpec(fieldSpec)
+ .setTotalDocs(segmentIndexCreationInfo.getTotalDocs())
+
.setTotalNumberOfEntries(columnIndexCreationInfo.getTotalNumberOfEntries())
+ .setSorted(columnIndexCreationInfo.isSorted())
+ .build();
+ IndexCreationContext context = IndexCreationContext.builder()
+ .withIndexDir(_indexDir)
+ .withColumnMetadata(columnMetadata)
+ .withSegmentGeneratorConfig(segmentCreationSpec)
+ .withColumnIndexCreationInfo(columnIndexCreationInfo)
+ .withFSTType(_config.getFSTIndexType())
+ .withH3IndexConfig(h3IndexConfigs.get(columnName))
+ .withCompressionType(dictEnabledColumn ? null :
getColumnCompressionType(segmentCreationSpec, fieldSpec))
+ .build();
+ // Initialize forward index creator
+ _forwardIndexCreatorMap.put(columnName,
_indexCreatorProvider.newForwardIndexCreator(context));
+
+ // Initialize inverted index creator; skip creating inverted index if
sorted
+ if (invertedIndexColumns.contains(columnName) &&
!columnIndexCreationInfo.isSorted()) {
+ _invertedIndexCreatorMap.put(columnName,
_indexCreatorProvider.newInvertedIndexCreator(context));
+ }
if (dictEnabledColumn) {
// Create dictionary-encoded index
-
// Initialize dictionary creator
SegmentDictionaryCreator dictionaryCreator =
- new
SegmentDictionaryCreator(indexCreationInfo.getSortedUniqueElementsArray(),
fieldSpec, _indexDir,
- indexCreationInfo.isUseVarLengthDictionary());
+ new
SegmentDictionaryCreator(columnIndexCreationInfo.getSortedUniqueElementsArray(),
fieldSpec, _indexDir,
+ columnIndexCreationInfo.isUseVarLengthDictionary());
_dictionaryCreatorMap.put(columnName, dictionaryCreator);
-
// Create dictionary
try {
dictionaryCreator.build();
} catch (Exception e) {
LOGGER.error("Error building dictionary for field: {}, cardinality:
{}, number of bytes per entry: {}",
- fieldSpec.getName(), indexCreationInfo.getDistinctValueCount(),
dictionaryCreator.getNumBytesPerEntry());
+ fieldSpec.getName(),
columnIndexCreationInfo.getDistinctValueCount(),
+ dictionaryCreator.getNumBytesPerEntry());
throw e;
}
-
- // Initialize forward index creator
- int cardinality = indexCreationInfo.getDistinctValueCount();
- if (fieldSpec.isSingleValueField()) {
- if (indexCreationInfo.isSorted()) {
- _forwardIndexCreatorMap.put(columnName,
- new SingleValueSortedForwardIndexCreator(_indexDir,
columnName, cardinality));
- } else {
- _forwardIndexCreatorMap.put(columnName,
- new SingleValueUnsortedForwardIndexCreator(_indexDir,
columnName, cardinality, _totalDocs));
- }
- } else {
- _forwardIndexCreatorMap.put(columnName,
- new MultiValueUnsortedForwardIndexCreator(_indexDir, columnName,
cardinality, _totalDocs,
- indexCreationInfo.getTotalNumberOfEntries()));
- }
-
- // Initialize inverted index creator; skip creating inverted index if
sorted
- if (invertedIndexColumns.contains(columnName) &&
!indexCreationInfo.isSorted()) {
- if (segmentCreationSpec.isOnHeap()) {
- _invertedIndexCreatorMap.put(columnName,
- new OnHeapBitmapInvertedIndexCreator(_indexDir, columnName,
cardinality));
- } else {
- _invertedIndexCreatorMap.put(columnName,
- new OffHeapBitmapInvertedIndexCreator(_indexDir, fieldSpec,
cardinality, _totalDocs,
- indexCreationInfo.getTotalNumberOfEntries()));
- }
- }
- } else {
- // Create raw index
- Preconditions.checkState(!invertedIndexColumns.contains(columnName),
- "Cannot create inverted index for raw index column: %s",
columnName);
-
- ChunkCompressionType compressionType =
getColumnCompressionType(segmentCreationSpec, fieldSpec);
-
- // Initialize forward index creator
- boolean deriveNumDocsPerChunk =
- shouldDeriveNumDocsPerChunk(columnName,
segmentCreationSpec.getColumnProperties());
- int writerVersion = rawIndexWriterVersion(columnName,
segmentCreationSpec.getColumnProperties());
- if (fieldSpec.isSingleValueField()) {
- _forwardIndexCreatorMap.put(columnName,
- getRawIndexCreatorForSVColumn(_indexDir, compressionType,
columnName, storedType, _totalDocs,
- indexCreationInfo.getLengthOfLongestEntry(),
deriveNumDocsPerChunk, writerVersion));
- } else {
- _forwardIndexCreatorMap.put(columnName,
- getRawIndexCreatorForMVColumn(_indexDir, compressionType,
columnName, storedType, _totalDocs,
- indexCreationInfo.getMaxNumberOfMultiValueElements(),
deriveNumDocsPerChunk, writerVersion,
- indexCreationInfo.getMaxRowLengthInBytes()));
- }
}
if (textIndexColumns.contains(columnName)) {
// Initialize text index creator
- Preconditions.checkState(storedType == DataType.STRING,
+ Preconditions.checkState(fieldSpec.getDataType().getStoredType() ==
FieldSpec.DataType.STRING,
"Text index is currently only supported on STRING type columns");
_textIndexCreatorMap.put(columnName,
new LuceneTextIndexCreator(columnName, _indexDir, true /*
commitOnClose */));
Review comment:
I don't follow this. When I reviewed it last time,
TextIndexCreatorProvider supported both Lucene text and FST -
https://github.com/apache/pinot/pull/7885#discussion_r768304406
Why do we have to exclude it? The same distinction that we have today
between the Lucene text index, Lucene FST and native FST should be carried into
this interface. When native FST was designed, we had agreed that the other 2
are not going to go away from the code (although users might be provided a
migration path if need be). So, since they will continue to exist,
TextIndexCreatorProvider should support all flavors, IMO.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]