Jackie-Jiang commented on a change in pull request #7638:
URL: https://github.com/apache/pinot/pull/7638#discussion_r736813077
##########
File path:
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/TextIndexHandler.java
##########
@@ -183,4 +170,45 @@ private void createTextIndexForColumn(ColumnMetadata
columnMetadata)
properties.setProperty(getKeyFor(column, TEXT_INDEX_TYPE),
TextIndexType.LUCENE.name());
properties.save();
}
+
+ private void processSVField(boolean hasDictionary, ForwardIndexReader
forwardIndexReader,
+ ForwardIndexReaderContext readerContext, TextIndexCreator
textIndexCreator,
+ int numDocs, ColumnMetadata columnMetadata)
+ throws IOException {
+ if (!hasDictionary) {
+ // text index on raw column, just read the raw forward index
+ VarByteChunkSVForwardIndexReader rawIndexReader =
(VarByteChunkSVForwardIndexReader) forwardIndexReader;
+ BaseChunkSVForwardIndexReader.ChunkReaderContext chunkReaderContext =
+ (BaseChunkSVForwardIndexReader.ChunkReaderContext) readerContext;
+ for (int docId = 0; docId < numDocs; docId++) {
+ textIndexCreator.add(rawIndexReader.getString(docId,
chunkReaderContext));
+ }
Review comment:
(minor) no need to perform the case
```suggestion
for (int docId = 0; docId < numDocs; docId++) {
textIndexCreator.add(forwardIndexReader.getString(docId,
readerContext));
}
```
##########
File path:
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/TextIndexHandler.java
##########
@@ -183,4 +170,45 @@ private void createTextIndexForColumn(ColumnMetadata
columnMetadata)
properties.setProperty(getKeyFor(column, TEXT_INDEX_TYPE),
TextIndexType.LUCENE.name());
properties.save();
}
+
+ private void processSVField(boolean hasDictionary, ForwardIndexReader
forwardIndexReader,
+ ForwardIndexReaderContext readerContext, TextIndexCreator
textIndexCreator,
+ int numDocs, ColumnMetadata columnMetadata)
+ throws IOException {
+ if (!hasDictionary) {
+ // text index on raw column, just read the raw forward index
+ VarByteChunkSVForwardIndexReader rawIndexReader =
(VarByteChunkSVForwardIndexReader) forwardIndexReader;
+ BaseChunkSVForwardIndexReader.ChunkReaderContext chunkReaderContext =
+ (BaseChunkSVForwardIndexReader.ChunkReaderContext) readerContext;
+ for (int docId = 0; docId < numDocs; docId++) {
+ textIndexCreator.add(rawIndexReader.getString(docId,
chunkReaderContext));
+ }
+ } else {
+ // text index on dictionary encoded SV column
+ // read forward index to get dictId
+ // read the raw value from dictionary using dictId
+ try (Dictionary dictionary = LoaderUtils.getDictionary(_segmentWriter,
columnMetadata)) {
+ for (int docId = 0; docId < numDocs; docId++) {
+ int dictId = forwardIndexReader.getDictId(docId, readerContext);
+ textIndexCreator.add(dictionary.getStringValue(dictId));
+ }
+ }
+ }
+ }
+
+ private void processMVField(boolean hasDictionary, ForwardIndexReader
forwardIndexReader,
+ TextIndexCreator textIndexCreator, int numDocs, ColumnMetadata
columnMetadata) {
+ if (!hasDictionary) {
+ // text index on raw column, just read the raw forward index
+ VarByteChunkMVForwardIndexReader rawIndexReader =
(VarByteChunkMVForwardIndexReader) forwardIndexReader;
+ for (int docId = 0; docId < numDocs; docId++) {
+ final BaseChunkSVForwardIndexReader.ChunkReaderContext context =
rawIndexReader.createContext();
+ String[] values = new String[columnMetadata.getTotalDocs()];
+ rawIndexReader.getStringMV(docId, values, context);
+ textIndexCreator.add(values);
+ }
+ } else {
+ throw new UnsupportedOperationException("Multi value field on dictionary
encoded column not supported");
Review comment:
This is supported. You may read dictionary ids from the forward index
reader, then create the values using dictionary
##########
File path:
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/TextIndexHandler.java
##########
@@ -183,4 +170,45 @@ private void createTextIndexForColumn(ColumnMetadata
columnMetadata)
properties.setProperty(getKeyFor(column, TEXT_INDEX_TYPE),
TextIndexType.LUCENE.name());
properties.save();
}
+
+ private void processSVField(boolean hasDictionary, ForwardIndexReader
forwardIndexReader,
+ ForwardIndexReaderContext readerContext, TextIndexCreator
textIndexCreator,
+ int numDocs, ColumnMetadata columnMetadata)
+ throws IOException {
+ if (!hasDictionary) {
+ // text index on raw column, just read the raw forward index
+ VarByteChunkSVForwardIndexReader rawIndexReader =
(VarByteChunkSVForwardIndexReader) forwardIndexReader;
+ BaseChunkSVForwardIndexReader.ChunkReaderContext chunkReaderContext =
+ (BaseChunkSVForwardIndexReader.ChunkReaderContext) readerContext;
+ for (int docId = 0; docId < numDocs; docId++) {
+ textIndexCreator.add(rawIndexReader.getString(docId,
chunkReaderContext));
+ }
+ } else {
+ // text index on dictionary encoded SV column
+ // read forward index to get dictId
+ // read the raw value from dictionary using dictId
+ try (Dictionary dictionary = LoaderUtils.getDictionary(_segmentWriter,
columnMetadata)) {
+ for (int docId = 0; docId < numDocs; docId++) {
+ int dictId = forwardIndexReader.getDictId(docId, readerContext);
+ textIndexCreator.add(dictionary.getStringValue(dictId));
+ }
+ }
+ }
+ }
+
+ private void processMVField(boolean hasDictionary, ForwardIndexReader
forwardIndexReader,
+ TextIndexCreator textIndexCreator, int numDocs, ColumnMetadata
columnMetadata) {
+ if (!hasDictionary) {
+ // text index on raw column, just read the raw forward index
+ VarByteChunkMVForwardIndexReader rawIndexReader =
(VarByteChunkMVForwardIndexReader) forwardIndexReader;
+ for (int docId = 0; docId < numDocs; docId++) {
+ final BaseChunkSVForwardIndexReader.ChunkReaderContext context =
rawIndexReader.createContext();
+ String[] values = new String[columnMetadata.getTotalDocs()];
+ rawIndexReader.getStringMV(docId, values, context);
+ textIndexCreator.add(values);
+ }
Review comment:
Pass in the reader context, reuse the value buffer
```suggestion
String[] valueBuffer = new
String[columnMetadata.getMaxNumberOfMultiValues()];
for (int docId = 0; docId < numDocs; docId++) {
int length = forwardIndexReader.getStringMV(docId, valueBuffer,
readerContext);
textIndexCreator.add(values, length);
}
```
##########
File path:
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/TextIndexCreator.java
##########
@@ -32,6 +32,11 @@
*/
void add(String document);
+ /**
+ * Adds a set of
Review comment:
(minor) Incomplete java doc
##########
File path:
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/LoaderUtils.java
##########
@@ -70,9 +71,14 @@ private LoaderUtils() {
columnMetadata.getTotalNumberOfEntries(),
columnMetadata.getBitsPerElement());
}
} else {
- DataType dataType = columnMetadata.getDataType();
- return dataType.isFixedWidth() ? new
FixedByteChunkSVForwardIndexReader(dataBuffer, dataType)
- : new VarByteChunkSVForwardIndexReader(dataBuffer, dataType);
+ if (columnMetadata.isSingleValue()) {
+ DataType dataType = columnMetadata.getDataType();
+ return dataType.isFixedWidth() ? new
FixedByteChunkSVForwardIndexReader(dataBuffer, dataType)
+ : new VarByteChunkSVForwardIndexReader(dataBuffer, dataType);
+ } else {
+ //TODO: Implement MV FixedByte Forward Index reader
Review comment:
Check if column is fixed width, and use
`FixedByteChunkMVForwardIndexReader` if it is fixed width
##########
File path:
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/TextIndexCreator.java
##########
@@ -32,6 +32,11 @@
*/
void add(String document);
+ /**
+ * Adds a set of
+ */
+ void add(String[] document);
Review comment:
Also include the length so that the value buffer can be reused
##########
File path:
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java
##########
@@ -65,6 +65,11 @@ public void add(String document) {
_dictId++;
}
+ @Override
+ public void add(String[] document) {
Review comment:
Also include the length so that the value buffer can be reused
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]