This is an automated email from the ASF dual-hosted git repository.

saurabhd336 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 873992f93d Post build index creation (#11711)
873992f93d is described below

commit 873992f93d350ded958e20856ae455da89f990be
Author: Saurabh Dubey <[email protected]>
AuthorDate: Wed Oct 4 09:55:12 2023 +0530

    Post build index creation (#11711)
    
    * Allow creating indexes that depend on completed segment
    
    ---------
    
    Co-authored-by: Saurabh Dubey <[email protected]>
    Co-authored-by: Saurabh Dubey 
<[email protected]>
---
 .../creator/impl/SegmentColumnarIndexCreator.java  | 12 +-----
 .../impl/SegmentIndexCreationDriverImpl.java       | 45 ++++++++++++++++++++++
 .../index/dictionary/DictionaryIndexType.java      |  4 ++
 .../index/nullvalue/NullValueIndexType.java        |  4 ++
 .../apache/pinot/segment/spi/index/IndexType.java  | 25 ++++++++++++
 5 files changed, 80 insertions(+), 10 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index 5ec40796d6..a84c275299 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -94,7 +94,7 @@ public class SegmentColumnarIndexCreator implements 
SegmentCreator {
   /**
    * Contains, indexed by column name, the creator associated with each index 
type.
    *
-   * Indexes that {@link #hasSpecialLifecycle(IndexType) have a special 
lyfecycle} are not included here.
+   * Indexes whose build lifecycle is not DURING_SEGMENT_CREATION are not 
included here.
    */
   private Map<String, Map<IndexType<?, ?, ?>, IndexCreator>> 
_creatorsByColAndIndex = new HashMap<>();
   private final Map<String, NullValueVectorCreator> _nullValueVectorCreatorMap 
= new HashMap<>();
@@ -195,7 +195,7 @@ public class SegmentColumnarIndexCreator implements 
SegmentCreator {
       Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex =
           
Maps.newHashMapWithExpectedSize(IndexService.getInstance().getAllIndexes().size());
       for (IndexType<?, ?, ?> index : 
IndexService.getInstance().getAllIndexes()) {
-        if (hasSpecialLifecycle(index)) {
+        if (index.getIndexBuildLifecycle() != 
IndexType.BuildLifecycle.DURING_SEGMENT_CREATION) {
           continue;
         }
         tryCreateIndexCreator(creatorsByIndex, index, context, config);
@@ -243,14 +243,6 @@ public class SegmentColumnarIndexCreator implements 
SegmentCreator {
     return builder.build();
   }
 
-  /**
-   * Returns true if the given index type has their own construction lifecycle 
and therefore should not be instantiated
-   * in the general index loop and shouldn't be notified of each new column.
-   */
-  private boolean hasSpecialLifecycle(IndexType<?, ?, ?> indexType) {
-    return indexType == StandardIndexes.nullValueVector() || indexType == 
StandardIndexes.dictionary();
-  }
-
   /**
    * Creates the {@link IndexCreator} in a type safe way.
    *
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
index 15f1611c04..0293e644c4 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
@@ -23,11 +23,14 @@ import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.UUID;
+import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.io.FileUtils;
@@ -37,6 +40,7 @@ import 
org.apache.pinot.segment.local.segment.creator.RecordReaderSegmentCreatio
 import org.apache.pinot.segment.local.segment.creator.TransformPipeline;
 import 
org.apache.pinot.segment.local.segment.index.converter.SegmentFormatConverterFactory;
 import 
org.apache.pinot.segment.local.segment.index.dictionary.DictionaryIndexType;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
 import org.apache.pinot.segment.local.segment.readers.PinotSegmentRecordReader;
 import org.apache.pinot.segment.local.startree.v2.builder.MultipleTreesBuilder;
 import org.apache.pinot.segment.local.utils.CrcUtils;
@@ -52,7 +56,13 @@ import 
org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver;
 import org.apache.pinot.segment.spi.creator.SegmentPreIndexStatsContainer;
 import org.apache.pinot.segment.spi.creator.SegmentVersion;
 import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
+import org.apache.pinot.segment.spi.index.IndexHandler;
+import org.apache.pinot.segment.spi.index.IndexService;
+import org.apache.pinot.segment.spi.index.IndexType;
 import org.apache.pinot.segment.spi.index.creator.SegmentIndexCreationInfo;
+import org.apache.pinot.segment.spi.loader.SegmentDirectoryLoaderContext;
+import org.apache.pinot.segment.spi.loader.SegmentDirectoryLoaderRegistry;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
 import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
 import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
 import org.apache.pinot.spi.config.table.TableConfig;
@@ -65,7 +75,9 @@ import org.apache.pinot.spi.data.readers.FileFormat;
 import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.data.readers.RecordReader;
 import org.apache.pinot.spi.data.readers.RecordReaderFactory;
+import org.apache.pinot.spi.env.PinotConfiguration;
 import org.apache.pinot.spi.utils.ByteArray;
+import org.apache.pinot.spi.utils.ReadMode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -312,6 +324,7 @@ public class SegmentIndexCreationDriverImpl implements 
SegmentIndexCreationDrive
     if (_totalDocs > 0) {
       buildStarTreeV2IfNecessary(segmentOutputDir);
     }
+    updatePostSegmentCreationIndexes(segmentOutputDir);
 
     // Compute CRC and creation time
     long crc = CrcUtils.forAllFilesInFolder(segmentOutputDir).computeCrc();
@@ -336,6 +349,38 @@ public class SegmentIndexCreationDriverImpl implements 
SegmentIndexCreationDrive
     LOGGER.info("Driver, indexing time : {}", _totalIndexTime);
   }
 
+  private void updatePostSegmentCreationIndexes(File indexDir) throws 
Exception {
+    Set<IndexType> postSegCreationIndexes = 
IndexService.getInstance().getAllIndexes().stream()
+        .filter(indexType -> indexType.getIndexBuildLifecycle() == 
IndexType.BuildLifecycle.POST_SEGMENT_CREATION)
+        .collect(Collectors.toSet());
+
+    if (postSegCreationIndexes.size() > 0) {
+      // Build other indexes
+      Map<String, Object> props = new HashMap<>();
+      props.put(IndexLoadingConfig.READ_MODE_KEY, ReadMode.mmap);
+      PinotConfiguration segmentDirectoryConfigs = new 
PinotConfiguration(props);
+
+      SegmentDirectoryLoaderContext segmentLoaderContext =
+          new 
SegmentDirectoryLoaderContext.Builder().setTableConfig(_config.getTableConfig())
+              .setSchema(_config.getSchema()).setSegmentName(_segmentName)
+              .setSegmentDirectoryConfigs(segmentDirectoryConfigs).build();
+
+      IndexLoadingConfig indexLoadingConfig =
+          new IndexLoadingConfig(null, _config.getTableConfig(), 
_config.getSchema());
+
+      try (SegmentDirectory segmentDirectory = 
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader()
+          .load(indexDir.toURI(), segmentLoaderContext);
+          SegmentDirectory.Writer segmentWriter = 
segmentDirectory.createWriter()) {
+        for (IndexType indexType : postSegCreationIndexes) {
+          IndexHandler handler =
+              indexType.createIndexHandler(segmentDirectory, 
indexLoadingConfig.getFieldIndexConfigByColName(),
+                  _config.getSchema(), _config.getTableConfig());
+          handler.updateIndices(segmentWriter);
+        }
+      }
+    }
+  }
+
   private void buildStarTreeV2IfNecessary(File indexDir)
       throws Exception {
     List<StarTreeIndexConfig> starTreeIndexConfigs = 
_config.getStarTreeIndexConfigs();
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java
index bba174d732..102361a733 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/dictionary/DictionaryIndexType.java
@@ -449,4 +449,8 @@ public class DictionaryIndexType
     return MutableDictionaryFactory.getMutableDictionary(storedType, 
context.isOffHeap(), context.getMemoryManager(),
         dictionaryColumnSize, Math.min(estimatedCardinality, 
context.getCapacity()), dictionaryAllocationContext);
   }
+
+  public BuildLifecycle getIndexBuildLifecycle() {
+    return BuildLifecycle.CUSTOM;
+  }
 }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/nullvalue/NullValueIndexType.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/nullvalue/NullValueIndexType.java
index 9280b7ce92..316b72ef0b 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/nullvalue/NullValueIndexType.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/nullvalue/NullValueIndexType.java
@@ -130,4 +130,8 @@ public class NullValueIndexType extends 
AbstractIndexType<IndexConfig, NullValue
   @Override
   public void convertToNewFormat(TableConfig tableConfig, Schema schema) {
   }
+
+  public BuildLifecycle getIndexBuildLifecycle() {
+    return BuildLifecycle.CUSTOM;
+  }
 }
diff --git 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexType.java
 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexType.java
index 94ef6a96b7..10a6a416b2 100644
--- 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexType.java
+++ 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexType.java
@@ -38,6 +38,12 @@ import org.apache.pinot.spi.data.Schema;
  * @param <IC> the {@link IndexCreator} subclass that should be used to create 
indexes of this type.
  */
 public interface IndexType<C extends IndexConfig, IR extends IndexReader, IC 
extends IndexCreator> {
+  /**
+   * Returns the {@link BuildLifecycle} for this index type. This is used to 
determine when the index should be built.
+   */
+  default BuildLifecycle getIndexBuildLifecycle() {
+    return BuildLifecycle.DURING_SEGMENT_CREATION;
+  }
 
   /**
    * The unique id that identifies this index type.
@@ -127,4 +133,23 @@ public interface IndexType<C extends IndexConfig, IR 
extends IndexReader, IC ext
   default MutableIndex createMutableIndex(MutableIndexContext context, C 
config) {
     return null;
   }
+
+  enum BuildLifecycle {
+    /**
+     * The index will be built during segment creation, using the {@link 
IndexCreator#add} call for each of the column
+     * values being added.
+     */
+    DURING_SEGMENT_CREATION,
+
+    /**
+     * The index will be built after the segment file has been created, using 
the {@link IndexHandler#updateIndices} call.
+     * This is useful for indexes that may need the entire prebuilt segment to 
be available before they can be built.
+     */
+    POST_SEGMENT_CREATION,
+
+    /**
+     * The index's build lifecycle is managed in a custom manner.
+     */
+    CUSTOM
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to