vvivekiyer commented on code in PR #9454:
URL: https://github.com/apache/pinot/pull/9454#discussion_r980642492
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java:
##########
@@ -104,8 +104,8 @@ public void process()
// Update single-column indices, like inverted index, json index etc.
IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider();
for (ColumnIndexType type : ColumnIndexType.values()) {
- IndexHandlerFactory.getIndexHandler(type, _segmentMetadata, _indexLoadingConfig)
- .updateIndices(segmentWriter, indexCreatorProvider);
+ IndexHandler handler = IndexHandlerFactory.getIndexHandler(type, _segmentMetadata, _indexLoadingConfig);
+ handler.updateIndices(segmentWriter, indexCreatorProvider);
Review Comment:
Good point. It is required that we process the forwardIndexHandler changes first. Let me add a comment in ColumnIndexType to clarify this.
One thing to note is that the range index need not be rewritten for a compression codec change. The range index only needs to be rewritten if dictEnabled changes.
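For illustration, the clarifying note could look roughly like this (a sketch only; the entry names and ordering below are illustrative, and the real enum carries per-index metadata):

    // Hypothetical sketch of the ordering note to be added in ColumnIndexType.
    public enum ColumnIndexType {
      // NOTE: SegmentPreProcessor invokes IndexHandlers in declaration order (via values()).
      // Keep the forward index entry ahead of index types that are built by scanning the
      // forward index, so compression/dictionary changes are applied first.
      FORWARD_INDEX,
      DICTIONARY,
      INVERTED_INDEX,
      RANGE_INDEX,
      JSON_INDEX
    }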
##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandlerTest.java:
##########
@@ -0,0 +1,374 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.local.segment.store.SegmentLocalFSDirectory;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.segment.spi.index.IndexingOverrides;
+import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.data.readers.RecordReader;
+import org.apache.pinot.spi.utils.ReadMode;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class ForwardIndexHandlerTest {
+ private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "ForwardIndexHandlerTest");
+ private static final String TABLE_NAME = "myTable";
+ private static final String SEGMENT_NAME = "testSegment";
+
+ // TODOs:
+ // 1. Add other datatypes (double, float, bigdecimal, bytes). Also add MV columns.
+ // 2. Add text index and other index types for raw columns.
+ private static final String SNAPPY_STRING = "SNAPPY_STRING";
+ private static final String PASS_THROUGH_STRING = "PASS_THROUGH_STRING";
+ private static final String ZSTANDARD_STRING = "ZSTANDARD_STRING";
+ private static final String LZ4_STRING = "LZ4_STRING";
+
+ private static final String SNAPPY_LONG = "SNAPPY_LONG";
+ private static final String PASS_THROUGH_LONG = "PASS_THROUGH_LONG";
+ private static final String ZSTANDARD_LONG = "ZSTANDARD_LONG";
+ private static final String LZ4_LONG = "LZ4_LONG";
+
+ private static final String SNAPPY_INTEGER = "SNAPPY_INTEGER";
+ private static final String PASS_THROUGH_INTEGER = "PASS_THROUGH_INTEGER";
+ private static final String ZSTANDARD_INTEGER = "ZSTANDARD_INTEGER";
+ private static final String LZ4_INTEGER = "LZ4_INTEGER";
+
+ private static final String DICT_INTEGER = "DICT_INTEGER";
+ private static final String DICT_STRING = "DICT_STRING";
+ private static final String DICT_LONG = "DICT_LONG";
+
+ private static final List<String> RAW_SNAPPY_INDEX_COLUMNS =
+ Arrays.asList(SNAPPY_STRING, SNAPPY_LONG, SNAPPY_INTEGER);
+
+ private static final List<String> RAW_ZSTANDARD_INDEX_COLUMNS =
+ Arrays.asList(ZSTANDARD_STRING, ZSTANDARD_LONG, ZSTANDARD_INTEGER);
+
+ private static final List<String> RAW_PASS_THROUGH_INDEX_COLUMNS =
+ Arrays.asList(PASS_THROUGH_STRING, PASS_THROUGH_LONG, PASS_THROUGH_INTEGER);
+
+ private static final List<String> RAW_LZ4_INDEX_COLUMNS = Arrays.asList(LZ4_STRING, LZ4_LONG, LZ4_INTEGER);
+
+ private List<String> _noDictionaryColumns = new ArrayList<>();
+ private TableConfig _tableConfig;
+ private Schema _schema;
+ private SegmentMetadataImpl _existingSegmentMetadata;
+ private SegmentDirectory.Writer _writer;
+ private List<GenericRow> _rows;
+
+ @BeforeClass
+ public void setUp()
+ throws Exception {
+ // Delete the index directory if it already exists.
+ FileUtils.deleteQuietly(INDEX_DIR);
+
+ buildSegment();
+ }
+
+ private void buildSegment()
+ throws Exception {
+ _rows = createTestData();
+
+ List<FieldConfig> fieldConfigs = new ArrayList<>(
+ RAW_SNAPPY_INDEX_COLUMNS.size() + RAW_ZSTANDARD_INDEX_COLUMNS.size() + RAW_PASS_THROUGH_INDEX_COLUMNS.size()
+ + RAW_LZ4_INDEX_COLUMNS.size());
+
+ for (String indexColumn : RAW_SNAPPY_INDEX_COLUMNS) {
+ fieldConfigs.add(new FieldConfig(indexColumn, FieldConfig.EncodingType.RAW, Collections.emptyList(),
+ FieldConfig.CompressionCodec.SNAPPY, null));
+ }
+
+ for (String indexColumn : RAW_ZSTANDARD_INDEX_COLUMNS) {
+ fieldConfigs.add(new FieldConfig(indexColumn, FieldConfig.EncodingType.RAW, Collections.emptyList(),
+ FieldConfig.CompressionCodec.ZSTANDARD, null));
+ }
+
+ for (String indexColumn : RAW_PASS_THROUGH_INDEX_COLUMNS) {
+ fieldConfigs.add(new FieldConfig(indexColumn, FieldConfig.EncodingType.RAW, Collections.emptyList(),
+ FieldConfig.CompressionCodec.PASS_THROUGH, null));
+ }
+
+ for (String indexColumn : RAW_LZ4_INDEX_COLUMNS) {
+ fieldConfigs.add(new FieldConfig(indexColumn, FieldConfig.EncodingType.RAW, Collections.emptyList(),
+ FieldConfig.CompressionCodec.LZ4, null));
+ }
+
+ _noDictionaryColumns.addAll(RAW_SNAPPY_INDEX_COLUMNS);
+ _noDictionaryColumns.addAll(RAW_ZSTANDARD_INDEX_COLUMNS);
+ _noDictionaryColumns.addAll(RAW_PASS_THROUGH_INDEX_COLUMNS);
+ _noDictionaryColumns.addAll(RAW_LZ4_INDEX_COLUMNS);
+
+ _tableConfig =
+ new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(_noDictionaryColumns)
+ .setFieldConfigList(fieldConfigs).build();
+ _schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension(SNAPPY_STRING, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(PASS_THROUGH_STRING, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(ZSTANDARD_STRING, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(LZ4_STRING, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(SNAPPY_INTEGER, FieldSpec.DataType.INT)
+ .addSingleValueDimension(ZSTANDARD_INTEGER, FieldSpec.DataType.INT)
+ .addSingleValueDimension(PASS_THROUGH_INTEGER, FieldSpec.DataType.INT)
+ .addSingleValueDimension(LZ4_INTEGER, FieldSpec.DataType.INT)
+ .addSingleValueDimension(SNAPPY_LONG, FieldSpec.DataType.LONG)
+ .addSingleValueDimension(ZSTANDARD_LONG, FieldSpec.DataType.LONG)
+ .addSingleValueDimension(PASS_THROUGH_LONG, FieldSpec.DataType.LONG)
+ .addSingleValueDimension(LZ4_LONG, FieldSpec.DataType.LONG)
+ .addSingleValueDimension(DICT_INTEGER, FieldSpec.DataType.INT)
+ .addSingleValueDimension(DICT_LONG, FieldSpec.DataType.LONG)
+ .addSingleValueDimension(DICT_STRING, FieldSpec.DataType.STRING).build();
+
+ SegmentGeneratorConfig config = new SegmentGeneratorConfig(_tableConfig, _schema);
+ config.setOutDir(INDEX_DIR.getPath());
+ config.setTableName(TABLE_NAME);
+ config.setSegmentName(SEGMENT_NAME);
+ SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
+ try (RecordReader recordReader = new GenericRowRecordReader(_rows)) {
+ driver.init(config, recordReader);
+ driver.build();
+ }
+
+ File segmentDirectory = new File(INDEX_DIR, driver.getSegmentName());
+ _existingSegmentMetadata = new SegmentMetadataImpl(segmentDirectory);
+ _writer = new SegmentLocalFSDirectory(segmentDirectory, _existingSegmentMetadata, ReadMode.mmap).createWriter();
+ }
+
+ private List<GenericRow> createTestData() {
+ List<GenericRow> rows = new ArrayList<>();
+
+ // Generate random data.
+ int rowLength = 1000;
+ Random random = new Random();
+ String[] tempStringRows = new String[rowLength];
+ Integer[] tempIntRows = new Integer[rowLength];
+ Long[] tempLongRows = new Long[rowLength];
+
+ for (int i = 0; i < rowLength; i++) {
+ // Adding a fixed value to check for filter queries.
+ if (i % 10 == 0) {
+ tempStringRows[i] = "testRow";
+ tempIntRows[i] = 1001;
+ tempLongRows[i] = 1001L;
+ } else {
+ tempStringRows[i] = "n" + i;
+ tempIntRows[i] = i;
+ tempLongRows[i] = (long) i;
+ }
+ }
+
+ for (int i = 0; i < rowLength; i++) {
+ GenericRow row = new GenericRow();
+
+ // Raw String columns
+ row.putValue(SNAPPY_STRING, tempStringRows[i]);
+ row.putValue(ZSTANDARD_STRING, tempStringRows[i]);
+ row.putValue(PASS_THROUGH_STRING, tempStringRows[i]);
+ row.putValue(LZ4_STRING, tempStringRows[i]);
+
+ // Raw integer columns
+ row.putValue(SNAPPY_INTEGER, tempIntRows[i]);
+ row.putValue(ZSTANDARD_INTEGER, tempIntRows[i]);
+ row.putValue(PASS_THROUGH_INTEGER, tempIntRows[i]);
+ row.putValue(LZ4_INTEGER, tempIntRows[i]);
+
+ // Raw long columns
+ row.putValue(SNAPPY_LONG, tempLongRows[i]);
+ row.putValue(ZSTANDARD_LONG, tempLongRows[i]);
+ row.putValue(PASS_THROUGH_LONG, tempLongRows[i]);
+ row.putValue(LZ4_LONG, tempLongRows[i]);
+
+ // Dictionary columns
+ row.putValue(DICT_INTEGER, tempIntRows[i]);
+ row.putValue(DICT_LONG, tempLongRows[i]);
+ row.putValue(DICT_STRING, tempStringRows[i]);
+
+ rows.add(row);
+ }
+ return rows;
+ }
+
+ @Test
+ public void testComputeOperation() {
+ // TEST1: Validate with zero changes.
+ IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig);
+ ForwardIndexHandler fwdIndexHandler = new ForwardIndexHandler(_existingSegmentMetadata, indexLoadingConfig);
+ Map<String, ForwardIndexHandler.Operation> operationMap = fwdIndexHandler.computeOperation(_writer);
+ assertEquals(operationMap, Collections.EMPTY_MAP);
+
+ // TEST2: Enable dictionary for a RAW_ZSTANDARD_INDEX_COLUMN. Enabling dictionary is not yet supported, so no
+ // operation is expected.
+ indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig);
+ indexLoadingConfig.getNoDictionaryColumns().remove(ZSTANDARD_STRING);
+ fwdIndexHandler = new ForwardIndexHandler(_existingSegmentMetadata, indexLoadingConfig);
+ operationMap = fwdIndexHandler.computeOperation(_writer);
+ assertEquals(operationMap, Collections.EMPTY_MAP);
+
+ // TEST3: Disable dictionary. Disabling dictionary is not yet supported, so no operation is expected.
+ indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig);
+ indexLoadingConfig.getNoDictionaryColumns().add(DICT_INTEGER);
+ fwdIndexHandler = new ForwardIndexHandler(_existingSegmentMetadata, indexLoadingConfig);
+ operationMap = fwdIndexHandler.computeOperation(_writer);
+ assertEquals(operationMap, Collections.EMPTY_MAP);
+
+ // TEST4: Add random index
+ indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig);
+ indexLoadingConfig.getTextIndexColumns().add(DICT_INTEGER);
+ indexLoadingConfig.getTextIndexColumns().add(LZ4_INTEGER);
+ fwdIndexHandler = new ForwardIndexHandler(_existingSegmentMetadata, indexLoadingConfig);
+ operationMap = fwdIndexHandler.computeOperation(_writer);
+ assertEquals(operationMap, Collections.EMPTY_MAP);
+
+ // TEST5: Change compression
+
+ // Create new tableConfig with the modified fieldConfigs.
+ List<FieldConfig> fieldConfigs = new ArrayList<>(_tableConfig.getFieldConfigList());
+ FieldConfig config = fieldConfigs.remove(0);
+ FieldConfig newConfig = new FieldConfig(config.getName(), FieldConfig.EncodingType.RAW, Collections.emptyList(),
+ FieldConfig.CompressionCodec.ZSTANDARD, null);
+ fieldConfigs.add(newConfig);
+ TableConfig tableConfig =
+ new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(_noDictionaryColumns)
+ .setFieldConfigList(fieldConfigs).build();
+
+ indexLoadingConfig = new IndexLoadingConfig(null, tableConfig);
+ fwdIndexHandler = new ForwardIndexHandler(_existingSegmentMetadata, indexLoadingConfig);
+ operationMap = fwdIndexHandler.computeOperation(_writer);
+ assertEquals(operationMap.size(), 1);
+ assertEquals(operationMap.get(config.getName()), ForwardIndexHandler.Operation.CHANGE_RAW_COMPRESSION_TYPE);
+
+ // TEST6: Change compression and add index
+ fieldConfigs = new ArrayList<>(_tableConfig.getFieldConfigList());
+ config = fieldConfigs.remove(0);
+ newConfig = new FieldConfig(config.getName(), FieldConfig.EncodingType.RAW, Collections.emptyList(),
+ FieldConfig.CompressionCodec.ZSTANDARD, null);
+ fieldConfigs.add(newConfig);
+ tableConfig =
+ new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(_noDictionaryColumns)
+ .setFieldConfigList(fieldConfigs).build();
+
+ indexLoadingConfig = new IndexLoadingConfig(null, tableConfig);
+ indexLoadingConfig.getTextIndexColumns().add(config.getName());
+ indexLoadingConfig.getInvertedIndexColumns().add(config.getName());
+ fwdIndexHandler = new ForwardIndexHandler(_existingSegmentMetadata, indexLoadingConfig);
+ operationMap = fwdIndexHandler.computeOperation(_writer);
+ assertEquals(operationMap.size(), 1);
+ assertEquals(operationMap.get(config.getName()), ForwardIndexHandler.Operation.CHANGE_RAW_COMPRESSION_TYPE);
+ }
+
+ @Test
+ public void testRewriteRawForwardIndex()
Review Comment:
Added tests in SegmentPreProcessorTest.
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to the forward index and dictionary configs. Note
+ * that this handler only works for segment versions >= 3.0. The currently supported operations are:
+ * 1. Change compression on raw SV columns.
+ *
+ * TODO: Add support for the following:
+ * 1. Change compression for raw MV columns
+ * 2. Enable dictionary
+ * 3. Disable dictionary
+ */
+public class ForwardIndexHandler implements IndexHandler {
+ private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class);
+
+ private final SegmentMetadata _segmentMetadata;
+ private final IndexLoadingConfig _indexLoadingConfig;
+
+ protected enum Operation {
+ CHANGE_RAW_COMPRESSION_TYPE,
+
+ // TODO: Add other operations like ENABLE_DICTIONARY, DISABLE_DICTIONARY.
+ }
+
+ public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) {
+ _segmentMetadata = segmentMetadata;
+ _indexLoadingConfig = indexLoadingConfig;
+ }
+
+ @Override
+ public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) {
+ Map<String, Operation> columnOperationMap = computeOperation(segmentReader);
+ return !columnOperationMap.isEmpty();
+ }
+
+ @Override
+ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorProvider indexCreatorProvider)
+ throws Exception {
+ Map<String, Operation> columnOperationMap = computeOperation(segmentWriter);
+ if (columnOperationMap.isEmpty()) {
+ return;
+ }
+
+ for (Map.Entry<String, Operation> entry : columnOperationMap.entrySet()) {
+ String column = entry.getKey();
+ Operation operation = entry.getValue();
+
+ switch (operation) {
+ case CHANGE_RAW_COMPRESSION_TYPE:
+ rewriteRawForwardIndex(column, segmentWriter, indexCreatorProvider);
+ break;
+ // TODO: Add other operations here.
+ default:
+ break;
+ }
+ }
+ }
+
+ @VisibleForTesting
+ Map<String, Operation> computeOperation(SegmentDirectory.Reader segmentReader) {
+ Map<String, Operation> columnOperationMap = new HashMap<>();
+
+ // Works only if the existing segment is V3.
+ if (_segmentMetadata.getVersion() != SegmentVersion.v3) {
+ return columnOperationMap;
+ }
+
+ // From existing column config.
+ Set<String> existingAllColumns = _segmentMetadata.getAllColumns();
+ Set<String> existingDictColumns =
+ segmentReader.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.DICTIONARY);
+ Set<String> existingNoDictColumns = new HashSet<>();
+ for (String column : existingAllColumns) {
+ if (!existingDictColumns.contains(column)) {
+ existingNoDictColumns.add(column);
+ }
+ }
+
+ // From new column config.
+ Set<String> newNoDictColumns = _indexLoadingConfig.getNoDictionaryColumns();
+
+ for (String column : existingAllColumns) {
+ if (existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+ // The column is RAW forward index encoded in both the existing and new configs. Check if the compression
+ // type needs to be changed.
+ if (shouldChangeCompressionType(column, segmentReader)) {
+ columnOperationMap.put(column, Operation.CHANGE_RAW_COMPRESSION_TYPE);
+ }
+ } else if (existingNoDictColumns.contains(column) && !newNoDictColumns.contains(column)) {
+ // TODO: Enable dictionary.
+ } else if (!existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+ // TODO: Disable dictionary.
+ } else {
+ // No changes necessary.
+ }
+ }
+
+ return columnOperationMap;
+ }
+
+ private boolean shouldChangeCompressionType(String column, SegmentDirectory.Reader segmentReader) {
+ ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+
+ // TODO: Remove this MV column limitation.
+ if (!existingColMetadata.isSingleValue()) {
+ return false;
+ }
+
+ // The compression type for an existing segment can only be determined by reading the forward index header.
+ try {
+ ForwardIndexReader fwdIndexReader = LoaderUtils.getForwardIndexReader(segmentReader, existingColMetadata);
+ ChunkCompressionType existingCompressionType = fwdIndexReader.getCompressionType();
+ Preconditions.checkState(existingCompressionType != null,
+ "Existing compressionType cannot be null for raw forward index column=" + column);
+
+ // Get the new compression type.
+ ChunkCompressionType newCompressionType = null;
+ Map<String, ChunkCompressionType> newCompressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+ if (newCompressionConfigs.containsKey(column)) {
+ newCompressionType = newCompressionConfigs.get(column);
+ }
+
+ // Note that the default compression type (PASS_THROUGH for metrics and LZ4 for dimensions) is not considered
+ // if the compressionType is not explicitly provided in the tableConfig. This is to avoid incorrectly rewriting
+ // all the forward indexes during segmentReload when the default compressionType changes.
+ if (newCompressionType == null || existingCompressionType == newCompressionType) {
+ return false;
+ }
+
+ return true;
+ } catch (Exception e) {
+ LOGGER.error("Caught exception while checking the compression type of column: {}", column, e);
+ return false;
+ }
+ }
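// Reviewer aside (illustrative sketch, not part of this diff): the compressionConfigs map used above is
// assumed to be built from the table config roughly as below, keeping only explicitly configured codecs so
// that a default-codec change never triggers a rewrite on reload. The helper name is hypothetical; the real
// extraction lives in IndexLoadingConfig.
private static Map<String, ChunkCompressionType> extractCompressionConfigs(TableConfig tableConfig) {
  Map<String, ChunkCompressionType> compressionConfigs = new HashMap<>();
  if (tableConfig.getFieldConfigList() == null) {
    return compressionConfigs;
  }
  for (FieldConfig fieldConfig : tableConfig.getFieldConfigList()) {
    // Keep only raw-encoded columns whose codec is explicitly set in the table config.
    if (fieldConfig.getEncodingType() == FieldConfig.EncodingType.RAW && fieldConfig.getCompressionCodec() != null) {
      compressionConfigs.put(fieldConfig.getName(), ChunkCompressionType.valueOf(fieldConfig.getCompressionCodec().name()));
    }
  }
  return compressionConfigs;
}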
+
+ private void rewriteRawForwardIndex(String column, SegmentDirectory.Writer segmentWriter,
+ IndexCreatorProvider indexCreatorProvider)
+ throws Exception {
+ Preconditions.checkState(_segmentMetadata.getVersion() == SegmentVersion.v3);
+
+ ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+ File indexDir = _segmentMetadata.getIndexDir();
+ String segmentName = _segmentMetadata.getName();
+ File inProgress = new File(indexDir, column + ".fwd.inprogress");
+ File fwdIndexFile = new File(indexDir, column + V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
+
+ if (!inProgress.exists()) {
+ // Marker file does not exist, which means the last run ended normally. Create a marker file.
+ FileUtils.touch(inProgress);
+ } else {
+ // Marker file exists, which means the last run was interrupted. Remove the partially written forward index
+ // if it exists.
+ FileUtils.deleteQuietly(fwdIndexFile);
+ }
+
+ LOGGER.info("Creating new forward index for segment={} and column={}",
segmentName, column);
+
+ Map<String, ChunkCompressionType> compressionConfigs =
_indexLoadingConfig.getCompressionConfigs();
+ Preconditions.checkState(compressionConfigs.containsKey(column));
+ // At this point, compressionConfigs is guaranteed to contain the column.
+ ChunkCompressionType newCompressionType = compressionConfigs.get(column);
+
+ int numDocs = existingColMetadata.getTotalDocs();
+
+ try (ForwardIndexReader reader = LoaderUtils.getForwardIndexReader(segmentWriter, existingColMetadata)) {
+ int lengthOfLongestEntry = reader.getLengthOfLongestEntry();
+ Preconditions.checkState(lengthOfLongestEntry >= 0,
+ "lengthOfLongestEntry cannot be negative. segment=" + segmentName + " column=" + column);
+
+ IndexCreationContext.Forward context =
+ IndexCreationContext.builder().withIndexDir(indexDir).withColumnMetadata(existingColMetadata)
+ .withLengthOfLongestEntry(lengthOfLongestEntry).build()
+ .forForwardIndex(newCompressionType, _indexLoadingConfig.getColumnProperties());
+
+ try (ForwardIndexCreator creator = indexCreatorProvider.newForwardIndexCreator(context)) {
+ PinotSegmentColumnReader columnReader =
+ new PinotSegmentColumnReader(reader, null, null, existingColMetadata.getMaxNumberOfMultiValues());
+
+ for (int i = 0; i < numDocs; i++) {
+ Object val = columnReader.getValue(i);
+
+ // JSON fields are either stored as string or bytes. No special handling is needed.
+ switch (creator.getValueType()) {
+ case INT:
+ creator.putInt((int) val);
+ break;
+ case LONG:
+ creator.putLong((long) val);
+ break;
+ case FLOAT:
+ creator.putFloat((float) val);
+ break;
+ case DOUBLE:
+ creator.putDouble((double) val);
+ break;
+ case STRING:
+ creator.putString((String) val);
+ break;
+ case BYTES:
+ creator.putBytes((byte[]) val);
+ break;
+ case BIG_DECIMAL:
+ creator.putBigDecimal((BigDecimal) val);
+ break;
+ default:
+ throw new IllegalStateException("Unsupported value type: " + creator.getValueType());
+ }
+ }
+ }
+ }
+
+ // The existing forward index was only needed to generate the new forward index, so it can be removed now.
+ // Note that the stale entries corresponding to the old forward index in the columns.psf file will be removed
+ // when segmentWriter.close() is called.
+ segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
Review Comment:
Clarified the comment. Please let me know if this makes sense.
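For reference, a minimal end-to-end sketch of how this handler is driven (a sketch only; the names are taken from the diffs above, and the exact call path inside SegmentPreProcessor may differ):

    IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, tableConfig);
    ForwardIndexHandler handler = new ForwardIndexHandler(segmentMetadata, indexLoadingConfig);
    IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider();
    // SegmentDirectory.Writer extends SegmentDirectory.Reader, so it can be passed to both calls.
    if (handler.needUpdateIndices(segmentWriter)) {
      // Rewrites the raw forward index with the newly configured compression codec.
      handler.updateIndices(segmentWriter, indexCreatorProvider);
    }
    // Stale columns.psf entries for the old forward index are cleaned up on close.
    segmentWriter.close();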