yashmayya commented on code in PR #13037:
URL: https://github.com/apache/pinot/pull/13037#discussion_r1593366648


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java:
##########
@@ -757,57 +753,157 @@ private void createDerivedColumnV1Indices(String column, 
FunctionEvaluator funct
           } else {
             useVarLengthDictionary = 
_indexLoadingConfig.getVarLengthDictionaryColumns().contains(column);
           }
-          indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, 
true, useVarLengthDictionary, true,
-              new ByteArray((byte[]) fieldSpec.getDefaultNullValue()));
+          indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, 
createDictionary, useVarLengthDictionary,
+              true, new ByteArray((byte[]) fieldSpec.getDefaultNullValue()));
           break;
         }
         default:
           throw new IllegalStateException();
       }
 
-      // Create dictionary
-      try (SegmentDictionaryCreator dictionaryCreator = new 
SegmentDictionaryCreator(fieldSpec, _indexDir,
-          indexCreationInfo.isUseVarLengthDictionary())) {
-        
dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray());
+      if (createDictionary) {
+        createDerivedColumnForwardIndexWithDictionary(column, fieldSpec, 
outputValues, indexCreationInfo);
+      } else {
+        createDerivedColumnForwardIndexWithoutDictionary(column, fieldSpec, 
outputValues, indexCreationInfo);
+      }
+    } finally {
+      for (ValueReader valueReader : valueReaders) {
+        valueReader.close();
+      }
+    }
+  }
+
+  /**
+   * Helper method to create the dictionary and forward indices for a column 
with derived values.
+   */
+  private void createDerivedColumnForwardIndexWithDictionary(String column, 
FieldSpec fieldSpec, Object[] outputValues,
+      ColumnIndexCreationInfo indexCreationInfo) throws Exception {
+
+    // Create dictionary
+    try (SegmentDictionaryCreator dictionaryCreator = new 
SegmentDictionaryCreator(fieldSpec, _indexDir,
+        indexCreationInfo.isUseVarLengthDictionary())) {
+      
dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray());
+
+      int numDocs = outputValues.length;
 
-        // Create forward index
-        int cardinality = indexCreationInfo.getDistinctValueCount();
+      // Create forward index
+      boolean isSingleValue = fieldSpec.isSingleValueField();
+
+      try (ForwardIndexCreator forwardIndexCreator
+          = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs, 
column, true)) {
         if (isSingleValue) {
-          try (ForwardIndexCreator forwardIndexCreator = 
indexCreationInfo.isSorted()
-              ? new SingleValueSortedForwardIndexCreator(_indexDir, column, 
cardinality)
-              : new SingleValueUnsortedForwardIndexCreator(_indexDir, column, 
cardinality, numDocs)) {
-            for (int i = 0; i < numDocs; i++) {
-              
forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValues[i]));
-            }
+          for (Object outputValue : outputValues) {
+            
forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValue));
           }
         } else {
-          DictIdCompressionType dictIdCompressionType = null;
-          FieldIndexConfigs fieldIndexConfig = 
_indexLoadingConfig.getFieldIndexConfig(column);
-          if (fieldIndexConfig != null) {
-            ForwardIndexConfig forwardIndexConfig = 
fieldIndexConfig.getConfig(new ForwardIndexPlugin().getIndexType());
-            if (forwardIndexConfig != null) {
-              dictIdCompressionType = 
forwardIndexConfig.getDictIdCompressionType();
-            }
-          }
-          try (ForwardIndexCreator forwardIndexCreator = dictIdCompressionType 
== DictIdCompressionType.MV_ENTRY_DICT
-              ? new MultiValueEntryDictForwardIndexCreator(_indexDir, column, 
cardinality, numDocs)
-              : new MultiValueUnsortedForwardIndexCreator(_indexDir, column, 
cardinality, numDocs,
-                  indexCreationInfo.getTotalNumberOfEntries())) {
-            for (int i = 0; i < numDocs; i++) {
-              
forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValues[i]));
-            }
+          for (Object outputValue : outputValues) {
+            
forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValue));
           }
         }
-
         // Add the column metadata
         SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, 
column, indexCreationInfo, numDocs,
             fieldSpec, true, dictionaryCreator.getNumBytesPerEntry());
       }
-    } finally {
-      for (ValueReader valueReader : valueReaders) {
-        valueReader.close();
+    }
+  }
+
+  /**
+   * Helper method to create a forward index for a raw encoded column with 
derived values.
+   */
+  private void createDerivedColumnForwardIndexWithoutDictionary(String column, 
FieldSpec fieldSpec,
+      Object[] outputValues, ColumnIndexCreationInfo indexCreationInfo)
+      throws Exception {
+
+    // Create forward index
+    int numDocs = outputValues.length;
+    boolean isSingleValue = fieldSpec.isSingleValueField();
+
+    try (ForwardIndexCreator forwardIndexCreator
+        = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs, 
column, false)) {
+      if (isSingleValue) {
+        for (Object outputValue : outputValues) {
+          switch (fieldSpec.getDataType().getStoredType()) {
+            // Casts are safe here because we've already done the conversion 
in createDerivedColumnV1Indices
+            case INT:
+              forwardIndexCreator.putInt((int) outputValue);
+              break;
+            case LONG:
+              forwardIndexCreator.putLong((long) outputValue);
+              break;
+            case FLOAT:
+              forwardIndexCreator.putFloat((float) outputValue);
+              break;
+            case DOUBLE:
+              forwardIndexCreator.putDouble((double) outputValue);
+              break;
+            case BIG_DECIMAL:
+              forwardIndexCreator.putBigDecimal((BigDecimal) outputValue);
+              break;
+            case STRING:
+              forwardIndexCreator.putString((String) outputValue);
+              break;
+            case BYTES:
+              forwardIndexCreator.putBytes((byte[]) outputValue);
+              break;
+            default:
+              throw new IllegalStateException();
+          }
+        }
+      } else {
+        for (Object outputValue : outputValues) {
+          switch (fieldSpec.getDataType().getStoredType()) {
+            // Casts are safe here because we've already done the conversion 
in createDerivedColumnV1Indices
+            case INT:
+              forwardIndexCreator.putIntMV(ArrayUtils.toPrimitive((Integer[]) 
outputValue));

Review Comment:
   We're using the primitive wrapper classes 
[here](https://github.com/apache/pinot/blob/363a03eea82a66aef4af724625dc714676018aa5/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java#L612)
 for instance because in the dictionary MV case, we're casting to `Object[]` 
which isn't possible with primitive arrays. So we could add separate branches 
in each case 
[here](https://github.com/apache/pinot/blob/363a03eea82a66aef4af724625dc714676018aa5/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java#L603)
 for multi-value fields to either create a primitive array (for raw encoded 
column) or an array of primitive wrapper classes (for dictionary encoded 
column). But then, I think we'd also need to update all the column statistics 
collector classes, which currently only handle two cases - the entry is either 
something that can be casted to an `Obj
 ect[]` (i.e., an `Integer[]` for `IntColumnPreIndexStatsCollector`), or else 
its treated as a single-value integer. Primitive arrays (`int[]` for instance) 
don't seem to be currently handled appropriately (since they can't be cast to 
`Object[]`) - see 
[here](https://github.com/apache/pinot/blob/363a03eea82a66aef4af724625dc714676018aa5/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java#L38-L61)
 for example.
   
   I can certainly make that change here in this PR though if you think it 
makes sense. Alternatively, I'd be happy to file a follow-up PR to make this 
optimization and related changes too. Thoughts?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to