Re: [PR] Update Segment builder to use column major tables [pinot]

via GitHub Fri, 20 Oct 2023 06:55:24 -0700


ege-st commented on code in PR #11776:
URL: https://github.com/apache/pinot/pull/11776#discussion_r1367009788



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -328,12 +332,76 @@ public void indexRow(GenericRow row)
         String columnName = entry.getKey();
         // If row has null value for given column name, add to null value 
vector
         if (row.isNullValue(columnName)) {
-          _nullValueVectorCreatorMap.get(columnName).setNull(_docIdCounter);
+          _nullValueVectorCreatorMap.get(columnName).setNull(_docPosOnDisk);
         }
       }
     }
 
-    _docIdCounter++;
+    _docPosOnDisk++;
+  }
+
+  @Override
+  public void indexColumn(String columnName, @Nullable int[] sortedDocIds, 
IndexSegment segment,
+      boolean skipDefaultNullValues)
+      throws IOException {
+    long startNS = System.nanoTime();
+
+    // Iterate over each value in the column
+    try (PinotSegmentColumnReader colReader = new 
PinotSegmentColumnReader(segment, columnName)) {
+      int numDocs = segment.getSegmentMetadata().getTotalDocs();
+      Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex = 
_creatorsByColAndIndex.get(columnName);
+      NullValueVectorCreator nullVec = 
_nullValueVectorCreatorMap.get(columnName);
+      FieldSpec fieldSpec = _schema.getFieldSpecFor(columnName);
+      SegmentDictionaryCreator dictionaryCreator = 
_dictionaryCreatorMap.get(columnName);
+      if (sortedDocIds != null) {
+        int onDiskDocId = 0;
+        for (int docId : sortedDocIds) {
+          indexColumnValue(colReader, creatorsByIndex, columnName, fieldSpec, 
dictionaryCreator, docId, onDiskDocId,
+              nullVec, skipDefaultNullValues);
+          onDiskDocId += 1;
+        }
+      } else {
+        for (int docId = 0; docId < numDocs; docId++) {
+          indexColumnValue(colReader, creatorsByIndex, columnName, fieldSpec, 
dictionaryCreator, docId, docId, nullVec,
+              skipDefaultNullValues);
+        }
+      }
+    }
+
+    _docPosOnDisk++;
+  }
+
+  private void indexColumnValue(PinotSegmentColumnReader colReader,
+      Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex, String 
columnName, FieldSpec fieldSpec,
+      SegmentDictionaryCreator dictionaryCreator, int sourceDocId, int 
onDiskDocPos, NullValueVectorCreator nullVec,
+      boolean skipDefaultNullValues)
+      throws IOException {
+    Object columnValueToIndex = colReader.getValue(sourceDocId);
+    if (columnValueToIndex == null) {
+      throw new RuntimeException("Null value for column:" + columnName);
+    }
+
+    if (fieldSpec.isSingleValueField()) {
+      indexSingleValueRow(dictionaryCreator, columnValueToIndex, 
creatorsByIndex);
+    } else {
+      indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex, 
creatorsByIndex);
+    }
+
+    if (_nullHandlingEnabled && !skipDefaultNullValues) {
+      //handling null values
+//            In row oriented:
+//              - this.indexRow iterates over each column and checks if it 
isNullValue.  If it is then it sets the null
+//              value vector for that doc id
+//              - This null value comes from the GenericRow that is created by 
PinotSegmentRecordReader
+//              - PinotSegmentRecordReader:L224 is where we figure out the 
null value stuff
+//              - PSegRecReader calls PinotSegmentColumnReader.isNull on the 
doc id to determine if the value for that
+//              column of that docId is null
+//              - if it returns true and we are NOT skipping null values we 
put the default null value into that field
+//              of the GenericRow

Review Comment:
   I added it to see whether it would help people better understand the 
steps involved in the null-value handling logic. Since it didn't help, I'll 
remove it.



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -303,6 +305,8 @@ public static ChunkCompressionType 
getDefaultCompressionType(FieldType fieldType
   @Override
   public void indexRow(GenericRow row)
       throws IOException {
+    long startNS = System.nanoTime();

Review Comment:
   Removed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Update Segment builder to use column major tables [pinot]

Reply via email to