ege-st commented on code in PR #11776: URL: https://github.com/apache/pinot/pull/11776#discussion_r1367009788
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -328,12 +332,76 @@ public void indexRow(GenericRow row) String columnName = entry.getKey(); // If row has null value for given column name, add to null value vector if (row.isNullValue(columnName)) { - _nullValueVectorCreatorMap.get(columnName).setNull(_docIdCounter); + _nullValueVectorCreatorMap.get(columnName).setNull(_docPosOnDisk); } } } - _docIdCounter++; + _docPosOnDisk++; + } + + @Override + public void indexColumn(String columnName, @Nullable int[] sortedDocIds, IndexSegment segment, + boolean skipDefaultNullValues) + throws IOException { + long startNS = System.nanoTime(); + + // Iterate over each value in the column + try (PinotSegmentColumnReader colReader = new PinotSegmentColumnReader(segment, columnName)) { + int numDocs = segment.getSegmentMetadata().getTotalDocs(); + Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex = _creatorsByColAndIndex.get(columnName); + NullValueVectorCreator nullVec = _nullValueVectorCreatorMap.get(columnName); + FieldSpec fieldSpec = _schema.getFieldSpecFor(columnName); + SegmentDictionaryCreator dictionaryCreator = _dictionaryCreatorMap.get(columnName); + if (sortedDocIds != null) { + int onDiskDocId = 0; + for (int docId : sortedDocIds) { + indexColumnValue(colReader, creatorsByIndex, columnName, fieldSpec, dictionaryCreator, docId, onDiskDocId, + nullVec, skipDefaultNullValues); + onDiskDocId += 1; + } + } else { + for (int docId = 0; docId < numDocs; docId++) { + indexColumnValue(colReader, creatorsByIndex, columnName, fieldSpec, dictionaryCreator, docId, docId, nullVec, + skipDefaultNullValues); + } + } + } + + _docPosOnDisk++; + } + + private void indexColumnValue(PinotSegmentColumnReader colReader, + Map<IndexType<?, ?, ?>, IndexCreator> creatorsByIndex, String columnName, FieldSpec fieldSpec, + SegmentDictionaryCreator dictionaryCreator, int sourceDocId, int 
onDiskDocPos, NullValueVectorCreator nullVec, + boolean skipDefaultNullValues) + throws IOException { + Object columnValueToIndex = colReader.getValue(sourceDocId); + if (columnValueToIndex == null) { + throw new RuntimeException("Null value for column:" + columnName); + } + + if (fieldSpec.isSingleValueField()) { + indexSingleValueRow(dictionaryCreator, columnValueToIndex, creatorsByIndex); + } else { + indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex, creatorsByIndex); + } + + if (_nullHandlingEnabled && !skipDefaultNullValues) { + //handling null values +// In row oriented: +// - this.indexRow iterates over each column and checks if it isNullValue. If it is then it sets the null +// value vector for that doc id +// - This null value comes from the GenericRow that is created by PinotSegmentRecordReader +// - PinotSegmentRecordReader:L224 is where we figure out the null value stuff +// - PSegRecReader calls PinotSegmentColumnReader.isNull on the doc id to determine if the value for that +// column of that docId is null +// - if it returns true and we are NOT skipping null values we put the default null value into that field +// of the GenericRow Review Comment: I put it in because I wanted to see if it would help people better understand the different steps that are involved in the null value logic. But since it didn't help, I'll remove it. ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -303,6 +305,8 @@ public static ChunkCompressionType getDefaultCompressionType(FieldType fieldType @Override public void indexRow(GenericRow row) throws IOException { + long startNS = System.nanoTime(); Review Comment: Removed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org