tarun11Mavani commented on code in PR #16344:
URL: https://github.com/apache/pinot/pull/16344#discussion_r2309648971
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/stats/RealtimeSegmentStatsContainer.java:
##########
@@ -38,30 +42,99 @@
public class RealtimeSegmentStatsContainer implements
SegmentPreIndexStatsContainer {
private final MutableSegment _mutableSegment;
private final Map<String, ColumnStatistics> _columnStatisticsMap = new
HashMap<>();
+ private final int _totalDocCount;
public RealtimeSegmentStatsContainer(MutableSegment mutableSegment,
@Nullable int[] sortedDocIds,
StatsCollectorConfig statsCollectorConfig) {
+ this(mutableSegment, sortedDocIds, statsCollectorConfig, null);
+ }
+
+ public RealtimeSegmentStatsContainer(MutableSegment mutableSegment,
@Nullable int[] sortedDocIds,
+ StatsCollectorConfig statsCollectorConfig, @Nullable RecordReader
recordReader) {
_mutableSegment = mutableSegment;
+ // Determine if we're using compacted reader
+ boolean isUsingCompactedReader = recordReader instanceof
CompactedPinotSegmentRecordReader;
+
+ // Determine the correct total document count based on whether compaction
is being used
+ if (isUsingCompactedReader && mutableSegment.getValidDocIds() != null) {
+ _totalDocCount =
mutableSegment.getValidDocIds().getMutableRoaringBitmap().getCardinality();
+ } else {
+ _totalDocCount = mutableSegment.getNumDocsIndexed();
+ }
+
// Create all column statistics
+ // Determine compaction mode once for all columns
+ boolean useCompactedStatistics = isUsingCompactedReader &&
mutableSegment.getValidDocIds() != null;
+ ThreadSafeMutableRoaringBitmap validDocIds = useCompactedStatistics ?
mutableSegment.getValidDocIds() : null;
Review Comment:
Thanks for catching this. I have made a change to create a copy of
validDocIds and use that copy instead of using
mutableSegment.validDocIds directly.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]