Copilot commented on code in PR #17606:
URL: https://github.com/apache/pinot/pull/17606#discussion_r2768830859


##########
pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java:
##########
@@ -222,38 +262,64 @@ private DimensionTable createFastLookupDimensionTable() {
       int totalDocs = 0;
       for (SegmentDataManager segmentManager : segmentDataManagers) {
         IndexSegment indexSegment = segmentManager.getSegment();
-        totalDocs += indexSegment.getSegmentMetadata().getTotalDocs();
+        totalDocs += getTotalDocCount(indexSegment);
       }
 
       Object2ObjectOpenCustomHashMap<Object[], Object[]> lookupTable =
           new Object2ObjectOpenCustomHashMap<>(totalDocs, HASH_STRATEGY);
+      Object2ObjectOpenCustomHashMap<Object[], RecordLocation> 
recordLocationMap =
+          new Object2ObjectOpenCustomHashMap<>(totalDocs, HASH_STRATEGY);
 
       List<String> valueColumns = getValueColumns(schema.getColumnNames(), 
primaryKeyColumns);
 
-      for (SegmentDataManager segmentManager : segmentDataManagers) {
+      for (int segmentIndex = 0; segmentIndex < segmentDataManagers.size(); 
segmentIndex++) {
+        SegmentDataManager segmentManager = 
segmentDataManagers.get(segmentIndex);
         IndexSegment indexSegment = segmentManager.getSegment();
+        MutableRoaringBitmap queryableDocIds = 
getQueryableDocIdsSnapshot(indexSegment);

Review Comment:
   The variable name `queryableDocIds` is ambiguous as it could refer to either 
the bitmap itself or a snapshot. Consider renaming to `queryableDocIdsSnapshot` 
to clearly indicate this is a snapshot taken at a specific point in time, 
matching the naming pattern used elsewhere in the codebase (e.g., line 550).



##########
pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java:
##########
@@ -300,46 +367,70 @@ private DimensionTable createMemOptimisedDimensionTable() 
{
     int totalDocs = 0;
     for (SegmentDataManager segmentManager : segmentDataManagers) {
       IndexSegment indexSegment = segmentManager.getSegment();
-      totalDocs += indexSegment.getSegmentMetadata().getTotalDocs();
+      totalDocs += getTotalDocCount(indexSegment);
     }
 
     Object2LongOpenCustomHashMap<Object[]> lookupTable = new 
Object2LongOpenCustomHashMap<>(totalDocs, HASH_STRATEGY);
     lookupTable.defaultReturnValue(Long.MIN_VALUE);
 
-    for (SegmentDataManager segmentManager : segmentDataManagers) {
+    for (int segmentIndex = 0; segmentIndex < segmentDataManagers.size(); 
segmentIndex++) {
+      SegmentDataManager segmentManager = 
segmentDataManagers.get(segmentIndex);
       IndexSegment indexSegment = segmentManager.getSegment();
       int numTotalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
-      if (numTotalDocs > 0) {
+      MutableRoaringBitmap queryableDocIds = 
getQueryableDocIdsSnapshot(indexSegment);

Review Comment:
   The variable name `queryableDocIds` is ambiguous as it could refer to either 
the bitmap itself or a snapshot. Consider renaming to `queryableDocIdsSnapshot` 
to clearly indicate this is a snapshot taken at a specific point in time, 
matching the naming pattern used elsewhere in the codebase.



##########
pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java:
##########
@@ -397,4 +488,82 @@ public FieldSpec getColumnFieldSpec(String columnName) {
   public List<String> getPrimaryKeyColumns() {
     return _dimensionTable.get().getPrimaryKeyColumns();
   }
+
+  private void applyQueryableDocIdsForRecordLocations(List<SegmentDataManager> 
segmentDataManagers,
+      Object2ObjectOpenCustomHashMap<Object[], RecordLocation> 
recordLocationMap) {
+    if (!_enableUpsert) {
+      return;
+    }
+    if (recordLocationMap.isEmpty() || segmentDataManagers.isEmpty()) {
+      return;
+    }
+    List<MutableRoaringBitmap> queryableDocIdsBySegment = new 
ArrayList<>(segmentDataManagers.size());
+    for (int i = 0; i < segmentDataManagers.size(); i++) {
+      queryableDocIdsBySegment.add(new MutableRoaringBitmap());
+    }
+    for (RecordLocation recordLocation : recordLocationMap.values()) {
+      
queryableDocIdsBySegment.get(recordLocation._segmentIndex).add(recordLocation._docId);
+    }
+    applyQueryableDocIdsToSegments(segmentDataManagers, 
queryableDocIdsBySegment);
+  }
+
+  private void applyQueryableDocIdsForLookupTable(List<SegmentDataManager> 
segmentDataManagers,
+      Object2LongOpenCustomHashMap<Object[]> lookupTable) {
+    if (!_enableUpsert) {
+      return;
+    }
+    if (lookupTable.isEmpty() || segmentDataManagers.isEmpty()) {
+      return;
+    }
+    List<MutableRoaringBitmap> queryableDocIdsBySegment = new 
ArrayList<>(segmentDataManagers.size());
+    for (int i = 0; i < segmentDataManagers.size(); i++) {
+      queryableDocIdsBySegment.add(new MutableRoaringBitmap());
+    }
+    for (Object2LongOpenCustomHashMap.Entry<Object[]> entry : 
lookupTable.object2LongEntrySet()) {
+      long readerIdxAndDocId = entry.getLongValue();
+      int readerIdx = (int) (readerIdxAndDocId >>> 32);
+      int docId = (int) readerIdxAndDocId;
+      queryableDocIdsBySegment.get(readerIdx).add(docId);
+    }
+    applyQueryableDocIdsToSegments(segmentDataManagers, 
queryableDocIdsBySegment);
+  }
+
+  private void applyQueryableDocIdsToSegments(List<SegmentDataManager> 
segmentDataManagers,
+      List<MutableRoaringBitmap> queryableDocIdsBySegment) {
+    for (int i = 0; i < segmentDataManagers.size(); i++) {
+      IndexSegment segment = segmentDataManagers.get(i).getSegment();
+      if (!(segment instanceof ImmutableSegmentImpl)) {
+        continue;
+      }
+      MutableRoaringBitmap queryableDocIds = queryableDocIdsBySegment.get(i);
+      ThreadSafeMutableRoaringBitmap existingQueryableDocIds = 
segment.getQueryableDocIds();
+      if (existingQueryableDocIds != null) {
+        MutableRoaringBitmap existingSnapshot = 
existingQueryableDocIds.getMutableRoaringBitmap();
+        // Intersect with existing queryable doc IDs to honor any pre-existing 
constraints
+        // (e.g. from prior upsert or filtering logic) instead of overwriting 
them.

Review Comment:
   The logic for intersecting queryable doc IDs appears to assume that 
pre-existing constraints should always be honored. However, this could lead to 
unexpected behavior if segments have stale queryable doc IDs from previous 
operations. Consider documenting the expected state of segments when this 
method is called, or add validation to ensure segments are in the expected 
state before applying the intersection.
   ```suggestion
           // (e.g. from prior upsert or filtering logic) instead of 
overwriting them.
           // Validate that the intersection does not unexpectedly eliminate 
all documents when both
           // sides are non-empty; such a state typically indicates stale or 
incompatible constraints.
           if (!queryableDocIds.isEmpty() && !existingSnapshot.isEmpty()) {
             MutableRoaringBitmap intersectionCheck = queryableDocIds.clone();
             intersectionCheck.and(existingSnapshot);
             Preconditions.checkState(!intersectionCheck.isEmpty(),
                 "Existing queryableDocIds for segment '%s' eliminate all docs 
from lookup-based queryableDocIds",
                 segment.getSegmentName());
           }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to