This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 8b88697e73 Fix the ConcurrentModificationException for And/Or DocIdSet 
(#12611)
8b88697e73 is described below

commit 8b88697e739127ab079658b5371b65c666c8d907
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Mon Mar 11 10:45:31 2024 -0700

    Fix the ConcurrentModificationException for And/Or DocIdSet (#12611)
---
 .../pinot/core/operator/docidsets/AndDocIdSet.java | 51 ++++++++++++----------
 .../pinot/core/operator/docidsets/OrDocIdSet.java  | 49 +++++++++++----------
 2 files changed, 55 insertions(+), 45 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
index 503fcb1027..64ec2816aa 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
@@ -20,11 +20,10 @@ package org.apache.pinot.core.operator.docidsets;
 
 import java.util.ArrayList;
 import java.util.Comparator;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
 import javax.annotation.Nullable;
-import org.apache.commons.collections.MapUtils;
 import org.apache.pinot.common.utils.config.QueryOptionsUtils;
 import org.apache.pinot.core.common.BlockDocIdIterator;
 import org.apache.pinot.core.common.BlockDocIdSet;
@@ -57,14 +56,16 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap;
  * </ul>
  */
 public final class AndDocIdSet implements BlockDocIdSet {
-  private final List<BlockDocIdSet> _docIdSets;
+  // Keep the scan based BlockDocIdSets to be accessed when collecting query 
execution stats
+  private final AtomicReference<List<BlockDocIdSet>> _scanBasedDocIdSets = new 
AtomicReference<>();
   private final boolean _cardinalityBasedRankingForScan;
-  private long _numEntriesScannedInFilter = 0L;
+  private List<BlockDocIdSet> _docIdSets;
+  private volatile long _numEntriesScannedInFilter;
 
   public AndDocIdSet(List<BlockDocIdSet> docIdSets, @Nullable Map<String, 
String> queryOptions) {
-    _docIdSets = docIdSets instanceof ArrayList ? docIdSets : new 
ArrayList<>(docIdSets);
+    _docIdSets = docIdSets;
     _cardinalityBasedRankingForScan =
-        !MapUtils.isEmpty(queryOptions) && 
QueryOptionsUtils.isAndScanReorderingEnabled(queryOptions);
+        queryOptions != null && 
QueryOptionsUtils.isAndScanReorderingEnabled(queryOptions);
   }
 
   @Override
@@ -77,33 +78,33 @@ public final class AndDocIdSet implements BlockDocIdSet {
     List<BitmapBasedDocIdIterator> bitmapBasedDocIdIterators = new 
ArrayList<>();
     List<ScanBasedDocIdIterator> scanBasedDocIdIterators = new ArrayList<>();
     List<BlockDocIdIterator> remainingDocIdIterators = new ArrayList<>();
+    long numEntriesScannedForNonScanBasedDocIdSets = 0L;
+    List<BlockDocIdSet> scanBasedDocIdSets = new ArrayList<>();
 
-    Iterator<BlockDocIdSet> iterator = _docIdSets.iterator();
-    for (int i = 0; iterator.hasNext(); i++) {
-      BlockDocIdSet blockDocIdSet = iterator.next();
-      BlockDocIdIterator docIdIterator = blockDocIdSet.iterator();
+    for (int i = 0; i < numDocIdSets; i++) {
+      BlockDocIdSet docIdSet = _docIdSets.get(i);
+      BlockDocIdIterator docIdIterator = docIdSet.iterator();
       allDocIdIterators[i] = docIdIterator;
       if (docIdIterator instanceof SortedDocIdIterator) {
         sortedDocIdIterators.add((SortedDocIdIterator) docIdIterator);
-        // aggregate the number of entries scanned in filter before removing 
the iterator
-        _numEntriesScannedInFilter += 
blockDocIdSet.getNumEntriesScannedInFilter();
-        // do not keep holding on to the _docIdRanges since they will occupy 
heap space during the query execution
-        iterator.remove();
+        numEntriesScannedForNonScanBasedDocIdSets += 
docIdSet.getNumEntriesScannedInFilter();
       } else if (docIdIterator instanceof BitmapBasedDocIdIterator) {
         bitmapBasedDocIdIterators.add((BitmapBasedDocIdIterator) 
docIdIterator);
-        // aggregate the number of entries scanned in filter before removing 
the iterator
-        // some BitmapBasedDocIdIterator may be generated from underlying 
index types (e.g. H3Index) that actually
-        // scans documents, so we need to aggregate them here
-        _numEntriesScannedInFilter += 
blockDocIdSet.getNumEntriesScannedInFilter();
-        // do not keep holding on to the _docIdRanges since they will occupy 
heap space during the query execution
-        iterator.remove();
+        numEntriesScannedForNonScanBasedDocIdSets += 
docIdSet.getNumEntriesScannedInFilter();
       } else if (docIdIterator instanceof ScanBasedDocIdIterator) {
         scanBasedDocIdIterators.add((ScanBasedDocIdIterator) docIdIterator);
+        scanBasedDocIdSets.add(docIdSet);
       } else {
         remainingDocIdIterators.add(docIdIterator);
+        scanBasedDocIdSets.add(docIdSet);
       }
     }
 
+    // Set _docIdSets to null so that underlying BlockDocIdSets can be garbage 
collected
+    _docIdSets = null;
+    _numEntriesScannedInFilter = numEntriesScannedForNonScanBasedDocIdSets;
+    _scanBasedDocIdSets.set(scanBasedDocIdSets);
+
     // evaluate the bitmaps in the order of the lowest matching num docIds 
comes first, so that we minimize the number
     // of containers (range) for comparison from the beginning, as will 
minimize the effort of bitmap AND application
     bitmapBasedDocIdIterators.sort(Comparator.comparing(x -> 
x.getDocIds().getCardinality()));
@@ -186,9 +187,13 @@ public final class AndDocIdSet implements BlockDocIdSet {
 
   @Override
   public long getNumEntriesScannedInFilter() {
-    for (BlockDocIdSet child : _docIdSets) {
-      _numEntriesScannedInFilter += child.getNumEntriesScannedInFilter();
+    List<BlockDocIdSet> scanBasedDocIdSets = _scanBasedDocIdSets.get();
+    long numEntriesScannedForScanBasedDocIdSets = 0L;
+    if (scanBasedDocIdSets != null) {
+      for (BlockDocIdSet scanBasedDocIdSet : scanBasedDocIdSets) {
+        numEntriesScannedForScanBasedDocIdSets += 
scanBasedDocIdSet.getNumEntriesScannedInFilter();
+      }
     }
-    return _numEntriesScannedInFilter;
+    return _numEntriesScannedInFilter + numEntriesScannedForScanBasedDocIdSets;
   }
 }
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/OrDocIdSet.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/OrDocIdSet.java
index 76ac7a12e8..fdc522286b 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/OrDocIdSet.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/OrDocIdSet.java
@@ -19,8 +19,8 @@
 package org.apache.pinot.core.operator.docidsets;
 
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicReference;
 import org.apache.pinot.core.common.BlockDocIdIterator;
 import org.apache.pinot.core.common.BlockDocIdSet;
 import org.apache.pinot.core.operator.dociditerators.BitmapBasedDocIdIterator;
@@ -48,12 +48,14 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap;
  * </ul>
  */
 public final class OrDocIdSet implements BlockDocIdSet {
-  private final List<BlockDocIdSet> _docIdSets;
+  // Keep the scan based BlockDocIdSets to be accessed when collecting query 
execution stats
+  private final AtomicReference<List<BlockDocIdSet>> _scanBasedDocIdSets = new 
AtomicReference<>();
   private final int _numDocs;
-  private long _numEntriesScannedInFilter = 0L;
+  private List<BlockDocIdSet> _docIdSets;
+  private volatile long _numEntriesScannedInFilter = 0L;
 
   public OrDocIdSet(List<BlockDocIdSet> docIdSets, int numDocs) {
-    _docIdSets = docIdSets instanceof ArrayList ? docIdSets : new 
ArrayList<>(docIdSets);
+    _docIdSets = docIdSets;
     _numDocs = numDocs;
   }
 
@@ -64,30 +66,29 @@ public final class OrDocIdSet implements BlockDocIdSet {
     List<SortedDocIdIterator> sortedDocIdIterators = new ArrayList<>();
     List<BitmapBasedDocIdIterator> bitmapBasedDocIdIterators = new 
ArrayList<>();
     List<BlockDocIdIterator> remainingDocIdIterators = new ArrayList<>();
+    long numEntriesScannedForNonScanBasedDocIdSets = 0L;
+    List<BlockDocIdSet> scanBasedDocIdSets = new ArrayList<>();
 
-    Iterator<BlockDocIdSet> iterator = _docIdSets.iterator();
-    for (int i = 0; iterator.hasNext(); i++) {
-      BlockDocIdSet blockDocIdSet = iterator.next();
-      BlockDocIdIterator docIdIterator = blockDocIdSet.iterator();
+    for (int i = 0; i < numDocIdSets; i++) {
+      BlockDocIdSet docIdSet = _docIdSets.get(i);
+      BlockDocIdIterator docIdIterator = docIdSet.iterator();
       allDocIdIterators[i] = docIdIterator;
       if (docIdIterator instanceof SortedDocIdIterator) {
         sortedDocIdIterators.add((SortedDocIdIterator) docIdIterator);
-        // aggregate the number of entries scanned in filter before removing 
the iterator
-        _numEntriesScannedInFilter += 
blockDocIdSet.getNumEntriesScannedInFilter();
-        // do not keep holding on to the _docIdRanges since they will occupy 
heap space during the query execution
-        iterator.remove();
+        numEntriesScannedForNonScanBasedDocIdSets += 
docIdSet.getNumEntriesScannedInFilter();
       } else if (docIdIterator instanceof BitmapBasedDocIdIterator) {
-        bitmapBasedDocIdIterators.add((BitmapBasedDocIdIterator) 
docIdIterator);
-        // aggregate the number of entries scanned in filter before removing 
the iterator
-        // some BitmapBasedDocIdIterator may be generated from underlying 
index types (e.g. H3Index) that actually
-        // scans documents, so we need to aggregate them here
-        _numEntriesScannedInFilter += 
blockDocIdSet.getNumEntriesScannedInFilter();
-        // do not keep holding on to the bitmaps since they will occupy heap 
space during the query execution
-        iterator.remove();
+        numEntriesScannedForNonScanBasedDocIdSets += 
docIdSet.getNumEntriesScannedInFilter();
       } else {
         remainingDocIdIterators.add(docIdIterator);
+        scanBasedDocIdSets.add(docIdSet);
       }
     }
+
+    // Set _docIdSets to null so that underlying BlockDocIdSets can be garbage 
collected
+    _docIdSets = null;
+    _numEntriesScannedInFilter = numEntriesScannedForNonScanBasedDocIdSets;
+    _scanBasedDocIdSets.set(scanBasedDocIdSets);
+
     int numSortedDocIdIterators = sortedDocIdIterators.size();
     int numBitmapBasedDocIdIterators = bitmapBasedDocIdIterators.size();
     if (numSortedDocIdIterators + numBitmapBasedDocIdIterators > 1) {
@@ -127,9 +128,13 @@ public final class OrDocIdSet implements BlockDocIdSet {
 
   @Override
   public long getNumEntriesScannedInFilter() {
-    for (BlockDocIdSet docIdSet : _docIdSets) {
-      _numEntriesScannedInFilter += docIdSet.getNumEntriesScannedInFilter();
+    List<BlockDocIdSet> scanBasedDocIdSets = _scanBasedDocIdSets.get();
+    long numEntriesScannedForScanBasedDocIdSets = 0L;
+    if (scanBasedDocIdSets != null) {
+      for (BlockDocIdSet scanBasedDocIdSet : scanBasedDocIdSets) {
+        numEntriesScannedForScanBasedDocIdSets += 
scanBasedDocIdSet.getNumEntriesScannedInFilter();
+      }
     }
-    return _numEntriesScannedInFilter;
+    return _numEntriesScannedInFilter + numEntriesScannedForScanBasedDocIdSets;
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to