Copilot commented on code in PR #17606:
URL: https://github.com/apache/pinot/pull/17606#discussion_r2768830859
##########
pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java:
##########
@@ -222,38 +262,64 @@ private DimensionTable createFastLookupDimensionTable() {
int totalDocs = 0;
for (SegmentDataManager segmentManager : segmentDataManagers) {
IndexSegment indexSegment = segmentManager.getSegment();
- totalDocs += indexSegment.getSegmentMetadata().getTotalDocs();
+ totalDocs += getTotalDocCount(indexSegment);
}
Object2ObjectOpenCustomHashMap<Object[], Object[]> lookupTable =
new Object2ObjectOpenCustomHashMap<>(totalDocs, HASH_STRATEGY);
+ Object2ObjectOpenCustomHashMap<Object[], RecordLocation>
recordLocationMap =
+ new Object2ObjectOpenCustomHashMap<>(totalDocs, HASH_STRATEGY);
List<String> valueColumns = getValueColumns(schema.getColumnNames(),
primaryKeyColumns);
- for (SegmentDataManager segmentManager : segmentDataManagers) {
+ for (int segmentIndex = 0; segmentIndex < segmentDataManagers.size();
segmentIndex++) {
+ SegmentDataManager segmentManager =
segmentDataManagers.get(segmentIndex);
IndexSegment indexSegment = segmentManager.getSegment();
+ MutableRoaringBitmap queryableDocIds =
getQueryableDocIdsSnapshot(indexSegment);
Review Comment:
The variable name `queryableDocIds` is ambiguous as it could refer to either
the bitmap itself or a snapshot. Consider renaming to `queryableDocIdsSnapshot`
to clearly indicate this is a snapshot taken at a specific point in time,
matching the naming pattern used elsewhere in the codebase (e.g., line 550).
##########
pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java:
##########
@@ -300,46 +367,70 @@ private DimensionTable createMemOptimisedDimensionTable()
{
int totalDocs = 0;
for (SegmentDataManager segmentManager : segmentDataManagers) {
IndexSegment indexSegment = segmentManager.getSegment();
- totalDocs += indexSegment.getSegmentMetadata().getTotalDocs();
+ totalDocs += getTotalDocCount(indexSegment);
}
Object2LongOpenCustomHashMap<Object[]> lookupTable = new
Object2LongOpenCustomHashMap<>(totalDocs, HASH_STRATEGY);
lookupTable.defaultReturnValue(Long.MIN_VALUE);
- for (SegmentDataManager segmentManager : segmentDataManagers) {
+ for (int segmentIndex = 0; segmentIndex < segmentDataManagers.size();
segmentIndex++) {
+ SegmentDataManager segmentManager =
segmentDataManagers.get(segmentIndex);
IndexSegment indexSegment = segmentManager.getSegment();
int numTotalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
- if (numTotalDocs > 0) {
+ MutableRoaringBitmap queryableDocIds =
getQueryableDocIdsSnapshot(indexSegment);
Review Comment:
The variable name `queryableDocIds` is ambiguous as it could refer to either
the bitmap itself or a snapshot. Consider renaming to `queryableDocIdsSnapshot`
to clearly indicate this is a snapshot taken at a specific point in time,
matching the naming pattern used elsewhere in the codebase.
##########
pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java:
##########
@@ -397,4 +488,82 @@ public FieldSpec getColumnFieldSpec(String columnName) {
public List<String> getPrimaryKeyColumns() {
return _dimensionTable.get().getPrimaryKeyColumns();
}
+
+ private void applyQueryableDocIdsForRecordLocations(List<SegmentDataManager>
segmentDataManagers,
+ Object2ObjectOpenCustomHashMap<Object[], RecordLocation>
recordLocationMap) {
+ if (!_enableUpsert) {
+ return;
+ }
+ if (recordLocationMap.isEmpty() || segmentDataManagers.isEmpty()) {
+ return;
+ }
+ List<MutableRoaringBitmap> queryableDocIdsBySegment = new
ArrayList<>(segmentDataManagers.size());
+ for (int i = 0; i < segmentDataManagers.size(); i++) {
+ queryableDocIdsBySegment.add(new MutableRoaringBitmap());
+ }
+ for (RecordLocation recordLocation : recordLocationMap.values()) {
+
queryableDocIdsBySegment.get(recordLocation._segmentIndex).add(recordLocation._docId);
+ }
+ applyQueryableDocIdsToSegments(segmentDataManagers,
queryableDocIdsBySegment);
+ }
+
+ private void applyQueryableDocIdsForLookupTable(List<SegmentDataManager>
segmentDataManagers,
+ Object2LongOpenCustomHashMap<Object[]> lookupTable) {
+ if (!_enableUpsert) {
+ return;
+ }
+ if (lookupTable.isEmpty() || segmentDataManagers.isEmpty()) {
+ return;
+ }
+ List<MutableRoaringBitmap> queryableDocIdsBySegment = new
ArrayList<>(segmentDataManagers.size());
+ for (int i = 0; i < segmentDataManagers.size(); i++) {
+ queryableDocIdsBySegment.add(new MutableRoaringBitmap());
+ }
+ for (Object2LongOpenCustomHashMap.Entry<Object[]> entry :
lookupTable.object2LongEntrySet()) {
+ long readerIdxAndDocId = entry.getLongValue();
+ int readerIdx = (int) (readerIdxAndDocId >>> 32);
+ int docId = (int) readerIdxAndDocId;
+ queryableDocIdsBySegment.get(readerIdx).add(docId);
+ }
+ applyQueryableDocIdsToSegments(segmentDataManagers,
queryableDocIdsBySegment);
+ }
+
+ private void applyQueryableDocIdsToSegments(List<SegmentDataManager>
segmentDataManagers,
+ List<MutableRoaringBitmap> queryableDocIdsBySegment) {
+ for (int i = 0; i < segmentDataManagers.size(); i++) {
+ IndexSegment segment = segmentDataManagers.get(i).getSegment();
+ if (!(segment instanceof ImmutableSegmentImpl)) {
+ continue;
+ }
+ MutableRoaringBitmap queryableDocIds = queryableDocIdsBySegment.get(i);
+ ThreadSafeMutableRoaringBitmap existingQueryableDocIds =
segment.getQueryableDocIds();
+ if (existingQueryableDocIds != null) {
+ MutableRoaringBitmap existingSnapshot =
existingQueryableDocIds.getMutableRoaringBitmap();
+ // Intersect with existing queryable doc IDs to honor any pre-existing
constraints
+ // (e.g. from prior upsert or filtering logic) instead of overwriting
them.
Review Comment:
The logic for intersecting queryable doc IDs appears to assume that
pre-existing constraints should always be honored. However, this could lead to
unexpected behavior if segments have stale queryable doc IDs from previous
operations. Consider documenting the expected state of segments when this
method is called, or add validation to ensure segments are in the expected
state before applying the intersection.
```suggestion
// (e.g. from prior upsert or filtering logic) instead of
overwriting them.
// Validate that the intersection does not unexpectedly eliminate
all documents when both
// sides are non-empty; such a state typically indicates stale or
incompatible constraints.
if (!queryableDocIds.isEmpty() && !existingSnapshot.isEmpty()) {
MutableRoaringBitmap intersectionCheck = queryableDocIds.clone();
intersectionCheck.and(existingSnapshot);
Preconditions.checkState(!intersectionCheck.isEmpty(),
"Existing queryableDocIds for segment '%s' eliminate all docs
from lookup-based queryableDocIds",
segment.getSegmentName());
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]