surekhasaharan commented on a change in pull request #7425: Add is_overshadowed
column to sys.segments table
URL: https://github.com/apache/incubator-druid/pull/7425#discussion_r279509422
##########
File path:
sql/src/main/java/org/apache/druid/sql/calcite/schema/MetadataSegmentView.java
##########
@@ -134,40 +138,41 @@ public void stop()
private void poll()
{
log.info("polling published segments from coordinator");
- final JsonParserIterator<DataSegment> metadataSegments =
getMetadataSegments(
+ final JsonParserIterator<SegmentWithOvershadowedStatus> metadataSegments =
getMetadataSegments(
coordinatorDruidLeaderClient,
jsonMapper,
responseHandler,
segmentWatcherConfig.getWatchedDataSources()
);
- final DateTime timestamp = DateTimes.nowUtc();
+ final ImmutableSortedSet.Builder<SegmentWithOvershadowedStatus> builder =
ImmutableSortedSet.naturalOrder();
while (metadataSegments.hasNext()) {
- final DataSegment interned =
DataSegmentInterner.intern(metadataSegments.next());
- // timestamp is used to filter deleted segments
- publishedSegments.put(interned, timestamp);
+ final SegmentWithOvershadowedStatus segment = metadataSegments.next();
+ final DataSegment interned =
DataSegmentInterner.intern(segment.getDataSegment());
+ final SegmentWithOvershadowedStatus segmentWithOvershadowedStatus = new
SegmentWithOvershadowedStatus(
+ interned,
+ segment.isOvershadowed()
+ );
+ builder.add(segmentWithOvershadowedStatus);
+ }
+ // build operation can be expensive for high cardinality segments, so
calling it outside "lock"
+ final ImmutableSortedSet<SegmentWithOvershadowedStatus> immutableSortedSet
= builder.build();
+ synchronized (lock) {
+ publishedSegments = immutableSortedSet;
}
- // filter the segments from cache whose timestamp is not equal to latest
timestamp stored,
- // since the presence of a segment with an earlier timestamp indicates that
- // "that" segment is not returned by coordinator in latest poll, so it's
- // likely deleted and therefore we remove it from publishedSegments
- // Since segments are not atomically replaced because it can cause high
- // memory footprint due to large number of published segments, so
- // we are incrementally removing deleted segments from the map
- // This means publishedSegments will be eventually consistent with
- // the segments in coordinator
- publishedSegments.entrySet().removeIf(e -> e.getValue() != timestamp);
cachePopulated.set(true);
}
- public Iterator<DataSegment> getPublishedSegments()
+ public Iterator<SegmentWithOvershadowedStatus> getPublishedSegments()
{
if (isCacheEnabled) {
Preconditions.checkState(
lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS) &&
cachePopulated.get(),
"hold on, still syncing published segments"
);
- return publishedSegments.keySet().iterator();
+ synchronized (lock) {
Review comment:
removed lock and made `publishedSegments` volatile
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]