gianm commented on code in PR #19212:
URL: https://github.com/apache/druid/pull/19212#discussion_r3014403357


##########
embedded-tests/src/test/java/org/apache/druid/testing/embedded/server/EmbeddedBrokerServerViewOfLatestUsedSegmentsTest.java:
##########
@@ -0,0 +1,339 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.server;
+
+import org.apache.druid.client.BrokerServerView;
+import org.apache.druid.client.selector.ServerSelector;
+import org.apache.druid.common.utils.IdUtils;
+import org.apache.druid.indexing.common.task.TaskBuilder;
+import org.apache.druid.indexing.overlord.Segments;
+import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.query.TableDataSource;
+import org.apache.druid.server.coordinator.CoordinatorDynamicConfig;
+import 
org.apache.druid.sql.calcite.schema.BrokerServerViewOfLatestUsedSegments;
+import org.apache.druid.testing.embedded.EmbeddedBroker;
+import org.apache.druid.testing.embedded.EmbeddedClusterApis;
+import org.apache.druid.testing.embedded.EmbeddedCoordinator;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import org.apache.druid.testing.embedded.EmbeddedHistorical;
+import org.apache.druid.testing.embedded.EmbeddedIndexer;
+import org.apache.druid.testing.embedded.EmbeddedOverlord;
+import org.apache.druid.testing.embedded.indexing.Resources;
+import org.apache.druid.testing.embedded.junit5.EmbeddedClusterTestBase;
+import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.TimelineLookup;
+import org.apache.druid.timeline.TimelineObjectHolder;
+import org.apache.druid.timeline.partition.PartitionChunk;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Integration test for {@link BrokerServerViewOfLatestUsedSegments}.
+ * Verifies that the merged timeline covers both loaded segments and 
metadata-only segments.
+ */
+public class EmbeddedBrokerServerViewOfLatestUsedSegmentsTest extends 
EmbeddedClusterTestBase
+{
+  private String fixedDataSource;
+
+  private final EmbeddedCoordinator coordinator = new EmbeddedCoordinator();
+  private final EmbeddedOverlord overlord = new EmbeddedOverlord();
+  private final EmbeddedIndexer indexer = new EmbeddedIndexer();
+  private final EmbeddedHistorical historical = new EmbeddedHistorical();
+  private final EmbeddedBroker broker = new EmbeddedBroker();
+
+  @Override
+  protected EmbeddedDruidCluster createCluster()
+  {
+    indexer.addProperty("druid.segment.handoff.pollDuration", "PT0.1s");
+    broker.addProperty("druid.sql.planner.metadataSegmentCacheEnable", "true");
+    broker.addProperty("druid.sql.planner.metadataSegmentPollPeriod", "100");
+
+    return EmbeddedDruidCluster.withEmbeddedDerbyAndZookeeper()
+                               .useLatchableEmitter()
+                               .addServer(overlord)
+                               .addServer(coordinator)
+                               .addServer(indexer)
+                               .addServer(historical)
+                               .addServer(broker);
+  }
+
+  @BeforeAll
+  @Override
+  public void setup() throws Exception
+  {
+    fixedDataSource = EmbeddedClusterApis.createTestDatasourceName();
+    dataSource = fixedDataSource;
+    super.setup();
+    ingestData(dataSource);
+    cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator, 
broker);
+  }
+
+  @BeforeEach
+  @Override
+  protected void refreshDatasourceName()
+  {
+    dataSource = fixedDataSource;
+  }
+
+  @Test
+  public void testTimelineContainsAllAvailableSegments()
+  {
+    final var view = 
broker.bindings().getInstance(BrokerServerViewOfLatestUsedSegments.class);

Review Comment:
   It's more in the spirit of an embedded test to interact with the services 
through HTTP APIs rather than by directly interacting with Guice-provided Java 
objects. Perhaps you can introduce a runtime property to bind the 
`TimelineServerView` to this, and then test the resulting behavior.



##########
sql/src/main/java/org/apache/druid/sql/calcite/schema/MetadataSegmentView.java:
##########
@@ -71,21 +82,38 @@ public class MetadataSegmentView
   /**
    * Use {@link ImmutableSortedSet} so that the order of segments is 
deterministic and
    * sys.segments queries return the segments in sorted order based on 
segmentId.
-   *
+   * <p>
    * Volatile since this reference is reassigned in {@code poll()} and then 
read in {@code getPublishedSegments()}
    * from other threads.
    */
   @MonotonicNonNull
   private volatile ImmutableSortedSet<SegmentStatusInCluster> 
publishedSegments = null;
+
+  /**
+   * Collection of callbacks watching on segments changes.
+   */
+  private final ConcurrentMap<MetadataSegmentViewCallback, Executor> 
segmentViewCallbacks;
   /**
-   * Caches the replication factor for segment IDs. In case of coordinator 
restarts or leadership re-elections, the coordinator API returns `null` 
replication factor until load rules are evaluated.
+   * A set containing the identifiers of the segments currently being managed 
or tracked by this view.
+   * <p>
+   * This field is immutable and provides a snapshot of segment identifiers at 
a given point

Review Comment:
   In what sense is it immutable? It does look like it is mutated on each call 
to `poll`.
   
   It looks like it isn't used outside `poll`, so for clarity the javadoc could 
say something like "this is not accessed outside `poll()` and so does not need 
synchronization".



##########
sql/src/main/java/org/apache/druid/sql/calcite/schema/BrokerServerViewOfLatestUsedSegments.java:
##########
@@ -0,0 +1,380 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.sql.calcite.schema;
+
+import com.google.common.collect.Ordering;
+import com.google.inject.Inject;
+import org.apache.druid.client.BrokerServerView;
+import org.apache.druid.client.BrokerViewOfCoordinatorConfig;
+import org.apache.druid.client.DruidServer;
+import org.apache.druid.client.ImmutableDruidServer;
+import org.apache.druid.client.ServerView;
+import org.apache.druid.client.TimelineServerView;
+import org.apache.druid.client.selector.ServerSelector;
+import org.apache.druid.client.selector.TierSelectorStrategy;
+import org.apache.druid.guice.ManageLifecycle;
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.concurrent.Execs;
+import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.query.QueryRunner;
+import org.apache.druid.query.TableDataSource;
+import org.apache.druid.segment.realtime.appenderator.SegmentSchemas;
+import org.apache.druid.server.coordination.DruidServerMetadata;
+import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentId;
+import org.apache.druid.timeline.TimelineLookup;
+import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.partition.PartitionChunk;
+
+import javax.inject.Named;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Executor;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Function;
+
+/**
+ * A {@link TimelineServerView} that exposes a superset of {@link 
BrokerServerView}:
+ * segments available on servers plus used segments that exist only in 
metadata.
+ * <p>
+ * This class maintains a merged per-datasource timeline updated incrementally 
from
+ * {@link BrokerServerView} and {@link MetadataSegmentView} callbacks.
+ */
+@ManageLifecycle
+public class BrokerServerViewOfLatestUsedSegments implements TimelineServerView
+{
+  private static final Logger log = new 
Logger(BrokerServerViewOfLatestUsedSegments.class);
+
+  private final BrokerServerView brokerServerView;
+  private final boolean isCacheEnabled;
+  private final TierSelectorStrategy historicalTierSelectorStrategy;
+  private final TierSelectorStrategy realtimeTierSelectorStrategy;
+  private final BrokerViewOfCoordinatorConfig brokerViewOfCoordinatorConfig;
+
+  // LOCK ORDERING: BsvCallback executes on the BrokerServerView callback 
thread, which holds
+  // BrokerServerView.lock (via directExecutor). BsvCallback then acquires 
this.lock.
+  // Therefore, this.lock must NEVER be held when calling into 
BrokerServerView (e.g.getBsvSelector),
+  // as that would invert the order and risk deadlock.
+  private final Object lock = new Object();
+  private final Map<String, VersionedIntervalTimeline<String, ServerSelector>> 
mergedTimelines = new HashMap<>();
+  private final Map<SegmentId, ServerSelector> mergedSelectors = new 
HashMap<>();
+  private final Map<SegmentId, DataSegment> metadataSegments = new HashMap<>();
+  private final Set<SegmentId> metadataRemovedSegmentIds = new HashSet<>();
+
+  private final ConcurrentMap<TimelineCallback, Executor> timelineCallbacks = 
new ConcurrentHashMap<>();
+  private final CountDownLatch brokerViewInitLatch = new CountDownLatch(1);
+  private final CountDownLatch metadataViewInitLatch = new CountDownLatch(1);
+  private final AtomicBoolean initialized = new AtomicBoolean(false);
+
+  @Inject
+  public BrokerServerViewOfLatestUsedSegments(
+      final BrokerServerView brokerServerView,
+      final MetadataSegmentView metadataSegmentView,
+      final BrokerSegmentMetadataCacheConfig cacheConfig,
+      final TierSelectorStrategy historicalTierSelectorStrategy,
+      @Named(BrokerServerView.REALTIME_SELECTOR) final TierSelectorStrategy 
realtimeTierSelectorStrategy,
+      final BrokerViewOfCoordinatorConfig brokerViewOfCoordinatorConfig
+  )
+  {
+    this.brokerServerView = brokerServerView;
+    this.isCacheEnabled = cacheConfig.isMetadataSegmentCacheEnable();
+    this.historicalTierSelectorStrategy = historicalTierSelectorStrategy;
+    this.realtimeTierSelectorStrategy = realtimeTierSelectorStrategy;
+    this.brokerViewOfCoordinatorConfig = brokerViewOfCoordinatorConfig;
+
+    brokerServerView.registerTimelineCallback(Execs.directExecutor(), new 
BsvCallback());
+    metadataSegmentView.registerSegmentViewCallback(Execs.directExecutor(), 
new MsvCallback());
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public <T extends TimelineLookup<String, ServerSelector>> Optional<T> 
getTimeline(final TableDataSource dataSource)
+  {
+    ensureCacheEnabled();
+
+    final VersionedIntervalTimeline<String, ServerSelector> mergedTimeline;
+    synchronized (lock) {
+      mergedTimeline = mergedTimelines.get(dataSource.getName());
+      if (mergedTimeline == null || mergedTimeline.isEmpty()) {
+        return Optional.empty();
+      }
+    }
+
+    return Optional.of((T) mergedTimeline);
+  }
+
+  @Override
+  public List<ImmutableDruidServer> getDruidServers()
+  {
+    ensureCacheEnabled();
+    return brokerServerView.getDruidServers();
+  }
+
+  @Override
+  public <T> QueryRunner<T> getQueryRunner(final DruidServer server)
+  {
+    ensureCacheEnabled();
+    return brokerServerView.getQueryRunner(server);
+  }
+
+  @Override
+  public void registerTimelineCallback(final Executor exec, final 
TimelineCallback callback)
+  {
+    ensureCacheEnabled();
+    timelineCallbacks.put(callback, exec);
+  }
+
+  @Override
+  public void registerServerCallback(final Executor exec, final ServerCallback 
callback)
+  {
+    ensureCacheEnabled();
+    brokerServerView.registerServerCallback(exec, callback);
+  }
+
+  @Override
+  public void registerSegmentCallback(final Executor exec, final 
SegmentCallback callback)
+  {
+    ensureCacheEnabled();
+    brokerServerView.registerSegmentCallback(exec, callback);
+  }
+
+  private class BsvCallback implements TimelineCallback
+  {
+    @Override
+    public ServerView.CallbackAction timelineInitialized()
+    {
+      brokerViewInitLatch.countDown();
+      maybeFireInitialized();
+      return ServerView.CallbackAction.CONTINUE;
+    }
+
+    @Override
+    public ServerView.CallbackAction segmentAdded(final DruidServerMetadata 
server, final DataSegment segment)
+    {
+      final ServerSelector selector = getBsvSelector(segment);
+      final boolean visible;
+
+      synchronized (lock) {
+        visible = selector != null && 
!metadataRemovedSegmentIds.contains(segment.getId());
+        if (visible) {
+          upsertMergedSelector(selector);
+        }
+      }
+
+      if (visible) {
+        runTimelineCallbacks(cb -> cb.segmentAdded(server, segment));
+      }
+      return ServerView.CallbackAction.CONTINUE;
+    }
+
+    @Override
+    public ServerView.CallbackAction segmentRemoved(final DataSegment segment)
+    {
+      final boolean shouldRemainVisibleAsUnavailable;
+      final boolean shouldFireRemoved;
+
+      synchronized (lock) {
+        final SegmentId segmentId = segment.getId();
+        shouldRemainVisibleAsUnavailable = 
metadataSegments.containsKey(segmentId)
+                                           && 
!metadataRemovedSegmentIds.contains(segmentId);
+        if (shouldRemainVisibleAsUnavailable) {
+          upsertMergedSelector(createEmptySelector(segment));
+        } else {
+          removeMergedSelector(segmentId);
+        }
+        shouldFireRemoved = !shouldRemainVisibleAsUnavailable && 
!metadataRemovedSegmentIds.contains(segmentId);
+      }
+
+      if (shouldFireRemoved) {
+        runTimelineCallbacks(cb -> cb.segmentRemoved(segment));
+      }
+      return ServerView.CallbackAction.CONTINUE;
+    }
+
+    @Override
+    public ServerView.CallbackAction serverSegmentRemoved(final 
DruidServerMetadata server, final DataSegment segment)
+    {
+      runTimelineCallbacks(cb -> cb.serverSegmentRemoved(server, segment));
+      return ServerView.CallbackAction.CONTINUE;
+    }
+
+    @Override
+    public ServerView.CallbackAction segmentSchemasAnnounced(final 
SegmentSchemas segmentSchemas)
+    {
+      runTimelineCallbacks(cb -> cb.segmentSchemasAnnounced(segmentSchemas));
+      return ServerView.CallbackAction.CONTINUE;
+    }
+  }
+
+  private class MsvCallback implements MetadataSegmentViewCallback
+  {
+    @Override
+    public void timelineInitialized()
+    {
+      metadataViewInitLatch.countDown();
+      maybeFireInitialized();
+    }
+
+    @Override
+    public void segmentsAdded(final Collection<DataSegment> segments)
+    {
+      // Pre-compute BSV selectors outside lock to respect lock ordering.
+      final Map<SegmentId, ServerSelector> bsvSelectors = new HashMap<>();
+      for (DataSegment segment : segments) {
+        bsvSelectors.put(segment.getId(), getBsvSelector(segment));
+      }
+
+      synchronized (lock) {
+        for (DataSegment segment : segments) {
+          final SegmentId segmentId = segment.getId();
+          final ServerSelector bsvSelector = bsvSelectors.get(segmentId);
+
+          metadataSegments.put(segmentId, segment);
+          metadataRemovedSegmentIds.remove(segmentId);
+          if (bsvSelector != null) {
+            if (mergedSelectors.get(segmentId) != bsvSelector) {
+              upsertMergedSelector(bsvSelector);
+            }
+          } else if (!mergedSelectors.containsKey(segmentId)) {
+            upsertMergedSelector(createEmptySelector(segment));
+          }
+        }
+      }
+    }
+
+    @Override
+    public void segmentsRemoved(final Collection<SegmentId> segmentIds)
+    {
+      final List<DataSegment> toFireRemoved = new ArrayList<>();
+
+      synchronized (lock) {
+        for (SegmentId segmentId : segmentIds) {
+          final DataSegment metadataSegment = 
metadataSegments.remove(segmentId);
+          metadataRemovedSegmentIds.add(segmentId);

Review Comment:
   What is this set being used for? It looks like it grows every time a segment 
is removed, and doesn't shrink unless a segment is added back after being 
removed. Over time, natural removal of segments from compaction, reindexing, 
etc will cause this to grow without bound. I wonder if we can solve the problem 
it is trying to solve some other way.



##########
sql/src/main/java/org/apache/druid/sql/calcite/schema/MetadataSegmentView.java:
##########
@@ -63,6 +72,8 @@
 public class MetadataSegmentView
 {
   private static final EmittingLogger log = new 
EmittingLogger(MetadataSegmentView.class);
+  private static final String NEW_SEGMENTS_DETECTED_METRIC_NAME = 
"metadataSegmentView/segments/added";

Review Comment:
Elsewhere in this file, `Metric.SYNC_DURATION_MILLIS` is used for the sync 
time. The class here parallels the metadata sync on the Coordinator, so it 
should use the same metric names whenever an equivalent is available. For 
these two, I think the equivalents are `Metric.UPDATED_USED_SEGMENTS` and 
`Metric.DELETED_SEGMENTS`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to