capistrant commented on code in PR #18136: URL: https://github.com/apache/druid/pull/18136#discussion_r2178186767
########## server/src/main/java/org/apache/druid/client/selector/PreferredTierSelectorStrategy.java: ########## @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.client.selector; + + +import com.fasterxml.jackson.annotation.JacksonInject; +import it.unimi.dsi.fastutil.ints.Int2ObjectRBTreeMap; +import org.apache.druid.client.QueryableDruidServer; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.Query; +import org.apache.druid.timeline.DataSegment; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Set; + +public class PreferredTierSelectorStrategy extends AbstractTierSelectorStrategy +{ + private static final Logger log = new Logger(PreferredTierSelectorStrategy.class); + + private final String preferredTier; + private final TierSelectorStrategy priorityStrategy; + + public PreferredTierSelectorStrategy( + @JacksonInject ServerSelectorStrategy serverSelectorStrategy, + @JacksonInject PreferredTierSelectorStrategyConfig config + ) + { + super(serverSelectorStrategy); + this.preferredTier = config.getTier(); + + if 
(config.getPriority() == null) { + this.priorityStrategy = new HighestPriorityTierSelectorStrategy(serverSelectorStrategy); + } else { + if ("highest".equalsIgnoreCase(config.getPriority())) { + this.priorityStrategy = new HighestPriorityTierSelectorStrategy(serverSelectorStrategy); + } else if ("lowest".equalsIgnoreCase(config.getPriority())) { + this.priorityStrategy = new LowestPriorityTierSelectorStrategy(serverSelectorStrategy); + } else { + throw new IAE("druid.broker.select.tier.preferred.priority must be either 'highest' or 'lowest'"); + } + } + } + + @Override + public Comparator<Integer> getComparator() + { + return priorityStrategy.getComparator(); + } + + @Override + public <T> List<QueryableDruidServer> pick( + Query<T> query, + Int2ObjectRBTreeMap<Set<QueryableDruidServer>> prioritizedServers, + DataSegment segment, + int numServersToPick + ) + { + if (log.isDebugEnabled()) { + log.debug( + "Picking [%d] servers from preferred tier [%s] for segment [%s] with priority [%s]", + numServersToPick, preferredTier, segment.getId(), this.priorityStrategy.getClass().getSimpleName() + ); + } + + List<QueryableDruidServer> preferred = new ArrayList<>(numServersToPick); + List<QueryableDruidServer> nonPreferred = new ArrayList<>(numServersToPick); + for (Set<QueryableDruidServer> priorityServers : prioritizedServers.values()) { + for (QueryableDruidServer server : priorityServers) { + if (preferredTier.equals(server.getServer().getMetadata().getTier())) { + preferred.add(server); + if (preferred.size() == numServersToPick) { + return this.serverSelectorStrategy.pick(query, preferred, segment, numServersToPick); + } + } else { + // We have to iterate through all servers even the numbers of the non-preferred servers reach the limit + // This is because we don't know whether there're preferred servers left in the next priority set + nonPreferred.add(server); + } + } + } + + // Fill with non-preferred servers if we don't have enough preferred servers + int fillSize = 
numServersToPick - preferred.size(); + for (int i = 0; i < fillSize && i < nonPreferred.size(); i++) { + preferred.add(nonPreferred.get(i)); + } + + return this.serverSelectorStrategy.pick(query, preferred, segment, numServersToPick); + } Review Comment: Upon further review, I don't think that this is working as you designed. When I check out your changes locally and run the tests in a loop, I get lots of failures. I think the best approach here is to split all of the incoming servers into two Int2ObjectMaps that are sorted by priority. One of them contains all of the preferred servers. The other, the non-preferred servers. You then delegate to the underlying priority selector to do the actual pick for you from the preferred first, and then the non-preferred second (if necessary). I have a potential implementation in the suggestion below. A side effect of this change would be to revert the ServerSelectorStrategy back to using Set instead of the less restrictive Collection, since we no longer call it with lists when delegating pick to the priority strategy. 
```suggestion @Override public <T> List<QueryableDruidServer> pick( Query<T> query, Int2ObjectRBTreeMap<Set<QueryableDruidServer>> prioritizedServers, DataSegment segment, int numServersToPick ) { if (log.isDebugEnabled()) { log.debug( "Picking [%d] servers from preferred tier [%s] for segment [%s] with priority [%s]", numServersToPick, preferredTier, segment.getId(), this.priorityStrategy.getClass().getSimpleName() ); } int numPreferredServers = 0; Int2ObjectRBTreeMap<Set<QueryableDruidServer>> preferred = new Int2ObjectRBTreeMap<>(priorityStrategy.getComparator()); Int2ObjectRBTreeMap<Set<QueryableDruidServer>> nonPreferred = new Int2ObjectRBTreeMap<>(priorityStrategy.getComparator()); for (Set<QueryableDruidServer> priorityServers : prioritizedServers.values()) { if (numPreferredServers == numServersToPick) { // We have enough preferred servers to satisfy the request, no need to check further break; } for (QueryableDruidServer server : priorityServers) { if (preferredTier.equals(server.getServer().getMetadata().getTier())) { numPreferredServers++; preferred.computeIfAbsent( server.getServer().getPriority(), p -> new HashSet<>() ); preferred.get(server.getServer().getPriority()).add(server); if (numPreferredServers == numServersToPick) { // We have enough preferred servers to satisfy the request, no need to check further break; } } else { // We have to iterate through all servers even the numbers of the non-preferred servers reach the limit // This is because we don't know whether there are preferred servers left in the next priority set nonPreferred.computeIfAbsent( server.getServer().getPriority(), p -> new HashSet<>() ); nonPreferred.get(server.getServer().getPriority()).add(server); } } } List<QueryableDruidServer> picks = new ArrayList<>(numServersToPick); if (numPreferredServers > 0) { // If we have preferred servers, pick them first picks.addAll(priorityStrategy.pick(query, preferred, segment, numServersToPick)); } if (picks.size() < numServersToPick) { // 
If we don't have enough preferred servers, pick from the non-preferred ones int remaining = numServersToPick - picks.size(); picks.addAll(priorityStrategy.pick(query, nonPreferred, segment, remaining)); } return picks; } ``` ########## server/src/test/java/org/apache/druid/client/selector/TierSelectorStrategyTest.java: ########## @@ -260,17 +261,223 @@ public void testServerSelectorStrategyDefaults() servers.add(p0); RandomServerSelectorStrategy strategy = new RandomServerSelectorStrategy(); Assert.assertEquals(strategy.pick(servers, EasyMock.createMock(DataSegment.class)), p0); - Assert.assertEquals(strategy.pick(EasyMock.createMock(Query.class), servers, EasyMock.createMock(DataSegment.class)), p0); - ServerSelectorStrategy defaultDeprecatedServerSelectorStrategy = new ServerSelectorStrategy() { + Assert.assertEquals( + strategy.pick( + EasyMock.createMock(Query.class), + servers, + EasyMock.createMock(DataSegment.class) + ), p0 + ); + ServerSelectorStrategy defaultDeprecatedServerSelectorStrategy = new ServerSelectorStrategy() + { @Override - public <T> List<QueryableDruidServer> pick(@Nullable Query<T> query, Set<QueryableDruidServer> servers, DataSegment segment, - int numServersToPick) + public <T> List<QueryableDruidServer> pick( + @Nullable Query<T> query, Collection<QueryableDruidServer> servers, DataSegment segment, + int numServersToPick + ) { return strategy.pick(servers, segment, numServersToPick); } }; - Assert.assertEquals(defaultDeprecatedServerSelectorStrategy.pick(servers, EasyMock.createMock(DataSegment.class)), p0); - Assert.assertEquals(defaultDeprecatedServerSelectorStrategy.pick(servers, EasyMock.createMock(DataSegment.class), 1).get(0), p0); + Assert.assertEquals( + defaultDeprecatedServerSelectorStrategy.pick(servers, EasyMock.createMock(DataSegment.class)), + p0 + ); + Assert.assertEquals( + defaultDeprecatedServerSelectorStrategy.pick(servers, EasyMock.createMock(DataSegment.class), 1) + .get(0), p0 + ); + } + + /** + * Tests the 
PreferredTierSelectorStrategy with various configurations and expected selections. + * It verifies + * 1. The preferred tier is respected when picking a server. + * 2. When getting all servers, the preferred tier is ignored, and the returned list is sorted by priority. + * 3. When getting a limited number of candidates, it returns the top N servers with the preferred tier first. + */ + private void testPreferredTierSelectorStrategy( + PreferredTierSelectorStrategy tierSelectorStrategy, + QueryableDruidServer... expectedSelection + ) + { + final ServerSelector serverSelector = new ServerSelector( + new DataSegment( + "test", + Intervals.of("2013-01-01/2013-01-02"), + DateTimes.of("2013-01-01").toString(), + new HashMap<>(), + new ArrayList<>(), + new ArrayList<>(), + NoneShardSpec.instance(), + 0, + 0L + ), + tierSelectorStrategy, + HistoricalFilter.IDENTITY_FILTER + ); + + List<QueryableDruidServer> servers = new ArrayList<>(Arrays.asList(expectedSelection)); + + List<DruidServerMetadata> expectedCandidates = new ArrayList<>(); + for (QueryableDruidServer server : servers) { + expectedCandidates.add(server.getServer().getMetadata()); + } + Collections.shuffle(servers); + for (QueryableDruidServer server : servers) { + serverSelector.addServerAndUpdateSegment(server, serverSelector.getSegment()); + } + + // Verify that the preferred tier is respected when picking a server + Assert.assertEquals(expectedSelection[0], serverSelector.pick(null, CloneQueryMode.EXCLUDECLONES)); + Assert.assertEquals(expectedSelection[0], serverSelector.pick(EasyMock.createMock(Query.class), CloneQueryMode.EXCLUDECLONES)); + + // Verify that when getting all severs, the preferred tier is ignored, the returned list is sorted by priority + List<DruidServerMetadata> allServers = new ArrayList<>(expectedCandidates); + allServers.sort((o1, o2) -> tierSelectorStrategy.getComparator().compare(o1.getPriority(), o2.getPriority())); + Assert.assertEquals(allServers, serverSelector.getCandidates(-1, 
CloneQueryMode.EXCLUDECLONES)); + + // Verify that when getting a limited number of candidates, returns the top N servers with preferred tier first + Assert.assertEquals(expectedCandidates.subList(0, 2), serverSelector.getCandidates(2, CloneQueryMode.EXCLUDECLONES)); + } + + @Test + public void testPreferredTierSelectorStrategyHighestPriority() + { + DirectDruidClient client = EasyMock.createMock(DirectDruidClient.class); + QueryableDruidServer preferredTierLowPriority = new QueryableDruidServer( + new DruidServer("test1", "localhost", null, 0, ServerType.HISTORICAL, "preferred", 0), + client + ); + QueryableDruidServer preferredTierHighPriority = new QueryableDruidServer( + new DruidServer("test2", "localhost", null, 0, ServerType.HISTORICAL, "preferred", 1), + client + ); + QueryableDruidServer nonPreferredTierHighestPriority = new QueryableDruidServer( + new DruidServer("test3", "localhost", null, 0, ServerType.HISTORICAL, "non-preferred", 2), + client + ); + + testPreferredTierSelectorStrategy( + new PreferredTierSelectorStrategy( + new ConnectionCountServerSelectorStrategy(), + new PreferredTierSelectorStrategyConfig("preferred", "highest") + ), + preferredTierHighPriority, preferredTierLowPriority, nonPreferredTierHighestPriority + ); + } + + @Test + public void testPreferredTierSelectorStrategyLowestPriority() + { + DirectDruidClient client = EasyMock.createMock(DirectDruidClient.class); + QueryableDruidServer preferredTierLowPriority = new QueryableDruidServer( + new DruidServer("test1", "localhost", null, 0, ServerType.HISTORICAL, "preferred", 0), + client + ); + QueryableDruidServer preferredTierHighPriority = new QueryableDruidServer( + new DruidServer("test2", "localhost", null, 0, ServerType.HISTORICAL, "preferred", 1), + client + ); + QueryableDruidServer nonPreferredTierLowestPriority = new QueryableDruidServer( + new DruidServer("test3", "localhost", null, 0, ServerType.HISTORICAL, "non-preferred", -1), + client + ); + + 
testPreferredTierSelectorStrategy( + new PreferredTierSelectorStrategy( + new ConnectionCountServerSelectorStrategy(), + new PreferredTierSelectorStrategyConfig("preferred", "lowest") + ), + preferredTierLowPriority, preferredTierHighPriority, nonPreferredTierLowestPriority + ); + } + + @Test + public void testPreferredTierSelectorStrategyWithFallback() + { + DirectDruidClient client = EasyMock.createMock(DirectDruidClient.class); + // Create only non-preferred tier servers with different priorities + QueryableDruidServer nonPreferredTierLowPriority = new QueryableDruidServer( + new DruidServer("test1", "localhost", null, 0, ServerType.HISTORICAL, "non-preferred", 0), + client + ); + QueryableDruidServer nonPreferredTierMediumPriority = new QueryableDruidServer( + new DruidServer("test2", "localhost", null, 0, ServerType.HISTORICAL, "non-preferred", 1), + client + ); + QueryableDruidServer nonPreferredTierHighPriority = new QueryableDruidServer( + new DruidServer("test3", "localhost", null, 0, ServerType.HISTORICAL, "non-preferred", 2), + client + ); + + // Since no preferred tier servers are available, it should fall back to other servers + // based on highest priority + testPreferredTierSelectorStrategy( + new PreferredTierSelectorStrategy( + new ConnectionCountServerSelectorStrategy(), + new PreferredTierSelectorStrategyConfig("preferred", "highest") + ), + nonPreferredTierHighPriority, nonPreferredTierMediumPriority, nonPreferredTierLowPriority + ); + } + + @Test + public void testPreferredTierSelectorStrategyMixedServers() + { + DirectDruidClient client = EasyMock.createMock(DirectDruidClient.class); + QueryableDruidServer preferredTierLowPriority = new QueryableDruidServer( + new DruidServer("test1", "localhost", null, 0, ServerType.HISTORICAL, "preferred", 0), + client + ); + QueryableDruidServer preferredTierHighPriority = new QueryableDruidServer( + new DruidServer("test2", "localhost", null, 0, ServerType.HISTORICAL, "preferred", 1), + client + ); + 
QueryableDruidServer anotherTierHighPriority = new QueryableDruidServer( + new DruidServer("test3", "localhost", null, 0, ServerType.HISTORICAL, "tier1", 2), + client + ); + QueryableDruidServer yetAnotherTierMediumPriority = new QueryableDruidServer( Review Comment: Does this introduce randomness into the tests by adding a second server of equal priority? That is at least what it seemed like when I ran it locally. I got intermittent test failures for this test because the two servers with a priority of 1 were flip-flopping in the actual return order. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
