This is an automated email from the ASF dual-hosted git repository. dsmiley pushed a commit to branch branch_9x in repository https://gitbox.apache.org/repos/asf/solr.git
commit 4098d6ae3e298efdfa219d5a2629d69f60c5aff9 Author: pjmcarthur <[email protected]> AuthorDate: Thu Mar 14 12:49:48 2024 -0700 SOLR-16403: ClusterSingleton to remove inactive Shards (#1926) ClusterSingleton that periodically removes state=INACTIVE shards. These occur from shard splits. Co-authored-by: Paul McArthur <[email protected]> (cherry picked from commit ca58f1aa90b351b69b0ec4184adbaaca03978573) --- solr/CHANGES.txt | 2 + .../cluster/maintenance/InactiveShardRemover.java | 219 ++++++++++++++++++++ .../maintenance/InactiveShardRemoverConfig.java | 60 ++++++ .../solr/cluster/maintenance/package-info.java | 19 ++ .../org/apache/solr/cloud/DeleteShardTest.java | 52 +---- .../maintenance/InactiveShardRemoverTest.java | 225 +++++++++++++++++++++ .../pages/cluster-singleton-plugins.adoc | 93 +++++++++ .../java/org/apache/solr/cloud/ShardTestUtil.java | 57 ++++++ 8 files changed, 686 insertions(+), 41 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 0a034818332..0cbbc4ea70e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -11,6 +11,8 @@ New Features * SOLR-599: Add a new SolrJ client using the JDK’s built-in Http Client. (James Dyer) +* SOLR-16403: A new cluster singleton plugin to automatically remove inactive shards. (Paul McArthur, David Smiley) + Improvements --------------------- * SOLR-17119: When registering or updating a ConfigurablePlugin through the `/cluster/plugin` API, diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java new file mode 100644 index 00000000000..177663d1140 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cluster.maintenance; + +import com.google.common.annotations.VisibleForTesting; +import java.lang.invoke.MethodHandles; +import java.util.Collection; +import java.util.HashSet; +import java.util.Objects; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import org.apache.solr.api.ConfigurablePlugin; +import org.apache.solr.client.solrj.SolrResponse; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.cloud.ClusterSingleton; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.util.ExecutorUtil; +import org.apache.solr.common.util.SolrNamedThreadFactory; +import org.apache.solr.core.CoreContainer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This Cluster Singleton can be configured to periodically find and remove {@link + * org.apache.solr.common.cloud.Slice.State#INACTIVE} Shards that are left behind after a Shard is + * split + */ +public class InactiveShardRemover + implements 
ClusterSingleton, ConfigurablePlugin<InactiveShardRemoverConfig> { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + public static final String PLUGIN_NAME = ".inactive-shard-remover"; + + static class DeleteActor { + + private final CoreContainer coreContainer; + + DeleteActor(final CoreContainer coreContainer) { + this.coreContainer = coreContainer; + } + + void delete(final Slice slice) { + CollectionAdminRequest.DeleteShard deleteRequest = + CollectionAdminRequest.deleteShard(slice.getCollection(), slice.getName()); + try { + SolrResponse response = + coreContainer.getZkController().getSolrCloudManager().request(deleteRequest); + if (response.getException() != null) { + throw response.getException(); + } + } catch (Exception e) { + log.warn("An exception occurred when deleting an inactive shard", e); + } + } + } + + private State state = State.STOPPED; + + private final CoreContainer coreContainer; + + private final DeleteActor deleteActor; + + private ScheduledExecutorService executor; + + private long scheduleIntervalSeconds; + + private long ttlSeconds; + + private int maxDeletesPerCycle; + + /** Constructor invoked via Reflection */ + public InactiveShardRemover(final CoreContainer cc) { + this(cc, new DeleteActor(cc)); + } + + public InactiveShardRemover(final CoreContainer cc, final DeleteActor deleteActor) { + this.coreContainer = cc; + this.deleteActor = deleteActor; + } + + @Override + public void configure(final InactiveShardRemoverConfig cfg) { + Objects.requireNonNull(cfg, "config must be specified"); + cfg.validate(); + this.scheduleIntervalSeconds = cfg.scheduleIntervalSeconds; + this.maxDeletesPerCycle = cfg.maxDeletesPerCycle; + this.ttlSeconds = cfg.ttlSeconds; + } + + @Override + public String getName() { + return PLUGIN_NAME; + } + + @Override + public State getState() { + return state; + } + + @Override + public void start() throws Exception { + state = State.STARTING; + executor = 
Executors.newScheduledThreadPool(1, new SolrNamedThreadFactory(PLUGIN_NAME)); + executor.scheduleAtFixedRate( + this::deleteInactiveSlices, + scheduleIntervalSeconds, + scheduleIntervalSeconds, + TimeUnit.SECONDS); + state = State.RUNNING; + } + + @Override + public void stop() { + if (state == State.RUNNING) { + state = State.STOPPING; + ExecutorUtil.shutdownNowAndAwaitTermination(executor); + } + state = State.STOPPED; + } + + @VisibleForTesting + void deleteInactiveSlices() { + final ClusterState clusterState = coreContainer.getZkController().getClusterState(); + Collection<Slice> inactiveSlices = + clusterState.getCollectionsMap().values().stream() + .flatMap(v -> collectInactiveSlices(v).stream()) + .collect(Collectors.toSet()); + + if (log.isInfoEnabled()) { + log.info( + "Found {} inactive Shards to delete, {} will be deleted", + inactiveSlices.size(), + Math.min(inactiveSlices.size(), maxDeletesPerCycle)); + } + + inactiveSlices.stream().limit(maxDeletesPerCycle).forEach(this::deleteShard); + } + + private Collection<Slice> collectInactiveSlices(final DocCollection docCollection) { + final Collection<Slice> slices = new HashSet<>(docCollection.getSlices()); + slices.removeAll(docCollection.getActiveSlices()); + return slices.stream().filter(this::isExpired).collect(Collectors.toSet()); + } + + private void deleteShard(final Slice s) { + deleteActor.delete(s); + } + + /** + * An Inactive Shard is expired if it has not undergone a state change in the period of time + * defined by {@link InactiveShardRemover#ttlSeconds}. If it is expired, it is eligible for + * removal. 
+ */ + private boolean isExpired(final Slice slice) { + + final String collectionName = slice.getCollection(); + final String sliceName = slice.getName(); + + if (slice.getState() != Slice.State.INACTIVE) { + return false; + } + + final String lastChangeTimestamp = slice.getStr(ZkStateReader.STATE_TIMESTAMP_PROP); + if (lastChangeTimestamp == null || lastChangeTimestamp.isEmpty()) { + log.warn( + "Collection {} Shard {} has no last change timestamp and will not be deleted", + collectionName, + sliceName); + return false; + } + + final long epochTimestampNs; + try { + epochTimestampNs = Long.parseLong(lastChangeTimestamp); + } catch (NumberFormatException e) { + log.warn( + "Collection {} Shard {} has an invalid last change timestamp and will not be deleted", + collectionName, + sliceName); + return false; + } + + long currentEpochTimeNs = + coreContainer.getZkController().getSolrCloudManager().getTimeSource().getEpochTimeNs(); + long delta = TimeUnit.NANOSECONDS.toSeconds(currentEpochTimeNs - epochTimestampNs); + + boolean expired = delta >= ttlSeconds; + if (log.isDebugEnabled()) { + log.debug( + "collection {} shard {} last state change {} seconds ago. Expired={}", + slice.getCollection(), + slice.getName(), + delta, + expired); + } + return expired; + } +} diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java new file mode 100644 index 00000000000..22465e82359 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemoverConfig.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cluster.maintenance; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.annotation.JsonProperty; +import org.apache.solr.common.util.ReflectMapWriter; + +public class InactiveShardRemoverConfig implements ReflectMapWriter { + + public static final long DEFAULT_SCHEDULE_INTERVAL_SECONDS = 900L; // 15 minutes + + public static final long DEFAULT_TTL_SECONDS = 900L; // 15 minutes + + public static final int DEFAULT_MAX_DELETES_PER_CYCLE = 20; + + @JsonProperty public long scheduleIntervalSeconds; + + @JsonProperty public long ttlSeconds; + + @JsonProperty public int maxDeletesPerCycle; + + /** Default constructor required for deserialization */ + public InactiveShardRemoverConfig() { + this(DEFAULT_SCHEDULE_INTERVAL_SECONDS, DEFAULT_TTL_SECONDS, DEFAULT_MAX_DELETES_PER_CYCLE); + } + + public InactiveShardRemoverConfig( + final long scheduleIntervalSeconds, final long ttlSeconds, final int maxDeletesPerCycle) { + this.scheduleIntervalSeconds = scheduleIntervalSeconds; + this.ttlSeconds = ttlSeconds; + this.maxDeletesPerCycle = maxDeletesPerCycle; + } + + public void validate() { + if (scheduleIntervalSeconds <= 0) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "scheduleIntervalSeconds must be greater than 0"); + } + if (maxDeletesPerCycle <= 0) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, 
"maxDeletesPerCycle must be greater than 0"); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java b/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java new file mode 100644 index 00000000000..3001cb775c3 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/cluster/maintenance/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Cluster Singleton plugins that are used to perform maintenance tasks within the cluster. 
*/ +package org.apache.solr.cluster.maintenance; diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java index 63bd18ea46d..91ab353be1b 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java @@ -18,17 +18,12 @@ package org.apache.solr.cloud; import java.io.IOException; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.cloud.DistributedQueue; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.CoreStatus; -import org.apache.solr.cloud.overseer.OverseerAction; -import org.apache.solr.common.MapWriter; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.Slice.State; -import org.apache.solr.common.cloud.ZkNodeProps; -import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.util.FileUtils; import org.junit.After; import org.junit.Before; @@ -68,7 +63,7 @@ public class DeleteShardTest extends SolrCloudTestCase { CollectionAdminRequest.deleteShard(collection, "shard1") .process(cluster.getSolrClient())); - setSliceState(collection, "shard1", Slice.State.INACTIVE); + ShardTestUtil.setSliceState(cluster, collection, "shard1", Slice.State.INACTIVE); // Can delete an INACTIVE shard CollectionAdminRequest.deleteShard(collection, "shard1").process(cluster.getSolrClient()); @@ -76,46 +71,12 @@ public class DeleteShardTest extends SolrCloudTestCase { "Expected 'shard1' to be removed", collection, (n, c) -> c.getSlice("shard1") == null); // Can delete a shard under construction - setSliceState(collection, "shard2", Slice.State.CONSTRUCTION); + ShardTestUtil.setSliceState(cluster, collection, "shard2", Slice.State.CONSTRUCTION); CollectionAdminRequest.deleteShard(collection, 
"shard2").process(cluster.getSolrClient()); waitForState( "Expected 'shard2' to be removed", collection, (n, c) -> c.getSlice("shard2") == null); } - protected void setSliceState(String collection, String slice, State state) throws Exception { - - // TODO can this be encapsulated better somewhere? - MapWriter m = - ew -> - ew.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower()) - .put(slice, state.toString()) - .put(ZkStateReader.COLLECTION_PROP, collection); - final Overseer overseer = cluster.getOpenOverseer(); - if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) { - overseer - .getDistributedClusterStateUpdater() - .doSingleStateUpdate( - DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, - new ZkNodeProps(m), - cluster.getOpenOverseer().getSolrCloudManager(), - cluster.getOpenOverseer().getZkStateReader()); - } else { - DistributedQueue inQueue = - cluster - .getJettySolrRunner(0) - .getCoreContainer() - .getZkController() - .getOverseer() - .getStateUpdateQueue(); - inQueue.offer(m); - } - - waitForState( - "Expected shard " + slice + " to be in state " + state, - collection, - (n, c) -> c.getSlice(slice).getState() == state); - } - @Test public void testDirectoryCleanupAfterDeleteShard() throws IOException, SolrServerException { @@ -162,4 +123,13 @@ public class DeleteShardTest extends SolrCloudTestCase { "Instance directory still exists", FileUtils.fileExists(coreStatus.getInstanceDirectory())); assertTrue("Data directory still exists", FileUtils.fileExists(coreStatus.getDataDirectory())); } + + private void setSliceState(String collectionName, String shardId, Slice.State state) + throws Exception { + ShardTestUtil.setSliceState(cluster, collectionName, shardId, state); + waitForState( + "Expected shard " + shardId + " to be in state " + state, + collectionName, + (n, c) -> c.getSlice(shardId).getState() == state); + } } diff --git 
a/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java b/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java new file mode 100644 index 00000000000..fb8f2b95715 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cluster/maintenance/InactiveShardRemoverTest.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.cluster.maintenance; + +import static org.apache.solr.client.solrj.SolrRequest.METHOD.POST; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.V2Request; +import org.apache.solr.client.solrj.request.beans.PluginMeta; +import org.apache.solr.client.solrj.response.V2Response; +import org.apache.solr.cloud.ShardTestUtil; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.util.TimeSource; +import org.apache.solr.core.CoreContainer; +import org.junit.BeforeClass; +import org.junit.Test; + +public class InactiveShardRemoverTest extends SolrCloudTestCase { + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1) + .addConfig( + "conf", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) + .configure(); + } + + @Test + public void testDeleteInactiveShard() throws Exception { + + addPlugin(new InactiveShardRemoverConfig(1, 0, 2)); + try { + final String collectionName = "testDeleteInactiveShard"; + createCollection(collectionName, 1); + + final String sliceName = + new ArrayList<>(getCollectionState(collectionName).getSlices()).get(0).getName(); + ShardTestUtil.setSliceState(cluster, collectionName, sliceName, Slice.State.INACTIVE); + + waitForState( + "Waiting for inactive shard to be deleted", + collectionName, + clusterShape(0, 0), + 5, + TimeUnit.SECONDS); + } finally { + removePlugin(); + } + } + + @Test + public void testTtl() throws Exception { + + final int ttlSeconds = 1 + random().nextInt(5); + final TimeSource timeSource = 
cluster.getOpenOverseer().getSolrCloudManager().getTimeSource(); + + addPlugin(new InactiveShardRemoverConfig(1, ttlSeconds, 1)); + try { + final String collectionName = "testTtl"; + createCollection(collectionName, 1); + + final String sliceName = + new ArrayList<>(getCollectionState(collectionName).getSlices()).get(0).getName(); + ShardTestUtil.setSliceState(cluster, collectionName, sliceName, Slice.State.INACTIVE); + waitForState( + "Expected shard " + sliceName + " to be in state " + Slice.State.INACTIVE, + collectionName, + (n, c) -> c.getSlice(sliceName).getState() == Slice.State.INACTIVE); + + final long ttlStart = timeSource.getTimeNs(); + + waitForState( + "Waiting for InactiveShardRemover to delete inactive shard", + collectionName, + clusterShape(0, 0), + ttlSeconds + 5, + TimeUnit.SECONDS); + + final long ttlEnd = timeSource.getTimeNs(); + final long ttlPeriodSeconds = TimeUnit.NANOSECONDS.toSeconds(ttlEnd - ttlStart); + + assertTrue(ttlPeriodSeconds >= ttlSeconds); + } finally { + removePlugin(); + } + } + + public void testMaxShardsToDeletePerCycle() throws Exception { + + final CoreContainer cc = cluster.getOpenOverseer().getCoreContainer(); + + final int maxDeletesPerCycle = 5; + final InactiveShardRemover remover = new InactiveShardRemover(cc); + remover.configure(new InactiveShardRemoverConfig(1, 0, maxDeletesPerCycle)); + + // Remove across multiple collections + final String collection1 = "testMaxShardsToDeletePerCycle-1"; + final String collection2 = "testMaxShardsToDeletePerCycle-2"; + final int shardsPerCollection = 10; + final int totalShards = 2 * shardsPerCollection; + + createCollection(collection1, shardsPerCollection); + createCollection(collection2, shardsPerCollection); + + setAllShardsInactive(collection1); + setAllShardsInactive(collection2); + + int cycle = 0; + int shardsDeleted = 0; + while (shardsDeleted < totalShards) { + cycle++; + remover.deleteInactiveSlices(); + DocCollection coll1 = getCollectionState(collection1); + 
DocCollection coll2 = getCollectionState(collection2); + + int remainingShards = coll1.getSlices().size() + coll2.getSlices().size(); + if (remainingShards != totalShards - maxDeletesPerCycle * cycle) { + System.out.println(coll1); + System.out.println(coll2); + } + assertEquals(totalShards - maxDeletesPerCycle * cycle, remainingShards); + shardsDeleted = totalShards - remainingShards; + } + } + + @Test + public void testConfigValidation() { + + try { + new InactiveShardRemoverConfig(0, 0, 1).validate(); + fail("Expected validation error for scheduleIntervalSeconds=0"); + } catch (SolrException e) { + assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code()); + } + + try { + new InactiveShardRemoverConfig(1, 0, 0).validate(); + fail("Expected validation error for maxDeletesPerCycle=0"); + } catch (SolrException e) { + assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code()); + } + } + + private static void addPlugin(final InactiveShardRemoverConfig config) + throws SolrServerException, IOException { + PluginMeta plugin = pluginMeta(config); + pluginRequest(Collections.singletonMap("add", plugin)); + } + + private static void removePlugin() throws SolrServerException, IOException { + pluginRequest(Collections.singletonMap("remove", InactiveShardRemover.PLUGIN_NAME)); + } + + private static void pluginRequest(Map<String, Object> payload) + throws SolrServerException, IOException { + V2Request req = + new V2Request.Builder("/cluster/plugin").withMethod(POST).withPayload(payload).build(); + V2Response rsp = req.process(cluster.getSolrClient()); + assertEquals(0, rsp.getStatus()); + } + + private static PluginMeta pluginMeta(final InactiveShardRemoverConfig config) { + PluginMeta plugin = pluginMeta(); + plugin.config = config; + return plugin; + } + + private static PluginMeta pluginMeta() { + PluginMeta plugin = new PluginMeta(); + plugin.klass = InactiveShardRemover.class.getName(); + plugin.name = InactiveShardRemover.PLUGIN_NAME; + return plugin; + } 
+ + private void createCollection(final String collectionName, final int numShards) + throws SolrServerException, IOException { + CollectionAdminRequest.createCollection(collectionName, "conf", numShards, 1) + .process(cluster.getSolrClient()); + + cluster.waitForActiveCollection(collectionName, numShards, numShards); + } + + private void setAllShardsInactive(final String collectionName) { + DocCollection collection = getCollectionState(collectionName); + collection.getSlices().stream() + .filter(s -> s.getState() != Slice.State.INACTIVE) + .forEach( + s -> { + try { + ShardTestUtil.setSliceState( + cluster, s.getCollection(), s.getName(), Slice.State.INACTIVE); + waitForState( + "Expected shard " + s + " to be in state " + Slice.State.INACTIVE, + collection.getName(), + (n, c) -> c.getSlice(s.getName()).getState() == Slice.State.INACTIVE); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } +} diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc new file mode 100644 index 00000000000..5afdcb0d18a --- /dev/null +++ b/solr/solr-ref-guide/modules/configuration-guide/pages/cluster-singleton-plugins.adoc @@ -0,0 +1,93 @@ += Cluster Singleton Plugins +:toclevels: 3 +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +The Solr distribution includes some Cluster Singleton plugins. +Additional plugins can be added - they have to implement the ClusterSingleton interface. +The configuration entry may also contain a config element if the plugin implements the ConfigurablePlugin interface. + +== Plugin Configuration +Cluster Singleton plugins can be configured in two ways, either by using the xref:cluster-plugins.adoc[cluster plugins API] or by declaring them in xref:configuring-solr-xml.adoc[solr.xml]. + +All cluster plugins must be declared using the same method. It is not possible to declare some plugins in solr.xml and use the cluster plugins API to manage other plugins. + +== Cluster Singleton Plugins Included with Solr +Solr includes the following plugins out-of-the-box. + +=== Inactive Shard Remover +This plugin will periodically find and delete shards that have an INACTIVE shard state. +Shards become INACTIVE when they are split, and the documents they contain are now managed by two or more sub-shards. 
+ +Configuration using the cluster plugin API +[source,bash] +---- +curl -X POST -H 'Content-type: application/json' -d '{ + "add":{ + "name": ".inactive-shard-remover", + "class": "org.apache.solr.cluster.maintenance.InactiveShardRemover", + "config": { + "scheduleIntervalSeconds": 3600, + "ttlSeconds": 1800, + "maxDeletesPerCycle": 20 + } + }}' \ + http://localhost:8983/api/cluster/plugin +---- + +Configuration in solr.xml +[source,xml] +---- +<clusterSingleton name=".inactive-shard-remover" class="org.apache.solr.cluster.maintenance.InactiveShardRemover"> + <long name="scheduleIntervalSeconds">3600</long> + <long name="ttlSeconds">1800</long> + <int name="maxDeletesPerCycle">20</int> +</clusterSingleton> +---- + +NOTE: The Inactive Shard Remover plugin configuration MUST use the predefined name `.inactive-shard-remover`. +There can be only one (or none) of these configurations defined. + +==== Configuration + +`scheduleIntervalSeconds`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `900` Seconds +|=== ++ +This value determines how often the inactive shard remover will run. + +`ttlSeconds`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `900` Seconds +|=== ++ +This value defines the minimum period of time that a Shard must be INACTIVE before it is considered for deletion. + +`maxDeletesPerCycle`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `20` +|=== ++ +This is the maximum number of shards that will be deleted each time the inactive shard remover runs. +If there are more Shards that could be deleted, they will be considered during the next cycle.
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java b/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java new file mode 100644 index 00000000000..4faf9708098 --- /dev/null +++ b/solr/test-framework/src/java/org/apache/solr/cloud/ShardTestUtil.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.cloud; + +import org.apache.solr.client.solrj.cloud.DistributedQueue; +import org.apache.solr.cloud.overseer.OverseerAction; +import org.apache.solr.common.MapWriter; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; + +public class ShardTestUtil { + + public static void setSliceState( + MiniSolrCloudCluster cluster, String collection, String slice, Slice.State state) + throws Exception { + + MapWriter m = + ew -> + ew.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower()) + .put(slice, state.toString()) + .put(ZkStateReader.COLLECTION_PROP, collection); + final Overseer overseer = cluster.getOpenOverseer(); + if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) { + overseer + .getDistributedClusterStateUpdater() + .doSingleStateUpdate( + DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, + new ZkNodeProps(m), + cluster.getOpenOverseer().getSolrCloudManager(), + cluster.getOpenOverseer().getZkStateReader()); + } else { + DistributedQueue inQueue = + cluster + .getJettySolrRunner(0) + .getCoreContainer() + .getZkController() + .getOverseer() + .getStateUpdateQueue(); + inQueue.offer(m); + } + } +}
