dsmiley commented on code in PR #3904: URL: https://github.com/apache/solr/pull/3904#discussion_r2659314424
########## changelog/unreleased/SOLR-18008-simulate_solr_core_remnants.yml: ########## @@ -0,0 +1,5 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Enhance existing setting for removing unknown cores at startup to also apply when creating collections and adding replicas and remnant core files already exist. +type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other Review Comment: IMO we should use "added" conservatively. Even your title shows you aren't so much adding something as you are modifying an existing thing. ########## solr/core/src/test/org/apache/solr/cloud/DeleteCoreRemnantsOnCreateTest.java: ########## @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.cloud; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Solr occasionally gets into an inconsistent state with its cores lifecycle where remnant files + * are left on disk after various operations that delete a core. Examples include deleting a + * collection operation that doesn't properly finish, or maybe the Solr process unexpectedly gets + * killed. The system property "solr.cloud.delete.unknown.cores.enabled" is an expert setting that + * when enabled automatically deletes any remnant core data on disk when new cores are created that + * would otherwise fail due to the preexisting files. You should be cautious in enabling this + * feature, as it means that something isn't working well in your Solr setup. + */ +public class DeleteCoreRemnantsOnCreateTest extends SolrCloudTestCase { + private static final String DELETE_UNKNOWN_CORES_PROP = "solr.cloud.delete.unknown.cores.enabled"; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1).addConfig("conf", configset("cloud-minimal")).configure(); + } + + @Before + public void resetProperty() { + System.clearProperty(DELETE_UNKNOWN_CORES_PROP); + } + + /** + * Shared setup for testing collection creation with remnants. Creates a collection, deletes it, + * and then leaves behind a remnant directory. 
+ */ + private void setupCollectionRemnant(String collectionName) throws Exception { + List<JettySolrRunner> jettys = cluster.getJettySolrRunners(); + String primaryNode = jettys.getFirst().getNodeName(); + + CollectionAdminRequest.Create createRequest = + CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1); + createRequest.setCreateNodeSet(primaryNode); Review Comment: We can assume one node, so we don't care what its name is later. ########## solr/core/src/test/org/apache/solr/cloud/DeleteCoreRemnantsOnCreateTest.java: ########## @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.cloud; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.CoreAdminRequest; +import org.apache.solr.client.solrj.response.json.JsonMapResponseParser; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Solr occasionally gets into an inconsistent state with its cores lifecycle where remnant files + * are left on disk after various operations that delete a core. Examples include deleting a + * collection operation that doesn't properly finish, or maybe the Solr process unexpectedly gets + * killed. The system property "solr.cloud.delete.unknown.cores.enabled" is an expert setting that Review Comment: Use `{@link #DELETE_UNKNOWN_CORES_PROP}` -- IntelliJ will help write such things with auto-completion. Or just don't bother being specific. Moreover, I wouldn't be overly specific in javadoc, especially on tests. Documentation needs to be maintained; it isn't purely free/good (more isn't always better). This documentation should say less. You are including advice. This is a waste of time to say such things here; leave that to the ref guide. ########## solr/core/src/test/org/apache/solr/cloud/DeleteCoreRemnantsOnCreateTest.java: ########## @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.cloud; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Solr occasionally gets into an inconsistent state with its cores lifecycle where remnant files + * are left on disk after various operations that delete a core. Examples include deleting a + * collection operation that doesn't properly finish, or maybe the Solr process unexpectedly gets + * killed. The system property "solr.cloud.delete.unknown.cores.enabled" is an expert setting that + * when enabled automatically deletes any remnant core data on disk when new cores are created that + * would otherwise fail due to the preexisting files. You should be cautious in enabling this + * feature, as it means that something isn't working well in your Solr setup. 
+ */ +public class DeleteCoreRemnantsOnCreateTest extends SolrCloudTestCase { + private static final String DELETE_UNKNOWN_CORES_PROP = "solr.cloud.delete.unknown.cores.enabled"; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1).addConfig("conf", configset("cloud-minimal")).configure(); + } + + @Before Review Comment: needless ########## solr/solr-ref-guide/modules/configuration-guide/pages/solr-properties.adoc: ########## @@ -44,7 +44,7 @@ NOTE: Properties marked with "!" indicate inverted meaning between pre Solr 10 a |solr.cloud.prep.recovery.read.timeout.additional.ms|prepRecoveryReadTimeoutExtraWait|8000|Specifies additional milliseconds to wait during recovery read operations in SolrCloud mode. -|solr.cloud.startup.delete.unknown.cores.enabled|solr.deleteUnknownCores|false|Controls whether unknown cores are deleted at startup in SolrCloud mode. +|solr.cloud.delete.unknown.cores.enabled|solr.deleteUnknownCores|false|Determines if unknown cores should be removed at startup and during collection and replica creation in SolrCloud mode. Review Comment: I view these things as separate: * Deleting unexpected cores (according to ZK state.json as truth) on startup. * Deleting unexpected preexisting core data on replica creation. ########## solr/core/src/test/org/apache/solr/cloud/DeleteCoreRemnantsOnCreateTest.java: ########## @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.cloud; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Solr occasionally gets into an inconsistent state with its cores lifecycle where remnant files + * are left on disk after various operations that delete a core. Examples include deleting a + * collection operation that doesn't properly finish, or maybe the Solr process unexpectedly gets + * killed. The system property "solr.cloud.delete.unknown.cores.enabled" is an expert setting that + * when enabled automatically deletes any remnant core data on disk when new cores are created that + * would otherwise fail due to the preexisting files. You should be cautious in enabling this + * feature, as it means that something isn't working well in your Solr setup. 
+ */ +public class DeleteCoreRemnantsOnCreateTest extends SolrCloudTestCase { + private static final String DELETE_UNKNOWN_CORES_PROP = "solr.cloud.delete.unknown.cores.enabled"; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1).addConfig("conf", configset("cloud-minimal")).configure(); + } + + @Before + public void resetProperty() { + System.clearProperty(DELETE_UNKNOWN_CORES_PROP); + } + + /** + * Shared setup for testing collection creation with remnants. Creates a collection, deletes it, + * and then leaves behind a remnant directory. + */ + private void setupCollectionRemnant(String collectionName) throws Exception { + List<JettySolrRunner> jettys = cluster.getJettySolrRunners(); + String primaryNode = jettys.getFirst().getNodeName(); + + CollectionAdminRequest.Create createRequest = + CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1); + createRequest.setCreateNodeSet(primaryNode); + createRequest.process(cluster.getSolrClient()); + + waitForState( + "Expected collection to be fully active", + collectionName, + (n, c) -> SolrCloudTestCase.replicasForCollectionAreFullyActive(n, c, 1, 1)); + + Replica primaryReplica = getReplicaOnNode(collectionName, "shard1", primaryNode); + JettySolrRunner primaryJetty = cluster.getReplicaJetty(primaryReplica); + String originalCoreName = primaryReplica.getCoreName(); + Path remnantInstanceDir; + try (SolrCore core = primaryJetty.getCoreContainer().getCore(originalCoreName)) { + CoreDescriptor cd = core.getCoreDescriptor(); + remnantInstanceDir = cd.getInstanceDir(); + } + + CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient()); + waitForState("Expected collection deletion", collectionName, (n, c) -> c == null); + + // Simulate a core remnant still exists by creating the directory and core.properties + Files.createDirectories(remnantInstanceDir); + Files.writeString(remnantInstanceDir.resolve("core.properties"), "", 
StandardCharsets.UTF_8); + } + + @Test + public void testCreateCollectionWithRemnantsFailsWithoutSetting() throws Exception { + assertNull( + "Property should not be set by default", System.getProperty(DELETE_UNKNOWN_CORES_PROP)); + + String collectionName = "coreRemnantCreateNoSetting"; + setupCollectionRemnant(collectionName); + + // Try to create the collection again - this demonstrates the behavior without the setting + // In typical environments, this might fail, but behavior depends on configuration + CollectionAdminRequest.Create recreateRequest = + CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1); + List<JettySolrRunner> jettys = cluster.getJettySolrRunners(); + recreateRequest.setCreateNodeSet(jettys.getFirst().getNodeName()); + + // The request to create a collection SHOULD fail based on the remnant file, if it does not it + // means we've changed Solr's behavior when creating a core and + // remnants exist, and therefore we should rethink the utility of this setting. + try { Review Comment: See `assertThrows` for this well known pattern ########## solr/core/src/test/org/apache/solr/cloud/DeleteCoreRemnantsOnCreateTest.java: ########## @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.cloud; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Solr occasionally gets into an inconsistent state with its cores lifecycle where remnant files + * are left on disk after various operations that delete a core. Examples include deleting a + * collection operation that doesn't properly finish, or maybe the Solr process unexpectedly gets + * killed. The system property "solr.cloud.delete.unknown.cores.enabled" is an expert setting that + * when enabled automatically deletes any remnant core data on disk when new cores are created that + * would otherwise fail due to the preexisting files. You should be cautious in enabling this + * feature, as it means that something isn't working well in your Solr setup. + */ +public class DeleteCoreRemnantsOnCreateTest extends SolrCloudTestCase { + private static final String DELETE_UNKNOWN_CORES_PROP = "solr.cloud.delete.unknown.cores.enabled"; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1).addConfig("conf", configset("cloud-minimal")).configure(); + } + + @Before + public void resetProperty() { + System.clearProperty(DELETE_UNKNOWN_CORES_PROP); + } + + /** + * Shared setup for testing collection creation with remnants. Creates a collection, deletes it, + * and then leaves behind a remnant directory. 
+ */ + private void setupCollectionRemnant(String collectionName) throws Exception { + List<JettySolrRunner> jettys = cluster.getJettySolrRunners(); + String primaryNode = jettys.getFirst().getNodeName(); + + CollectionAdminRequest.Create createRequest = + CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1); + createRequest.setCreateNodeSet(primaryNode); Review Comment: why specify this? ########## solr/core/src/java/org/apache/solr/core/CoreContainer.java: ########## @@ -1511,6 +1511,17 @@ public SolrCore create( preExistingZkEntry = getZkController().checkIfCoreNodeNameAlreadyExists(cd); } + final boolean deleteUnknownCores = + Boolean.parseBoolean( + System.getProperty("solr.cloud.delete.unknown.cores.enabled", "false")); Review Comment: I thought you were very well aware of EnvUtils ########## solr/core/src/test/org/apache/solr/cloud/DeleteCoreRemnantsOnCreateTest.java: ########## @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.cloud; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Solr occasionally gets into an inconsistent state with its cores lifecycle where remnant files + * are left on disk after various operations that delete a core. Examples include deleting a + * collection operation that doesn't properly finish, or maybe the Solr process unexpectedly gets + * killed. The system property "solr.cloud.delete.unknown.cores.enabled" is an expert setting that + * when enabled automatically deletes any remnant core data on disk when new cores are created that + * would otherwise fail due to the preexisting files. You should be cautious in enabling this + * feature, as it means that something isn't working well in your Solr setup. + */ +public class DeleteCoreRemnantsOnCreateTest extends SolrCloudTestCase { + private static final String DELETE_UNKNOWN_CORES_PROP = "solr.cloud.delete.unknown.cores.enabled"; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1).addConfig("conf", configset("cloud-minimal")).configure(); + } + + @Before + public void resetProperty() { + System.clearProperty(DELETE_UNKNOWN_CORES_PROP); + } + + /** + * Shared setup for testing collection creation with remnants. Creates a collection, deletes it, + * and then leaves behind a remnant directory. 
+ */ + private void setupCollectionRemnant(String collectionName) throws Exception { + List<JettySolrRunner> jettys = cluster.getJettySolrRunners(); + String primaryNode = jettys.getFirst().getNodeName(); + + CollectionAdminRequest.Create createRequest = + CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1); + createRequest.setCreateNodeSet(primaryNode); + createRequest.process(cluster.getSolrClient()); + + waitForState( + "Expected collection to be fully active", + collectionName, + (n, c) -> SolrCloudTestCase.replicasForCollectionAreFullyActive(n, c, 1, 1)); + + Replica primaryReplica = getReplicaOnNode(collectionName, "shard1", primaryNode); + JettySolrRunner primaryJetty = cluster.getReplicaJetty(primaryReplica); + String originalCoreName = primaryReplica.getCoreName(); Review Comment: Instead you could simply call `org.apache.solr.core.CoreContainer#getAllCoreNames` and grab the first. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
