This is an automated email from the ASF dual-hosted git repository. jmckenzie pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push: new d0b9532f2b Add the ability for operators to loosen the definition of "empty" for edge cases d0b9532f2b is described below commit d0b9532f2b87a17a0508d0637556f2f3e8d0fd94 Author: Josh McKenzie <jmcken...@apache.org> AuthorDate: Mon Aug 22 15:04:19 2022 -0400 Add the ability for operators to loosen the definition of "empty" for edge cases Patch by David Capwell; reviewed by Josh McKenzie, Yifan Cai, and Sam Tunnicliffe for CASSANDRA-17842 Co-authored-by: David Capwell <dcapw...@apache.org> Co-authored-by: Josh McKenzie <jmcken...@apache.org> --- CHANGES.txt | 1 + NEWS.txt | 18 +++++++++++++----- .../cassandra/config/CassandraRelevantProperties.java | 3 +++ src/java/org/apache/cassandra/gms/EndpointState.java | 15 ++++++++++++++- src/java/org/apache/cassandra/gms/Gossiper.java | 13 +++++++++++++ src/java/org/apache/cassandra/gms/GossiperMBean.java | 3 +++ 6 files changed, 47 insertions(+), 6 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 84975ef1b9..dee8a5e741 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.2 + * Add the ability for operators to loosen the definition of "empty" for edge cases (CASSANDRA-17842) * Fix potential out of range exception on column index downsampling (CASSANDRA-17839) * Introduce target directory to vtable output for sstable_tasks and for compactionstats (CASSANDRA-13010) * Read/Write/Truncate throw RequestFailure in a race condition with callback timeouts, should return Timeout instead (CASSANDRA-17828) diff --git a/NEWS.txt b/NEWS.txt index fe87f0cd78..b488acbf20 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -77,12 +77,20 @@ New features - It is possible to list ephemeral snapshots by nodetool listsnaphots command when flag "-e" is specified. - Added a new flag to `nodetool profileload` and JMX endpoint to set up recurring profile load generation on specified intervals (see CASSANDRA-17821) + - Added a new property, gossiper.loose_empty_enabled, to allow for a looser definition of "empty" when + considering the heartbeat state of another node in Gossip. This should only be used by knowledgeable + operators in the following scenarios: + + Currently "empty" w/regards to heartbeat state in Gossip is very specific to a single edge case (i.e. in + isEmptyWithoutStatus() our usage of hbState() + applicationState), however there are other failure cases which + block host replacements and require intrusive workarounds and human intervention to recover from when you + have something in hbState() you don't expect. See CASSANDRA-17842 for further details. Upgrading --------- - Ephemeral marker files for snapshots done by repairs are not created anymore, - there is a dedicated flag in snapshot manifest instead. On upgrade of a node to version 4.2, on node's start, in case there - are such ephemeral snapshots on disk, they will be deleted (same behaviour as before) and any new ephemeral snapshots + there is a dedicated flag in snapshot manifest instead. On upgrade of a node to version 4.2, on node's start, in case there + are such ephemeral snapshots on disk, they will be deleted (same behaviour as before) and any new ephemeral snapshots will stop to create ephemeral marker files as flag in a snapshot manifest was introduced instead. Deprecation @@ -427,7 +435,7 @@ Upgrading - Native protocol v5 is promoted from beta in this release. The wire format has changed significantly and users should take care to ensure client drivers are upgraded to a version with support for the final v5 format, if currently connecting over v5-beta. (CASSANDRA-15299, CASSANDRA-14973) - - Cassandra removed support for the OldNetworkTopologyStrategy. Before upgrading you will need to change the + - Cassandra removed support for the OldNetworkTopologyStrategy. Before upgrading you will need to change the replication strategy for the keyspaces using this strategy to the NetworkTopologyStrategy. (CASSANDRA-13990) - Sstables for tables using with a frozen UDT written by C* 3.0 appear as corrupted. @@ -625,7 +633,7 @@ Upgrading reason, a opt-in system property has been added to disable the fix: -Dcassandra.unsafe.disable-serial-reads-linearizability=true Use this flag at your own risk as it revert SERIAL reads to the incorrect behavior of - previous versions. See CASSANDRA-12126 for details. + previous versions. See CASSANDRA-12126 for details. - SASI's `max_compaction_flush_memory_in_mb` setting was previously getting interpreted in bytes. From 3.11.8 it is correctly interpreted in megabytes, but prior to 3.11.10 previous configurations of this setting will lead to nodes OOM during compaction. From 3.11.10 previous configurations will be detected as incorrect, @@ -722,7 +730,7 @@ Compact Storage (only when upgrading from 3.X or any version lower than 3.0.15) Starting version 5.0, COMPACT STORAGE will no longer be supported. 'ALTER ... DROP COMPACT STORAGE' statement makes Compact Tables CQL-compatible, exposing internal structure of Thrift/Compact Tables. You can find more details - on exposed internal structure under: + on exposed internal structure under: http://cassandra.apache.org/doc/latest/cql/appendices.html#appendix-c-dropping-compact-storage For uninterrupted cluster upgrades, drivers now support 'NO_COMPACT' startup option. diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 00c2f4cd28..81f9a64418 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -295,6 +295,9 @@ public enum CassandraRelevantProperties // for specific tests ORG_APACHE_CASSANDRA_CONF_CASSANDRA_RELEVANT_PROPERTIES_TEST("org.apache.cassandra.conf.CassandraRelevantPropertiesTest"), ORG_APACHE_CASSANDRA_DB_VIRTUAL_SYSTEM_PROPERTIES_TABLE_TEST("org.apache.cassandra.db.virtual.SystemPropertiesTableTest"), + + // Loosen the definition of "empty" for gossip state, for use during host replacements if things go awry + LOOSE_DEF_OF_EMPTY_ENABLED(Config.PROPERTY_PREFIX + "gossiper.loose_empty_enabled"); ; diff --git a/src/java/org/apache/cassandra/gms/EndpointState.java b/src/java/org/apache/cassandra/gms/EndpointState.java index c60a4793bd..69684e4b67 100644 --- a/src/java/org/apache/cassandra/gms/EndpointState.java +++ b/src/java/org/apache/cassandra/gms/EndpointState.java @@ -29,6 +29,7 @@ import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -46,6 +47,8 @@ public class EndpointState { protected static final Logger logger = LoggerFactory.getLogger(EndpointState.class); + static volatile boolean LOOSE_DEF_OF_EMPTY_ENABLED = CassandraRelevantProperties.LOOSE_DEF_OF_EMPTY_ENABLED.getBoolean(); + public final static IVersionedSerializer<EndpointState> serializer = new EndpointStateSerializer(); public final static IVersionedSerializer<EndpointState> nullableSerializer = NullableSerializer.wrap(serializer); @@ -202,7 +205,17 @@ public class EndpointState public boolean isEmptyWithoutStatus() { Map<ApplicationState, VersionedValue> state = applicationState.get(); - return hbState.isEmpty() && !(state.containsKey(ApplicationState.STATUS_WITH_PORT) || state.containsKey(ApplicationState.STATUS)); + boolean hasStatus = state.containsKey(ApplicationState.STATUS_WITH_PORT) || state.containsKey(ApplicationState.STATUS); + return hbState.isEmpty() && !hasStatus + // In the very specific case where hbState.isEmpty and STATUS is missing, this is known to be safe to "fake" + // the data, as this happens when the gossip state isn't coming from the node but instead from a peer who + // restarted and is missing the node's state. + // + // When hbState is not empty, then the node gossiped an empty STATUS; this happens during bootstrap and it's not + // possible to tell if this is ok or not (we can't really tell if the node is dead or having networking issues). + // For these cases allow an external actor to verify and inform Cassandra that it is safe - this is done by + // updating the LOOSE_DEF_OF_EMPTY_ENABLED field. + || (LOOSE_DEF_OF_EMPTY_ENABLED && !hasStatus); } public boolean isRpcReady() diff --git a/src/java/org/apache/cassandra/gms/Gossiper.java b/src/java/org/apache/cassandra/gms/Gossiper.java index 1d6a597c43..d0fab0cac5 100644 --- a/src/java/org/apache/cassandra/gms/Gossiper.java +++ b/src/java/org/apache/cassandra/gms/Gossiper.java @@ -2513,6 +2513,19 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean return minVersion; } + @Override + public boolean getLooseEmptyEnabled() + { + return EndpointState.LOOSE_DEF_OF_EMPTY_ENABLED; + } + + @Override + public void setLooseEmptyEnabled(boolean enabled) + { + logger.info("Setting loose definition of empty to {}", enabled); + EndpointState.LOOSE_DEF_OF_EMPTY_ENABLED = enabled; + } + public void unsafeSetEnabled() { scheduledGossipTask = new NotScheduledFuture<>(); diff --git a/src/java/org/apache/cassandra/gms/GossiperMBean.java b/src/java/org/apache/cassandra/gms/GossiperMBean.java index 47d7207ef8..2d59e37f2d 100644 --- a/src/java/org/apache/cassandra/gms/GossiperMBean.java +++ b/src/java/org/apache/cassandra/gms/GossiperMBean.java @@ -38,4 +38,7 @@ public interface GossiperMBean /** Returns each node's database release version */ public Map<String, List<String>> getReleaseVersionsWithPort(); + public boolean getLooseEmptyEnabled(); + + public void setLooseEmptyEnabled(boolean enabled); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org