David Capwell created CASSANDRA-15970:
-----------------------------------------
Summary: 3.x fails to start if the commit log has range tombstones for a column that has also been dropped
Key: CASSANDRA-15970
URL: https://issues.apache.org/jira/browse/CASSANDRA-15970
Project: Cassandra
Issue Type: Bug
Components: Legacy/Local Write-Read Paths, Local/Commit Log
Reporter: David Capwell
Assignee: David Capwell
After upgrading, the 3.0 node crashes at startup while replaying the commit log, with the following exception:
{code}
ERROR [node1_isolatedExecutor:1] node1 2020-07-21 18:59:39,048 JVMStabilityInspector.java:102 - Exiting due to error while processing commit log during initialization.
org.apache.cassandra.db.commitlog.CommitLogReplayer$CommitLogReplayException: Unexpected error deserializing mutation; saved to /var/folders/cm/08cddl2s25j7fq3jdb76gh4r0000gn/T/mutation6239873170066752296dat. This may be caused by replaying a mutation against a table with the same name but incompatible schema.
    at org.apache.cassandra.db.commitlog.CommitLogReplayer.handleReplayError(CommitLogReplayer.java:731) [dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.commitlog.CommitLogReplayer.replayMutation(CommitLogReplayer.java:656) [dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.commitlog.CommitLogReplayer.replaySyncSection(CommitLogReplayer.java:609) [dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.commitlog.CommitLogReplayer.recover(CommitLogReplayer.java:493) [dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.commitlog.CommitLogReplayer.recover(CommitLogReplayer.java:189) [dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.commitlog.CommitLog.recover(CommitLog.java:170) [dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.commitlog.CommitLog.recover(CommitLog.java:151) [dtest-3.0.21.jar:na]
    at org.apache.cassandra.distributed.impl.Instance.lambda$startup$7(Instance.java:535) [dtest-3.0.21.jar:na]
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_242]
    at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_242]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[na:1.8.0_242]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[na:1.8.0_242]
    at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:83) ~[dtest-3.0.21.jar:na]
    at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_242]
Caused by: java.lang.NullPointerException: null
    at org.apache.cassandra.db.ClusteringComparator.validate(ClusteringComparator.java:206) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.partitions.PartitionUpdate.validate(PartitionUpdate.java:494) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.commitlog.CommitLogReplayer.replayMutation(CommitLogReplayer.java:629) [dtest-3.0.21.jar:na]
    ... 12 common frames omitted
{code}
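The NPE comes from validating the replayed PartitionUpdate: the legacy mutation still carries a range tombstone bound naming the dropped {{tables}} column, for which the post-drop 3.0 schema has no comparator subtype. A minimal sketch of that failure pattern (names and structure are invented for illustration, not Cassandra's actual code):
{code}
// Hypothetical illustration of the null-comparator failure pattern; the
// names here are invented and do not match Cassandra's internals.
import java.nio.ByteBuffer;
import java.util.List;
import java.util.function.Consumer;

final class ComparatorSketch
{
    // One validator per clustering component known to the current schema.
    private final List<Consumer<ByteBuffer>> subtypeValidators;

    ComparatorSketch(List<Consumer<ByteBuffer>> subtypeValidators)
    {
        this.subtypeValidators = subtypeValidators;
    }

    void validate(ByteBuffer[] legacyBound)
    {
        for (int i = 0; i < legacyBound.length; i++)
        {
            // A replayed pre-upgrade range tombstone can carry a bound
            // component for the dropped column; the type lookup for that
            // component yields null, and the dereference below throws the
            // NPE seen at ClusteringComparator.validate in the trace.
            Consumer<ByteBuffer> validator = i < subtypeValidators.size() ? subtypeValidators.get(i) : null;
            validator.accept(legacyBound[i]); // NullPointerException here
        }
    }
}
{code}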
If you drain in 2.2 before upgrading, the node starts, but you instead get the following exception on the read path:
{code}
ERROR [SharedPool-Worker-1] node1 2020-07-21 22:17:25,661 AbstractLocalAwareExecutorService.java:169 - Uncaught exception on thread Thread[SharedPool-Worker-1,5,node1]
java.lang.RuntimeException: java.lang.NullPointerException
    at org.apache.cassandra.service.StorageProxy$DroppableRunnable.run(StorageProxy.java:2537) ~[dtest-3.0.21.jar:na]
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_242]
    at org.apache.cassandra.concurrent.AbstractLocalAwareExecutorService$FutureTask.run(AbstractLocalAwareExecutorService.java:165) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.concurrent.SEPWorker.run(SEPWorker.java:109) [dtest-3.0.21.jar:na]
    at java.lang.Thread.run(Thread.java:748) [na:1.8.0_242]
Caused by: java.lang.NullPointerException: null
    at org.apache.cassandra.db.ClusteringComparator.compare(ClusteringComparator.java:131) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.UnfilteredDeserializer$OldFormatDeserializer.compareNextTo(UnfilteredDeserializer.java:391) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.columniterator.SSTableIterator$ForwardReader.handlePreSliceData(SSTableIterator.java:105) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.columniterator.SSTableIterator$ForwardReader.hasNextInternal(SSTableIterator.java:164) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.columniterator.AbstractSSTableIterator$Reader.hasNext(AbstractSSTableIterator.java:336) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.filter.ClusteringIndexNamesFilter$1.hasNext(ClusteringIndexNamesFilter.java:157) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.rows.UnfilteredRowIterator.isEmpty(UnfilteredRowIterator.java:70) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.SinglePartitionReadCommand.queryMemtableAndSSTablesInTimestampOrder(SinglePartitionReadCommand.java:952) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.SinglePartitionReadCommand.queryMemtableAndDiskInternal(SinglePartitionReadCommand.java:679) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.SinglePartitionReadCommand.queryMemtableAndDisk(SinglePartitionReadCommand.java:656) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.SinglePartitionReadCommand.queryStorage(SinglePartitionReadCommand.java:491) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.db.ReadCommand.executeLocally(ReadCommand.java:418) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.service.StorageProxy$LocalReadRunnable.runMayThrow(StorageProxy.java:1793) ~[dtest-3.0.21.jar:na]
    at org.apache.cassandra.service.StorageProxy$DroppableRunnable.run(StorageProxy.java:2533) ~[dtest-3.0.21.jar:na]
    ... 4 common frames omitted
{code}
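Both traces share the same root cause; the drain only changes where the legacy range tombstone is read back. Without a drain it still sits in the 2.2 commit log and 3.0 crashes replaying it at startup; after a drain it lives only in a 2.2-format SSTable and the crash moves to the read path. A rough pre-upgrade check for which mode a node is in (hypothetical helper; the directory is the stock default, and a drained node may still keep recycled segment files, so treat this as a heuristic):
{code}
// Hypothetical heuristic: list commit log segments that 3.0 would replay at
// startup. The path is the stock default; match commitlog_directory in
// cassandra.yaml. Recycled segments may remain after a drain, so an empty
// directory is a stronger signal than a non-empty one.
import java.io.File;

public final class CommitLogSegmentCheck
{
    public static void main(String[] args)
    {
        File dir = new File(args.length > 0 ? args[0] : "/var/lib/cassandra/commitlog");
        File[] segments = dir.listFiles((d, name) -> name.startsWith("CommitLog-"));
        int count = segments == null ? 0 : segments.length;
        System.out.println(count == 0
                           ? "No segments: startup replay is a no-op; any failure will be on the read path."
                           : count + " segment(s): 3.0 will replay these at startup.");
    }
}
{code}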
This can be reproduced with the following in-JVM upgrade dtest:
{code}
package org.apache.cassandra.distributed.upgrade;

import java.net.InetAddress;
import java.util.Arrays;
import java.util.Collections;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import org.junit.Assert;
import org.junit.Test;

import org.apache.cassandra.db.marshal.CompositeType;
import org.apache.cassandra.db.marshal.Int32Type;
import org.apache.cassandra.db.marshal.MapType;
import org.apache.cassandra.distributed.api.ConsistencyLevel;
import org.apache.cassandra.distributed.api.Feature;
import org.apache.cassandra.distributed.api.ICoordinator;
import org.apache.cassandra.distributed.api.QueryResults;
import org.apache.cassandra.distributed.api.SimpleQueryResult;
import org.apache.cassandra.distributed.shared.AssertUtils;
import org.apache.cassandra.distributed.shared.Versions;
import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.Deletion;
import org.apache.cassandra.thrift.Mutation;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.thrift.TFramedTransportFactory;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TTransport;

public class MigrateDropColumnsTest extends UpgradeTestBase
{
    private static final MapType MAP_TYPE = MapType.getInstance(Int32Type.instance, Int32Type.instance, true);

    @Test
    public void dropColumns() throws Throwable
    {
        new TestCase()
        .upgrade(Versions.Major.v22, Versions.Major.v30)
        .withConfig(c -> c.with(Feature.NATIVE_PROTOCOL))
        .setup(cluster -> {
            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (pk int, tables map<int, int>, PRIMARY KEY (pk))");

            ICoordinator coordinator = cluster.coordinator(1);

            // Write a range tombstone covering the "tables" collection via Thrift.
            thriftClient(cluster.get(1).broadcastAddress().getAddress(), thrift -> {
                thrift.set_keyspace(KEYSPACE);

                SliceRange range = new SliceRange();
                range.setStart(CompositeType.build(ByteBufferUtil.bytes("tables")));
                range.setFinish(CompositeType.build(ByteBufferUtil.bytes("tables")));

                SlicePredicate slice = new SlicePredicate();
                slice.setSlice_range(range);

                Deletion deletion = new Deletion();
                deletion.setPredicate(slice);
                deletion.setTimestamp(System.currentTimeMillis());

                Mutation mutation = new Mutation();
                mutation.setDeletion(deletion);

                thrift.batch_mutate(Collections.singletonMap(ByteBufferUtil.bytes(0),
                                                             Collections.singletonMap("tbl", Arrays.asList(mutation))),
                                    org.apache.cassandra.thrift.ConsistencyLevel.ALL);
            });

            cluster.forEach(inst -> inst.flush(KEYSPACE));

            cluster.schemaChange("ALTER TABLE " + KEYSPACE + ".tbl DROP tables");
            cluster.forEach(inst -> inst.flush("system"));
            cluster.forEach(inst -> inst.forceCompact("system", "schema_columnfamilies"));

            // Leave this drain in to hit the read path exception above; comment it
            // out so the range tombstone stays in the commit log, in which case 3.0
            // fails at startup replaying it (the failure in this JIRA's summary).
            cluster.forEach(inst -> inst.nodetoolResult("drain").asserts().success());
        })
        .runAfterClusterUpgrade(cluster -> {
            // The dropped column should be recorded in system_schema.dropped_columns.
            SimpleQueryResult qr = cluster.coordinator(1).executeWithResult(
                "SELECT column_name FROM system_schema.dropped_columns WHERE keyspace_name=? AND table_name=?;",
                ConsistencyLevel.ALL, KEYSPACE, "tbl");
            Assert.assertEquals(ImmutableSet.of("tables"), Sets.newHashSet(qr.map(r -> r.getString("column_name"))));

            AssertUtils.assertRows(
                cluster.coordinator(1).executeWithResult("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk=?", ConsistencyLevel.ALL, 0),
                QueryResults.builder().row(0).build());
            AssertUtils.assertRows(
                cluster.coordinator(1).executeWithResult("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk=?", ConsistencyLevel.ALL, 1),
                QueryResults.builder().row(1).build());
        })
        .run();
    }

    private static void thriftClient(InetAddress address, ThriftConsumer fn) throws TException
    {
        try (TTransport transport = new TFramedTransportFactory().openTransport(address.getHostAddress(), 9160))
        {
            Cassandra.Client client = new Cassandra.Client(new TBinaryProtocol(transport));
            fn.accept(client);
        }
    }

    public interface ThriftConsumer
    {
        void accept(Cassandra.Client client) throws TException;
    }
}
{code}
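Thrift isn't strictly required to produce this shape of data: at the storage layer, a CQL collection delete (or a full-collection overwrite) is also implemented as a range tombstone over the collection's cells. A hedged sketch of an equivalent write, as it would appear inside the setup lambda above (unverified whether 2.2 serializes it identically to the Thrift slice deletion):
{code}
// Sketch: CQL writes that also produce a range tombstone over the "tables"
// collection. Collection deletes and full overwrites are implemented as
// range tombstones at the storage layer; whether 2.2 encodes them exactly
// like the Thrift slice deletion above has not been verified here.
cluster.coordinator(1).execute("DELETE tables FROM " + KEYSPACE + ".tbl WHERE pk = ?",
                               ConsistencyLevel.ALL, 0);

// A full overwrite writes a covering tombstone plus the new cells:
cluster.coordinator(1).execute("UPDATE " + KEYSPACE + ".tbl SET tables = {1: 1} WHERE pk = ?",
                               ConsistencyLevel.ALL, 0);
{code}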