[
https://issues.apache.org/jira/browse/CASSANDRA-15970?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17166585#comment-17166585
]
David Capwell commented on CASSANDRA-15970:
-------------------------------------------
CI Links
2.2:
https://app.circleci.com/pipelines/github/dcapwell/cassandra?branch=bug%2FCASSANDRA-15970-2.2
3.0:
https://app.circleci.com/pipelines/github/dcapwell/cassandra?branch=bug%2FCASSANDRA-15970-3.0
3.11:
https://app.circleci.com/pipelines/github/dcapwell/cassandra?branch=bug%2FCASSANDRA-15970-3.11
trunk:
https://app.circleci.com/pipelines/github/dcapwell/cassandra?branch=bug%2FCASSANDRA-15970
> 3.x fails to start if commit log has range tombstones from a column which is
> also deleted
> -----------------------------------------------------------------------------------------
>
> Key: CASSANDRA-15970
> URL: https://issues.apache.org/jira/browse/CASSANDRA-15970
> Project: Cassandra
> Issue Type: Bug
> Components: Legacy/Local Write-Read Paths, Local/Commit Log
> Reporter: David Capwell
> Assignee: David Capwell
> Priority: Normal
> Fix For: 3.0.x, 3.11.x
>
>
> Cassandra crashes with the following exception
> {code}
> ERROR [node1_isolatedExecutor:1] node1 2020-07-21 18:59:39,048
> JVMStabilityInspector.java:102 - Exiting due to error while processing commit
> log during initialization.
> org.apache.cassandra.db.commitlog.CommitLogReplayer$CommitLogReplayException:
> Unexpected error deserializing mutation; saved to
> /var/folders/cm/08cddl2s25j7fq3jdb76gh4r0000gn/T/mutation6239873170066752296dat.
> This may be caused by replaying a mutation against a table with the same
> name but incompatible schema.
> at
> org.apache.cassandra.db.commitlog.CommitLogReplayer.handleReplayError(CommitLogReplayer.java:731)
> [dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.commitlog.CommitLogReplayer.replayMutation(CommitLogReplayer.java:656)
> [dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.commitlog.CommitLogReplayer.replaySyncSection(CommitLogReplayer.java:609)
> [dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.commitlog.CommitLogReplayer.recover(CommitLogReplayer.java:493)
> [dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.commitlog.CommitLogReplayer.recover(CommitLogReplayer.java:189)
> [dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.commitlog.CommitLog.recover(CommitLog.java:170)
> [dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.commitlog.CommitLog.recover(CommitLog.java:151)
> [dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.distributed.impl.Instance.lambda$startup$7(Instance.java:535)
> [dtest-3.0.21.jar:na]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> ~[na:1.8.0_242]
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> ~[na:1.8.0_242]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> ~[na:1.8.0_242]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> ~[na:1.8.0_242]
> at
> org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:83)
> ~[dtest-3.0.21.jar:na]
> at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_242]
> Caused by: java.lang.NullPointerException: null
> at
> org.apache.cassandra.db.ClusteringComparator.validate(ClusteringComparator.java:206)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.partitions.PartitionUpdate.validate(PartitionUpdate.java:494)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.commitlog.CommitLogReplayer.replayMutation(CommitLogReplayer.java:629)
> [dtest-3.0.21.jar:na]
> ... 12 common frames omitted
> {code}
> If you drain in 2.2 before upgrade, you get the following
> {code}
> ERROR [SharedPool-Worker-1] node1 2020-07-21 22:17:25,661
> AbstractLocalAwareExecutorService.java:169 - Uncaught exception on thread
> Thread[SharedPool-Worker-1,5,node1]
> java.lang.RuntimeException: java.lang.NullPointerException
> at
> org.apache.cassandra.service.StorageProxy$DroppableRunnable.run(StorageProxy.java:2537)
> ~[dtest-3.0.21.jar:na]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> ~[na:1.8.0_242]
> at
> org.apache.cassandra.concurrent.AbstractLocalAwareExecutorService$FutureTask.run(AbstractLocalAwareExecutorService.java:165)
> ~[dtest-3.0.21.jar:na]
> at org.apache.cassandra.concurrent.SEPWorker.run(SEPWorker.java:109)
> [dtest-3.0.21.jar:na]
> at java.lang.Thread.run(Thread.java:748) [na:1.8.0_242]
> Caused by: java.lang.NullPointerException: null
> at
> org.apache.cassandra.db.ClusteringComparator.compare(ClusteringComparator.java:131)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.UnfilteredDeserializer$OldFormatDeserializer.compareNextTo(UnfilteredDeserializer.java:391)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.columniterator.SSTableIterator$ForwardReader.handlePreSliceData(SSTableIterator.java:105)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.columniterator.SSTableIterator$ForwardReader.hasNextInternal(SSTableIterator.java:164)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.columniterator.AbstractSSTableIterator$Reader.hasNext(AbstractSSTableIterator.java:336)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.filter.ClusteringIndexNamesFilter$1.hasNext(ClusteringIndexNamesFilter.java:157)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.rows.UnfilteredRowIterator.isEmpty(UnfilteredRowIterator.java:70)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.SinglePartitionReadCommand.queryMemtableAndSSTablesInTimestampOrder(SinglePartitionReadCommand.java:952)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.SinglePartitionReadCommand.queryMemtableAndDiskInternal(SinglePartitionReadCommand.java:679)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.SinglePartitionReadCommand.queryMemtableAndDisk(SinglePartitionReadCommand.java:656)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.SinglePartitionReadCommand.queryStorage(SinglePartitionReadCommand.java:491)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.db.ReadCommand.executeLocally(ReadCommand.java:418)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.service.StorageProxy$LocalReadRunnable.runMayThrow(StorageProxy.java:1793)
> ~[dtest-3.0.21.jar:na]
> at
> org.apache.cassandra.service.StorageProxy$DroppableRunnable.run(StorageProxy.java:2533)
> ~[dtest-3.0.21.jar:na]
> ... 4 common frames omitted
> {code}
> This can be replicated with the following upgrade dtest
> {code}
> package org.apache.cassandra.distributed.upgrade;
> import java.net.InetAddress;
> import java.util.Arrays;
> import java.util.Collections;
> import java.util.HashMap;
> import java.util.Map;
> import java.util.concurrent.ExecutionException;
> import com.google.common.collect.ImmutableSet;
> import com.google.common.collect.Sets;
> import org.junit.Assert;
> import org.junit.Test;
> import org.apache.cassandra.db.marshal.CompositeType;
> import org.apache.cassandra.db.marshal.Int32Type;
> import org.apache.cassandra.db.marshal.MapType;
> import org.apache.cassandra.distributed.api.ConsistencyLevel;
> import org.apache.cassandra.distributed.api.Feature;
> import org.apache.cassandra.distributed.api.ICoordinator;
> import org.apache.cassandra.distributed.api.QueryResults;
> import org.apache.cassandra.distributed.api.SimpleQueryResult;
> import org.apache.cassandra.distributed.shared.AssertUtils;
> import org.apache.cassandra.distributed.shared.Versions;
> import org.apache.cassandra.thrift.Cassandra;
> import org.apache.cassandra.thrift.Deletion;
> import org.apache.cassandra.thrift.Mutation;
> import org.apache.cassandra.thrift.SlicePredicate;
> import org.apache.cassandra.thrift.SliceRange;
> import org.apache.cassandra.thrift.TFramedTransportFactory;
> import org.apache.cassandra.utils.ByteBufferUtil;
> import org.apache.thrift.TException;
> import org.apache.thrift.protocol.TBinaryProtocol;
> import org.apache.thrift.transport.TTransport;
> public class MigrateDropColumnsTest extends UpgradeTestBase
> {
> private static final MapType MAP_TYPE =
> MapType.getInstance(Int32Type.instance, Int32Type.instance, true);
> @Test
> public void dropColumns() throws Throwable
> {
> new TestCase()
> .upgrade(Versions.Major.v22, Versions.Major.v30)
> .withConfig(c -> c.with(Feature.NATIVE_PROTOCOL))
> .setup(cluster -> {
> cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl(pk int,
> tables map<int, int>, PRIMARY KEY (pk))");
> ICoordinator coordinator = cluster.coordinator(1);
> thriftClient(cluster.get(1).broadcastAddress().getAddress(),
> thrift -> {
> thrift.set_keyspace(KEYSPACE);
> Mutation mutation = new Mutation();
> Deletion deletion = new Deletion();
> SlicePredicate slice = new SlicePredicate();
> SliceRange range = new SliceRange();
>
> range.setStart(CompositeType.build(ByteBufferUtil.bytes("tables")));
>
> range.setFinish(CompositeType.build(ByteBufferUtil.bytes("tables")));
> slice.setSlice_range(range);
> deletion.setPredicate(slice);
> deletion.setTimestamp(System.currentTimeMillis());
> mutation.setDeletion(deletion);
>
> thrift.batch_mutate(Collections.singletonMap(ByteBufferUtil.bytes(0),
>
> Collections.singletonMap("tbl", Arrays.asList(mutation))),
>
> org.apache.cassandra.thrift.ConsistencyLevel.ALL);
> });
> cluster.forEach(inst -> inst.flush(KEYSPACE));
> cluster.schemaChange("ALTER TABLE " + KEYSPACE + ".tbl DROP
> tables");
> cluster.forEach(inst -> inst.flush("system"));
> cluster.forEach(inst -> inst.forceCompact("system",
> "schema_columnfamilies"));
> // For this JIRA leave this uncommented to hit the commit log replay
> issue;
> // comment out to hit the read path issue
> cluster.forEach(inst ->
> inst.nodetoolResult("drain").asserts().success());
> })
> .runAfterClusterUpgrade(cluster -> {
> SimpleQueryResult qr = cluster.coordinator(1).
>
> executeWithResult("SELECT column_name " +
>
> "FROM system_schema.dropped_columns " +
>
> "WHERE keyspace_name=?" +
> "
> AND table_name=?;",
>
> ConsistencyLevel.ALL, KEYSPACE, "tbl");
> Assert.assertEquals(ImmutableSet.of("tables"),
> Sets.newHashSet(qr.map(r -> r.getString("column_name"))));
> AssertUtils.assertRows(
> cluster.coordinator(1).executeWithResult("SELECT * FROM " +
> KEYSPACE + ".tbl WHERE pk=?", ConsistencyLevel.ALL, 0),
> QueryResults.builder().row(0).build());
> AssertUtils.assertRows(
> cluster.coordinator(1).executeWithResult("SELECT * FROM " +
> KEYSPACE + ".tbl WHERE pk=?", ConsistencyLevel.ALL, 1),
> QueryResults.builder().row(1).build());
> })
> .run();
> }
> private static void thriftClient(InetAddress address, ThriftConsumer fn)
> throws TException
> {
> try (TTransport transport = new
> TFramedTransportFactory().openTransport(address.getHostAddress(), 9160))
> {
> Cassandra.Client client = new Cassandra.Client(new
> TBinaryProtocol(transport));
> fn.accept(client);
> }
> }
> public interface ThriftConsumer
> {
> void accept(Cassandra.Client client) throws TException;
> }
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]