[
https://issues.apache.org/jira/browse/CASSANDRA-20238?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
David Capwell updated CASSANDRA-20238:
--------------------------------------
Bug Category: Parent values: Correctness(12982) Level 1 values: Transient
Incorrect Response(12987)
Complexity: Normal
Discovered By: Fuzz Test
Fix Version/s: 5.0.x
Severity: Critical
Status: Open (was: Triage Needed)
> SAI query missing data in single node
> -------------------------------------
>
> Key: CASSANDRA-20238
> URL: https://issues.apache.org/jira/browse/CASSANDRA-20238
> Project: Apache Cassandra
> Issue Type: Bug
> Components: Feature/SAI
> Reporter: David Capwell
> Assignee: Caleb Rackliffe
> Priority: Normal
> Fix For: 5.0.x
>
>
> In CASSANDRA-20156 I have fuzz tests that cover multiple different types of
> queries and hit an issue where an SAI query returned missing data; the test
> was on a single node.
> To create the table/indexes
> {code}
> CREATE KEYSPACE IF NOT EXISTS ks1 WITH replication = {'class':
> 'SimpleStrategy', 'replication_factor': 1};
> CREATE TABLE ks1.tbl (
> pk0 time,
> pk1 varint,
> ck0 date,
> s0 boolean static,
> s1 text static,
> v0 boolean,
> PRIMARY KEY ((pk0, pk1), ck0)
> ) WITH CLUSTERING ORDER BY (ck0 ASC)
> AND additional_write_policy = '99p'
> AND allow_auto_snapshot = true
> AND bloom_filter_fp_chance = 0.01
> AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
> AND cdc = false
> AND comment = ''
> AND compaction = {'class':
> 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy',
> 'max_threshold': '32', 'min_threshold': '4'}
> AND compression = {'chunk_length_in_kb': '16', 'class':
> 'org.apache.cassandra.io.compress.LZ4Compressor'}
> AND memtable = 'default'
> AND crc_check_chance = 1.0
> AND fast_path = 'keyspace'
> AND default_time_to_live = 0
> AND extensions = {}
> AND gc_grace_seconds = 864000
> AND incremental_backups = true
> AND max_index_interval = 2048
> AND memtable_flush_period_in_ms = 0
> AND min_index_interval = 128
> AND read_repair = 'BLOCKING'
> AND transactional_mode = 'off'
> AND transactional_migration_from = 'none'
> AND speculative_retry = '99p';
> CREATE INDEX tbl_pk0 ON ks1.tbl(pk0) USING 'SAI';
> CREATE CUSTOM INDEX tbl_pk1 ON ks1.tbl(pk1) USING 'StorageAttachedIndex';
> CREATE INDEX tbl_ck0 ON ks1.tbl(ck0) USING 'SAI';
> CREATE INDEX tbl_s0 ON ks1.tbl(s0) USING 'SAI';
> CREATE CUSTOM INDEX tbl_s1 ON ks1.tbl(s1) USING 'StorageAttachedIndex';
> CREATE CUSTOM INDEX tbl_v0 ON ks1.tbl(v0) USING 'StorageAttachedIndex';
> {code}
> The steps taken to hit the issue
> {code}
> History:
> 1: INSERT INTO ks1.tbl (pk0, pk1, ck0, s0, s1, v0) VALUES
> ('23:15:13.897962392', -2272, '-1296648-01-08', false, 'ᕊଖꥬ㨢걲映㚃', false); --
> on node1
> 9: UPDATE ks1.tbl SET s0=true, s1='뾕⌒籖' + '鋿紞', v0=true WHERE pk0 =
> '23:15:13.897962392' AND pk1 = -2272 AND ck0 = '-1306427-11-21'; -- on node1
> 13: DELETE FROM ks1.tbl WHERE pk0 = '04:38:30.720018316' AND pk1 = 0;
> -- on node1
> 14: UPDATE ks1.tbl SET v0=true WHERE pk0 = '21:08:07.371523790' AND
> pk1 = -113810779 AND ck0 = '-3063364-12-03'; -- on node1
> 17: nodetool flush ks1 tbl
> 23: UPDATE ks1.tbl SET v0=true, s0=true, s1='낯훬' + '鰂' WHERE pk0 =
> '04:38:30.720018316' AND pk1 = 0 AND ck0 = '-4470111-11-16'; -- on node1
> 24: DELETE FROM ks1.tbl WHERE pk0 = '11:30:52.635751063' AND pk1 =
> -2780431 AND ck0 = '-3740052-12-21'; -- on node1
> 25: UPDATE ks1.tbl SET v0=true WHERE pk0 = '09:39:52.201668336' AND
> pk1 = 0 AND ck0 = '-2926364-04-03'; -- on node1
> 37: UPDATE ks1.tbl SET v0=true WHERE pk0 = '09:39:52.201668336' AND
> pk1 = 0 AND ck0 = '-4629966-09-04'; -- on node1
> 38: nodetool flush ks1 tbl
> 39: UPDATE ks1.tbl SET v0=false, s1='갸' + '냢儓㹳붔즘' WHERE pk0 =
> '14:39:04.020773177' AND pk1 = 32736763 AND ck0 = '-503020-09-13'; -- on
> node1
> 41: UPDATE ks1.tbl SET v0=true WHERE pk0 = '20:21:26.771172592' AND
> pk1 = 129558081 AND ck0 = '-266604-10-11'; -- on node1
> 42: DELETE FROM ks1.tbl WHERE pk0 = '11:30:52.635751063' AND pk1 =
> -2780431; -- on node1
> 46: DELETE FROM ks1.tbl WHERE pk0 = '14:39:04.020773177' AND pk1 =
> 32736763 AND ck0 = '-616256-09-24'; -- on node1
> 49: nodetool flush ks1 tbl
> 51: UPDATE ks1.tbl SET v0=false WHERE pk0 = '11:30:52.635751063' AND
> pk1 = -2780431 AND ck0 = '-1357124-04-27'; -- on node1
> 53: INSERT INTO ks1.tbl (pk0, pk1, ck0, s0, s1, v0) VALUES
> ('20:21:26.771172592', 129558081, '-5463789-04-16', true, '樝' + '钊薾稙氞Œ?嬙ﴨ즡',
> false); -- on node1
> 54: SELECT * FROM ks1.tbl WHERE s0 = true AND pk0 =
> '23:15:13.897962392' ALLOW FILTERING; -- s0 boolean (indexed with SAI), pk0
> time (indexed with SAI), on node1, fetch size 100
> {code}
> And the exception
> {code}
> Caused by: java.lang.AssertionError: Missing rows:
> pk0 | pk1 | ck0 | s0 | s1 | v0
> '23:15:13.897962392' | -2272 | '-1296648-01-08' | true | '뾕⌒籖鋿紞' | false
> Expected:
> pk0 | pk1 | ck0 | s0 | s1 | v0
> '23:15:13.897962392' | -2272 | '-1296648-01-08' | true | '뾕⌒籖鋿紞' | false
> '23:15:13.897962392' | -2272 | '-1306427-11-21' | true | '뾕⌒籖鋿紞' | true
> {code}
> I was able to simplify this history and produce a smaller repro
> {code}
> @Test
> public void test() throws IOException
> {
> try (Cluster cluster = Cluster.build(1).start())
> {
> cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS ks WITH
> replication = {'class': 'SimpleStrategy', 'replication_factor': 1};");
> cluster.schemaChange("CREATE TABLE ks.tbl (\n" +
> " pk0 int,\n" +
> " pk1 int,\n" +
> " ck0 int,\n" +
> " s1 int static,\n" +
> " v0 int,\n" +
> " PRIMARY KEY ((pk0, pk1), ck0)\n" +
> ")");
> cluster.schemaChange("CREATE INDEX tbl_pk0 ON ks.tbl(pk0) USING
> 'SAI';");
> // cluster.schemaChange("CREATE INDEX tbl_v0 ON ks.tbl(v0) USING
> 'SAI';");
> var node = cluster.get(1);
> node.executeInternal("UPDATE ks.tbl SET s1=0, v0=0 WHERE pk0 = 0
> AND pk1 = 1 AND ck0 = 0 ");
> node.executeInternal("DELETE FROM ks.tbl WHERE pk0 = 0 AND pk1
> = 1");
> node.executeInternal("UPDATE ks.tbl SET v0=1 WHERE pk0 = 0 AND
> pk1 = 1 AND ck0 = 1"); // expected result
> node.nodetoolResult("flush", "ks", "tbl").asserts().success();
> var qr = node.coordinator().executeWithResult("SELECT * FROM
> ks.tbl WHERE v0 = 1 AND pk0 = 0 ALLOW FILTERING", ConsistencyLevel.ALL);
> System.out.println("Results");
> int found = 0;
> while (qr.hasNext())
> {
> found++;
> System.out.println(qr.next());
> }
> Assertions.assertThat(found).isEqualTo(1);
> }
> }
> {code}
> I couldn't find a way to shrink this test further. Static columns are not
> touched, but removing them causes the test to pass. The index on v0 doesn't
> matter even though it's part of the query. Since the failing column is a
> partition column, I need to have the 2 partition columns; otherwise we can't
> index the column.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]