David Capwell created CASSANDRA-20238:
-----------------------------------------
Summary: SAI query missing data in single node
Key: CASSANDRA-20238
URL: https://issues.apache.org/jira/browse/CASSANDRA-20238
Project: Apache Cassandra
Issue Type: Bug
Components: Feature/SAI
Reporter: David Capwell
Assignee: Caleb Rackliffe
While working on CASSANDRA-20156, my fuzz tests (which cover many different
types of queries) hit an issue where an SAI query did not return all matching
rows; the test ran against a single node.
To create the table/indexes
{code}
-- Schema used by the fuzz test that hit the failure.
-- Keyspace: SimpleStrategy with replication_factor 1.
CREATE KEYSPACE IF NOT EXISTS ks1 WITH replication = {'class':
'SimpleStrategy', 'replication_factor': 1};
-- Table under test: composite partition key (pk0, pk1), one clustering
-- column (ck0), two static columns (s0, s1) and one regular column (v0).
-- The WITH options below are reproduced as reported; the report does not
-- say which of them, if any, are relevant to the failure.
CREATE TABLE ks1.tbl (
pk0 time,
pk1 varint,
ck0 date,
s0 boolean static,
s1 text static,
v0 boolean,
PRIMARY KEY ((pk0, pk1), ck0)
) WITH CLUSTERING ORDER BY (ck0 ASC)
AND additional_write_policy = '99p'
AND allow_auto_snapshot = true
AND bloom_filter_fp_chance = 0.01
AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
AND cdc = false
AND comment = ''
AND compaction = {'class':
'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy',
'max_threshold': '32', 'min_threshold': '4'}
AND compression = {'chunk_length_in_kb': '16', 'class':
'org.apache.cassandra.io.compress.LZ4Compressor'}
AND memtable = 'default'
AND crc_check_chance = 1.0
AND fast_path = 'keyspace'
AND default_time_to_live = 0
AND extensions = {}
AND gc_grace_seconds = 864000
AND incremental_backups = true
AND max_index_interval = 2048
AND memtable_flush_period_in_ms = 0
AND min_index_interval = 128
AND read_repair = 'BLOCKING'
AND transactional_mode = 'off'
AND transactional_migration_from = 'none'
AND speculative_retry = '99p';
-- One index per column: both partition key columns, the clustering column,
-- both static columns and the regular column.
-- Two spellings appear (CREATE INDEX with the SAI shorthand, and
-- CREATE CUSTOM INDEX with StorageAttachedIndex); presumably equivalent,
-- TODO confirm against the CQL reference for the version under test.
CREATE INDEX tbl_pk0 ON ks1.tbl(pk0) USING 'SAI';
CREATE CUSTOM INDEX tbl_pk1 ON ks1.tbl(pk1) USING 'StorageAttachedIndex';
CREATE INDEX tbl_ck0 ON ks1.tbl(ck0) USING 'SAI';
CREATE INDEX tbl_s0 ON ks1.tbl(s0) USING 'SAI';
CREATE CUSTOM INDEX tbl_s1 ON ks1.tbl(s1) USING 'StorageAttachedIndex';
CREATE CUSTOM INDEX tbl_v0 ON ks1.tbl(v0) USING 'StorageAttachedIndex';
{code}
The steps taken to hit the issue
{code}
History:
1: INSERT INTO ks1.tbl (pk0, pk1, ck0, s0, s1, v0) VALUES
('23:15:13.897962392', -2272, '-1296648-01-08', false, 'ᕊଖꥬ㨢걲映㚃', false); -- on
node1
9: UPDATE ks1.tbl SET s0=true, s1='뾕⌒籖' + '鋿紞', v0=true WHERE pk0 =
'23:15:13.897962392' AND pk1 = -2272 AND ck0 = '-1306427-11-21'; -- on node1
13: DELETE FROM ks1.tbl WHERE pk0 = '04:38:30.720018316' AND pk1 = 0;
-- on node1
14: UPDATE ks1.tbl SET v0=true WHERE pk0 = '21:08:07.371523790' AND
pk1 = -113810779 AND ck0 = '-3063364-12-03'; -- on node1
17: nodetool flush ks1 tbl
23: UPDATE ks1.tbl SET v0=true, s0=true, s1='낯훬' + '鰂' WHERE pk0 =
'04:38:30.720018316' AND pk1 = 0 AND ck0 = '-4470111-11-16'; -- on node1
24: DELETE FROM ks1.tbl WHERE pk0 = '11:30:52.635751063' AND pk1 =
-2780431 AND ck0 = '-3740052-12-21'; -- on node1
25: UPDATE ks1.tbl SET v0=true WHERE pk0 = '09:39:52.201668336' AND
pk1 = 0 AND ck0 = '-2926364-04-03'; -- on node1
37: UPDATE ks1.tbl SET v0=true WHERE pk0 = '09:39:52.201668336' AND
pk1 = 0 AND ck0 = '-4629966-09-04'; -- on node1
38: nodetool flush ks1 tbl
39: UPDATE ks1.tbl SET v0=false, s1='갸' + '냢儓㹳붔즘' WHERE pk0 =
'14:39:04.020773177' AND pk1 = 32736763 AND ck0 = '-503020-09-13'; -- on node1
41: UPDATE ks1.tbl SET v0=true WHERE pk0 = '20:21:26.771172592' AND
pk1 = 129558081 AND ck0 = '-266604-10-11'; -- on node1
42: DELETE FROM ks1.tbl WHERE pk0 = '11:30:52.635751063' AND pk1 =
-2780431; -- on node1
46: DELETE FROM ks1.tbl WHERE pk0 = '14:39:04.020773177' AND pk1 =
32736763 AND ck0 = '-616256-09-24'; -- on node1
49: nodetool flush ks1 tbl
51: UPDATE ks1.tbl SET v0=false WHERE pk0 = '11:30:52.635751063' AND
pk1 = -2780431 AND ck0 = '-1357124-04-27'; -- on node1
53: INSERT INTO ks1.tbl (pk0, pk1, ck0, s0, s1, v0) VALUES
('20:21:26.771172592', 129558081, '-5463789-04-16', true, '樝' + '钊薾稙氞Œ?嬙ﴨ즡',
false); -- on node1
54: SELECT * FROM ks1.tbl WHERE s0 = true AND pk0 =
'23:15:13.897962392' ALLOW FILTERING; -- s0 boolean (indexed with SAI), pk0
time (indexed with SAI), on node1, fetch size 100
{code}
And the exception
{code}
Caused by: java.lang.AssertionError: Missing rows:
pk0 | pk1 | ck0 | s0 | s1 | v0
'23:15:13.897962392' | -2272 | '-1296648-01-08' | true | '뾕⌒籖鋿紞' | false
Expected:
pk0 | pk1 | ck0 | s0 | s1 | v0
'23:15:13.897962392' | -2272 | '-1296648-01-08' | true | '뾕⌒籖鋿紞' | false
'23:15:13.897962392' | -2272 | '-1306427-11-21' | true | '뾕⌒籖鋿紞' | true
{code}
I was able to simplify this history and produce a smaller repro
{code}
/**
 * Reduced reproduction of the missing-row SAI query on a one-node cluster.
 *
 * Steps, in order:
 * 1. Create keyspace ks and table ks.tbl with partition key (pk0, pk1),
 *    clustering column ck0, static column s1 and regular column v0.
 * 2. Create an SAI index on pk0 only (the v0 index is left commented out).
 * 3. Write row ck0 = 0 in partition (pk0 = 0, pk1 = 1), setting s1 and v0.
 * 4. Delete that whole partition (the DELETE names only pk0 and pk1).
 * 5. Write row ck0 = 1 with v0 = 1 in the same partition; this is the row
 *    the query is expected to return.
 * 6. Flush ks.tbl and assert that the flush succeeded.
 * 7. Query with v0 = 1 AND pk0 = 0 ALLOW FILTERING at ConsistencyLevel.ALL,
 *    print every returned row, and assert that exactly one row came back.
 *
 * NOTE(review): the code is kept exactly as it arrived in the report,
 * including string literals that were hard-wrapped onto a second line;
 * those wrapped lines must be rejoined before the snippet will compile.
 */
@Test
public void test() throws IOException
{
// One-node cluster, closed by try-with-resources when the test ends.
try (Cluster cluster = Cluster.build(1).start())
{
// Keyspace with replication_factor 1.
cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS ks WITH
replication = {'class': 'SimpleStrategy', 'replication_factor': 1};");
// Table with two partition key columns, one clustering column,
// one static column and one regular column.
cluster.schemaChange("CREATE TABLE ks.tbl (\n" +
" pk0 int,\n" +
" pk1 int,\n" +
" ck0 int,\n" +
" s1 int static,\n" +
" v0 int,\n" +
" PRIMARY KEY ((pk0, pk1), ck0)\n" +
")");
// SAI index on the first partition key column only.
cluster.schemaChange("CREATE INDEX tbl_pk0 ON ks.tbl(pk0) USING
'SAI';");
// cluster.schemaChange("CREATE INDEX tbl_v0 ON ks.tbl(v0) USING
'SAI';");
var node = cluster.get(1);
node.executeInternal("UPDATE ks.tbl SET s1=0, v0=0 WHERE pk0 = 0
AND pk1 = 1 AND ck0 = 0 ");
node.executeInternal("DELETE FROM ks.tbl WHERE pk0 = 0 AND pk1 =
1");
node.executeInternal("UPDATE ks.tbl SET v0=1 WHERE pk0 = 0 AND
pk1 = 1 AND ck0 = 1"); // expected result
node.nodetoolResult("flush", "ks", "tbl").asserts().success();
var qr = node.coordinator().executeWithResult("SELECT * FROM ks.tbl
WHERE v0 = 1 AND pk0 = 0 ALLOW FILTERING", ConsistencyLevel.ALL);
System.out.println("Results");
int found = 0;
while (qr.hasNext())
{
found++;
System.out.println(qr.next());
}
Assertions.assertThat(found).isEqualTo(1);
}
}
{code}
I couldn't find a way to shrink this test further. Static columns are not
touched, but removing them causes the test to pass. The index on v0 doesn't
matter even though it's part of the query. Since the failing column is a
partition key column, I need both partition key columns; otherwise we can't
index that column.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]