David Capwell created CASSANDRA-20238:
-----------------------------------------

             Summary: SAI query missing data in single node
                 Key: CASSANDRA-20238
                 URL: https://issues.apache.org/jira/browse/CASSANDRA-20238
             Project: Apache Cassandra
          Issue Type: Bug
          Components: Feature/SAI
            Reporter: David Capwell
            Assignee: Caleb Rackliffe


In CASSANDRA-20156 I have fuzz tests that cover multiple different types of 
queries, and they hit an issue where an SAI query was missing data in its 
results; the test was on a single node.

To create the table/indexes

{code}
CREATE KEYSPACE IF NOT EXISTS ks1 WITH replication = {'class': 
'SimpleStrategy', 'replication_factor': 1};
CREATE TABLE ks1.tbl (
                pk0 time,
                pk1 varint,
                ck0 date,
                s0 boolean static,
                s1 text static,
                v0 boolean,
                PRIMARY KEY ((pk0, pk1), ck0)
) WITH CLUSTERING ORDER BY (ck0 ASC)
                AND additional_write_policy = '99p'
                AND allow_auto_snapshot = true
                AND bloom_filter_fp_chance = 0.01
                AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
                AND cdc = false
                AND comment = ''
                AND compaction = {'class': 
'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 
'max_threshold': '32', 'min_threshold': '4'}
                AND compression = {'chunk_length_in_kb': '16', 'class': 
'org.apache.cassandra.io.compress.LZ4Compressor'}
                AND memtable = 'default'
                AND crc_check_chance = 1.0
                AND fast_path = 'keyspace'
                AND default_time_to_live = 0
                AND extensions = {}
                AND gc_grace_seconds = 864000
                AND incremental_backups = true
                AND max_index_interval = 2048
                AND memtable_flush_period_in_ms = 0
                AND min_index_interval = 128
                AND read_repair = 'BLOCKING'
                AND transactional_mode = 'off'
                AND transactional_migration_from = 'none'
                AND speculative_retry = '99p';
CREATE INDEX tbl_pk0 ON ks1.tbl(pk0) USING 'SAI';
CREATE CUSTOM INDEX tbl_pk1 ON ks1.tbl(pk1) USING 'StorageAttachedIndex';
CREATE INDEX tbl_ck0 ON ks1.tbl(ck0) USING 'SAI';
CREATE INDEX tbl_s0 ON ks1.tbl(s0) USING 'SAI';
CREATE CUSTOM INDEX tbl_s1 ON ks1.tbl(s1) USING 'StorageAttachedIndex';
CREATE CUSTOM INDEX tbl_v0 ON ks1.tbl(v0) USING 'StorageAttachedIndex';
{code}

The steps taken to hit the issue

{code}
History:
        1: INSERT INTO ks1.tbl (pk0, pk1, ck0, s0, s1, v0) VALUES 
('23:15:13.897962392', -2272, '-1296648-01-08', false, 'ᕊଖꥬ㨢걲映㚃', false); -- on 
node1
        9: UPDATE ks1.tbl SET s0=true, s1='뾕⌒籖' + '鋿紞', v0=true WHERE  pk0 = 
'23:15:13.897962392' AND  pk1 = -2272 AND  ck0 = '-1306427-11-21'; -- on node1
        13: DELETE FROM ks1.tbl WHERE  pk0 = '04:38:30.720018316' AND  pk1 = 0; 
-- on node1
        14: UPDATE ks1.tbl SET v0=true WHERE  pk0 = '21:08:07.371523790' AND  
pk1 = -113810779 AND  ck0 = '-3063364-12-03'; -- on node1
        17: nodetool flush ks1 tbl
        23: UPDATE ks1.tbl SET v0=true, s0=true, s1='낯훬' + '鰂' WHERE  pk0 = 
'04:38:30.720018316' AND  pk1 = 0 AND  ck0 = '-4470111-11-16'; -- on node1
        24: DELETE FROM ks1.tbl WHERE  pk0 = '11:30:52.635751063' AND  pk1 = 
-2780431 AND  ck0 = '-3740052-12-21'; -- on node1
        25: UPDATE ks1.tbl SET v0=true WHERE  pk0 = '09:39:52.201668336' AND  
pk1 = 0 AND  ck0 = '-2926364-04-03'; -- on node1
        37: UPDATE ks1.tbl SET v0=true WHERE  pk0 = '09:39:52.201668336' AND  
pk1 = 0 AND  ck0 = '-4629966-09-04'; -- on node1
        38: nodetool flush ks1 tbl
        39: UPDATE ks1.tbl SET v0=false, s1='갸' + '냢儓㹳붔즘' WHERE  pk0 = 
'14:39:04.020773177' AND  pk1 = 32736763 AND  ck0 = '-503020-09-13'; -- on node1
        41: UPDATE ks1.tbl SET v0=true WHERE  pk0 = '20:21:26.771172592' AND  
pk1 = 129558081 AND  ck0 = '-266604-10-11'; -- on node1
        42: DELETE FROM ks1.tbl WHERE  pk0 = '11:30:52.635751063' AND  pk1 = 
-2780431; -- on node1
        46: DELETE FROM ks1.tbl WHERE  pk0 = '14:39:04.020773177' AND  pk1 = 
32736763 AND  ck0 = '-616256-09-24'; -- on node1
        49: nodetool flush ks1 tbl
        51: UPDATE ks1.tbl SET v0=false WHERE  pk0 = '11:30:52.635751063' AND  
pk1 = -2780431 AND  ck0 = '-1357124-04-27'; -- on node1
        53: INSERT INTO ks1.tbl (pk0, pk1, ck0, s0, s1, v0) VALUES 
('20:21:26.771172592', 129558081, '-5463789-04-16', true, '樝' + '钊薾稙氞Œ?嬙ﴨ즡', 
false); -- on node1
        54: SELECT * FROM ks1.tbl WHERE s0 = true AND pk0 = 
'23:15:13.897962392' ALLOW FILTERING; -- s0 boolean (indexed with SAI), pk0 
time (indexed with SAI), on node1, fetch size 100
{code}

And the exception

{code}
Caused by: java.lang.AssertionError: Missing rows:
pk0                  | pk1   | ck0              | s0   | s1      | v0
'23:15:13.897962392' | -2272 | '-1296648-01-08' | true | '뾕⌒籖鋿紞' | false

Expected:
pk0                  | pk1   | ck0              | s0   | s1      | v0
'23:15:13.897962392' | -2272 | '-1296648-01-08' | true | '뾕⌒籖鋿紞' | false
'23:15:13.897962392' | -2272 | '-1306427-11-21' | true | '뾕⌒籖鋿紞' | true
{code}

I was able to simplify this history and produce a smaller repro

{code}
    @Test
    public void test() throws IOException
    {
        try (Cluster cluster = Cluster.build(1).start())
        {
            cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS ks WITH 
replication = {'class': 'SimpleStrategy', 'replication_factor': 1};");
            cluster.schemaChange("CREATE TABLE ks.tbl (\n" +
                                 "  pk0 int,\n" +
                                 "  pk1 int,\n" +
                                 "  ck0 int,\n" +
                                 "  s1 int static,\n" +
                                 "  v0 int,\n" +
                                 "  PRIMARY KEY ((pk0, pk1), ck0)\n" +
                                 ")");
            cluster.schemaChange("CREATE INDEX tbl_pk0 ON ks.tbl(pk0) USING 
'SAI';");
//            cluster.schemaChange("CREATE INDEX tbl_v0 ON ks.tbl(v0) USING 
'SAI';");

            var node = cluster.get(1);
            node.executeInternal("UPDATE ks.tbl SET s1=0, v0=0 WHERE  pk0 = 0 
AND  pk1 = 1 AND  ck0 = 0 ");
            node.executeInternal("DELETE FROM ks.tbl WHERE  pk0 = 0 AND  pk1 = 
1");
            node.executeInternal("UPDATE ks.tbl SET v0=1 WHERE  pk0 = 0 AND  
pk1 = 1 AND  ck0 = 1"); // expected result
            node.nodetoolResult("flush", "ks", "tbl").asserts().success();

            var qr = node.coordinator().executeWithResult("SELECT * FROM ks.tbl 
WHERE v0 = 1 AND pk0 = 0 ALLOW FILTERING", ConsistencyLevel.ALL);
            System.out.println("Results");
            int found = 0;
            while (qr.hasNext())
            {
                found++;
                System.out.println(qr.next());
            }
            Assertions.assertThat(found).isEqualTo(1);
        }
    }
{code}

I couldn't find a way to shrink this test further.  Static columns are not 
touched, but removing them causes the test to pass.  The index on v0 doesn't 
matter even though it's part of the query.  Since the failing column is a 
partition column, I need to have the 2 partition columns; otherwise we can't 
index the column.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to