[ 
https://issues.apache.org/jira/browse/CASSANDRA-20238?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

David Capwell updated CASSANDRA-20238:
--------------------------------------
     Bug Category: Parent values: Correctness(12982); Level 1 values: 
Transient Incorrect Response(12987)
       Complexity: Normal
    Discovered By: Fuzz Test
    Fix Version/s: 5.0.x
         Severity: Critical
           Status: Open  (was: Triage Needed)

> SAI query missing data in single node
> -------------------------------------
>
>                 Key: CASSANDRA-20238
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-20238
>             Project: Apache Cassandra
>          Issue Type: Bug
>          Components: Feature/SAI
>            Reporter: David Capwell
>            Assignee: Caleb Rackliffe
>            Priority: Normal
>             Fix For: 5.0.x
>
>
> In CASSANDRA-20156 I have fuzz tests that cover multiple different types of 
> queries and hit an issue where an SAI query returned missing data; the test 
> was on a single node.
> To create the table/indexes
> {code}
> CREATE KEYSPACE IF NOT EXISTS ks1 WITH replication = {'class': 
> 'SimpleStrategy', 'replication_factor': 1};
> CREATE TABLE ks1.tbl (
>               pk0 time,
>               pk1 varint,
>               ck0 date,
>               s0 boolean static,
>               s1 text static,
>               v0 boolean,
>               PRIMARY KEY ((pk0, pk1), ck0)
> ) WITH CLUSTERING ORDER BY (ck0 ASC)
>               AND additional_write_policy = '99p'
>               AND allow_auto_snapshot = true
>               AND bloom_filter_fp_chance = 0.01
>               AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
>               AND cdc = false
>               AND comment = ''
>               AND compaction = {'class': 
> 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 
> 'max_threshold': '32', 'min_threshold': '4'}
>               AND compression = {'chunk_length_in_kb': '16', 'class': 
> 'org.apache.cassandra.io.compress.LZ4Compressor'}
>               AND memtable = 'default'
>               AND crc_check_chance = 1.0
>               AND fast_path = 'keyspace'
>               AND default_time_to_live = 0
>               AND extensions = {}
>               AND gc_grace_seconds = 864000
>               AND incremental_backups = true
>               AND max_index_interval = 2048
>               AND memtable_flush_period_in_ms = 0
>               AND min_index_interval = 128
>               AND read_repair = 'BLOCKING'
>               AND transactional_mode = 'off'
>               AND transactional_migration_from = 'none'
>               AND speculative_retry = '99p';
> CREATE INDEX tbl_pk0 ON ks1.tbl(pk0) USING 'SAI';
> CREATE CUSTOM INDEX tbl_pk1 ON ks1.tbl(pk1) USING 'StorageAttachedIndex';
> CREATE INDEX tbl_ck0 ON ks1.tbl(ck0) USING 'SAI';
> CREATE INDEX tbl_s0 ON ks1.tbl(s0) USING 'SAI';
> CREATE CUSTOM INDEX tbl_s1 ON ks1.tbl(s1) USING 'StorageAttachedIndex';
> CREATE CUSTOM INDEX tbl_v0 ON ks1.tbl(v0) USING 'StorageAttachedIndex';
> {code}
> The steps taken to hit the issue
> {code}
> History:
>       1: INSERT INTO ks1.tbl (pk0, pk1, ck0, s0, s1, v0) VALUES 
> ('23:15:13.897962392', -2272, '-1296648-01-08', false, 'ᕊଖꥬ㨢걲映㚃', false); -- 
> on node1
>       9: UPDATE ks1.tbl SET s0=true, s1='뾕⌒籖' + '鋿紞', v0=true WHERE  pk0 = 
> '23:15:13.897962392' AND  pk1 = -2272 AND  ck0 = '-1306427-11-21'; -- on node1
>       13: DELETE FROM ks1.tbl WHERE  pk0 = '04:38:30.720018316' AND  pk1 = 0; 
> -- on node1
>       14: UPDATE ks1.tbl SET v0=true WHERE  pk0 = '21:08:07.371523790' AND  
> pk1 = -113810779 AND  ck0 = '-3063364-12-03'; -- on node1
>       17: nodetool flush ks1 tbl
>       23: UPDATE ks1.tbl SET v0=true, s0=true, s1='낯훬' + '鰂' WHERE  pk0 = 
> '04:38:30.720018316' AND  pk1 = 0 AND  ck0 = '-4470111-11-16'; -- on node1
>       24: DELETE FROM ks1.tbl WHERE  pk0 = '11:30:52.635751063' AND  pk1 = 
> -2780431 AND  ck0 = '-3740052-12-21'; -- on node1
>       25: UPDATE ks1.tbl SET v0=true WHERE  pk0 = '09:39:52.201668336' AND  
> pk1 = 0 AND  ck0 = '-2926364-04-03'; -- on node1
>       37: UPDATE ks1.tbl SET v0=true WHERE  pk0 = '09:39:52.201668336' AND  
> pk1 = 0 AND  ck0 = '-4629966-09-04'; -- on node1
>       38: nodetool flush ks1 tbl
>       39: UPDATE ks1.tbl SET v0=false, s1='갸' + '냢儓㹳붔즘' WHERE  pk0 = 
> '14:39:04.020773177' AND  pk1 = 32736763 AND  ck0 = '-503020-09-13'; -- on 
> node1
>       41: UPDATE ks1.tbl SET v0=true WHERE  pk0 = '20:21:26.771172592' AND  
> pk1 = 129558081 AND  ck0 = '-266604-10-11'; -- on node1
>       42: DELETE FROM ks1.tbl WHERE  pk0 = '11:30:52.635751063' AND  pk1 = 
> -2780431; -- on node1
>       46: DELETE FROM ks1.tbl WHERE  pk0 = '14:39:04.020773177' AND  pk1 = 
> 32736763 AND  ck0 = '-616256-09-24'; -- on node1
>       49: nodetool flush ks1 tbl
>       51: UPDATE ks1.tbl SET v0=false WHERE  pk0 = '11:30:52.635751063' AND  
> pk1 = -2780431 AND  ck0 = '-1357124-04-27'; -- on node1
>       53: INSERT INTO ks1.tbl (pk0, pk1, ck0, s0, s1, v0) VALUES 
> ('20:21:26.771172592', 129558081, '-5463789-04-16', true, '樝' + '钊薾稙氞Œ?嬙ﴨ즡', 
> false); -- on node1
>       54: SELECT * FROM ks1.tbl WHERE s0 = true AND pk0 = 
> '23:15:13.897962392' ALLOW FILTERING; -- s0 boolean (indexed with SAI), pk0 
> time (indexed with SAI), on node1, fetch size 100
> {code}
> And the exception
> {code}
> Caused by: java.lang.AssertionError: Missing rows:
> pk0                  | pk1   | ck0              | s0   | s1      | v0
> '23:15:13.897962392' | -2272 | '-1296648-01-08' | true | '뾕⌒籖鋿紞' | false
> Expected:
> pk0                  | pk1   | ck0              | s0   | s1      | v0
> '23:15:13.897962392' | -2272 | '-1296648-01-08' | true | '뾕⌒籖鋿紞' | false
> '23:15:13.897962392' | -2272 | '-1306427-11-21' | true | '뾕⌒籖鋿紞' | true
> {code}
> I was able to simplify this history and produce a smaller repro:
> {code}
>     @Test
>     public void test() throws IOException
>     {
>         try (Cluster cluster = Cluster.build(1).start())
>         {
>             cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS ks WITH 
> replication = {'class': 'SimpleStrategy', 'replication_factor': 1};");
>             cluster.schemaChange("CREATE TABLE ks.tbl (\n" +
>                                  "  pk0 int,\n" +
>                                  "  pk1 int,\n" +
>                                  "  ck0 int,\n" +
>                                  "  s1 int static,\n" +
>                                  "  v0 int,\n" +
>                                  "  PRIMARY KEY ((pk0, pk1), ck0)\n" +
>                                  ")");
>             cluster.schemaChange("CREATE INDEX tbl_pk0 ON ks.tbl(pk0) USING 
> 'SAI';");
> //            cluster.schemaChange("CREATE INDEX tbl_v0 ON ks.tbl(v0) USING 
> 'SAI';");
>             var node = cluster.get(1);
>             node.executeInternal("UPDATE ks.tbl SET s1=0, v0=0 WHERE  pk0 = 0 
> AND  pk1 = 1 AND  ck0 = 0 ");
>             node.executeInternal("DELETE FROM ks.tbl WHERE  pk0 = 0 AND  pk1 
> = 1");
>             node.executeInternal("UPDATE ks.tbl SET v0=1 WHERE  pk0 = 0 AND  
> pk1 = 1 AND  ck0 = 1"); // expected result
>             node.nodetoolResult("flush", "ks", "tbl").asserts().success();
>             var qr = node.coordinator().executeWithResult("SELECT * FROM 
> ks.tbl WHERE v0 = 1 AND pk0 = 0 ALLOW FILTERING", ConsistencyLevel.ALL);
>             System.out.println("Results");
>             int found = 0;
>             while (qr.hasNext())
>             {
>                 found++;
>                 System.out.println(qr.next());
>             }
>             Assertions.assertThat(found).isEqualTo(1);
>         }
>     }
> {code}
> I couldn't find a way to shrink this test further.  Static columns are not 
> touched, but removing them causes the test to pass.  The index on v0 doesn't 
> matter even though it's part of the query.  Since the failing column is a 
> partition column, I need to have the 2 partition columns; otherwise we can't 
> index the column.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to