Re: [PR] To improve accord interoperability test coverage, need to extend the harry model domain to handle more possible CQL states [cassandra]

via GitHub Mon, 27 Jan 2025 09:39:43 -0800


dcapwell commented on code in PR #3785:
URL: https://github.com/apache/cassandra/pull/3785#discussion_r1930946377



##########
test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java:
##########
@@ -0,0 +1,1289 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.harry.model;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableSet;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.function.IntFunction;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import javax.annotation.Nullable;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+import accord.utils.Invariants;
+import org.apache.cassandra.cql3.ast.Conditional;
+import org.apache.cassandra.cql3.ast.Element;
+import org.apache.cassandra.cql3.ast.Expression;
+import org.apache.cassandra.cql3.ast.ExpressionEvaluator;
+import org.apache.cassandra.cql3.ast.FunctionCall;
+import org.apache.cassandra.cql3.ast.Mutation;
+import org.apache.cassandra.cql3.ast.Select;
+import org.apache.cassandra.cql3.ast.Symbol;
+import org.apache.cassandra.db.BufferClustering;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.harry.util.StringUtils;
+import org.apache.cassandra.schema.TableMetadata;
+import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.ImmutableUniqueList;
+import org.apache.cassandra.utils.Pair;
+import org.apache.cassandra.utils.TableUtil;
+
+import static org.apache.cassandra.harry.model.BytesPartitionState.asCQL;
+
+public class ASTSingleTableModel
+{
+    public final BytesPartitionState.Factory factory;
+    private final TreeMap<BytesPartitionState.Ref, BytesPartitionState> 
partitions = new TreeMap<>();
+
+    /**
+     * Creates a model with no partitions for the given table.
+     *
+     * @param metadata table definition handed to the {@link BytesPartitionState.Factory}
+     */
+    public ASTSingleTableModel(TableMetadata metadata)
+    {
+        factory = new BytesPartitionState.Factory(metadata);
+    }
+
+    /**
+     * @return the refs of all partitions currently tracked by the model, as a navigable view of the
+     *         backing map's key set
+     */
+    public NavigableSet<BytesPartitionState.Ref> partitionKeys()
+    {
+        NavigableSet<BytesPartitionState.Ref> refs = partitions.navigableKeySet();
+        return refs;
+    }
+
+    /**
+     * @return the number of partitions currently tracked by the model
+     */
+    public int size()
+    {
+        final int partitionCount = partitions.size();
+        return partitionCount;
+    }
+
+    /**
+     * @return {@code true} when the model tracks no partitions
+     */
+    public boolean isEmpty()
+    {
+        final boolean noPartitions = partitions.isEmpty();
+        return noPartitions;
+    }
+
+    /**
+     * Builds a lookup from the values of {@code symbol} to the primary keys of the rows associated with
+     * each value.  The column is dispatched on its role: partition key columns and static columns have
+     * dedicated indexers; everything else is indexed per row.
+     *
+     * @param symbol the column to index
+     * @return a map from column value to the matching row primary keys
+     */
+    public TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> index(Symbol symbol)
+    {
+        TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> result;
+        if (factory.pkPositions.contains(symbol))
+            result = indexPartitionColumn(symbol);
+        else if (factory.staticPositions.contains(symbol))
+            result = indexStaticColumn(symbol);
+        else
+            result = indexRowColumn(symbol);
+        return result;
+    }
+
+    private TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> 
indexPartitionColumn(Symbol symbol)
+    {
+        int offset = factory.pkPositions.indexOf(symbol);
+        TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> index = new 
TreeMap<>(symbol.type()::compare);
+        for (BytesPartitionState partition : partitions.values())
+        {
+            if (partition.isEmpty()) continue;
+            ByteBuffer bb = partition.key.bufferAt(offset);
+            List<BytesPartitionState.PrimaryKey> list = 
index.computeIfAbsent(bb, i -> new ArrayList<>());
+            for (BytesPartitionState.Row row : partition.rows())
+                list.add(row.ref());
+        }
+        return index;
+    }
+
+    /**
+     * Indexes rows by the value of a static column.
+     * <p>
+     * Partitions that report {@code isEmpty()}, or whose static row has no value for {@code symbol}, are
+     * skipped; for every other partition all of its rows are filed under the static value.  The resulting
+     * map is ordered with the column type's comparator.
+     */
+    private TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> indexStaticColumn(Symbol symbol)
+    {
+        TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> result = new TreeMap<>(symbol.type()::compare);
+        for (BytesPartitionState state : partitions.values())
+        {
+            if (!state.isEmpty())
+            {
+                ByteBuffer value = state.staticRow().get(symbol);
+                if (value != null)
+                {
+                    List<BytesPartitionState.PrimaryKey> refs = result.computeIfAbsent(value, ignore -> new ArrayList<>());
+                    for (BytesPartitionState.Row row : state.rows())
+                        refs.add(row.ref());
+                }
+            }
+        }
+        return result;
+    }
+
+    private TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> 
indexRowColumn(Symbol symbol)
+    {
+        boolean clustering = factory.ckPositions.contains(symbol);
+        int offset = clustering ? factory.ckPositions.indexOf(symbol) : 
factory.regularPositions.indexOf(symbol);
+        TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> index = new 
TreeMap<>(symbol.type()::compare);
+        for (BytesPartitionState partition : partitions.values())
+        {
+            if (partition.isEmpty()) continue;
+            for (BytesPartitionState.Row row : partition.rows())
+            {
+                ByteBuffer bb = clustering ? row.clustering.bufferAt(offset) : 
row.get(offset);
+                if (bb == null)
+                    continue;
+                index.computeIfAbsent(bb, i -> new 
ArrayList<>()).add(row.ref());
+            }
+        }
+        return index;
+    }
+
+    /**
+     * Applies a mutation to the model by dispatching on its kind to the matching typed overload.
+     *
+     * @param mutation the INSERT, UPDATE or DELETE to apply
+     * @throws UnsupportedOperationException if the mutation kind is none of the three handled here
+     */
+    public void update(Mutation mutation)
+    {
+        switch (mutation.kind)
+        {
+            case INSERT:
+            {
+                Mutation.Insert insert = (Mutation.Insert) mutation;
+                update(insert);
+                return;
+            }
+            case UPDATE:
+            {
+                Mutation.Update update = (Mutation.Update) mutation;
+                update(update);
+                return;
+            }
+            case DELETE:
+            {
+                Mutation.Delete delete = (Mutation.Delete) mutation;
+                update(delete);
+                return;
+            }
+            default:
+                throw new UnsupportedOperationException(mutation.kind.name());
+        }
+    }
+
+    /**
+     * Applies an INSERT to the model.
+     * <p>
+     * The target partition is looked up by the partition key taken from the inserted values and is
+     * created when missing.  Any static columns present in the insert are written to the partition first,
+     * then the regular columns are written to the row identified by the inserted clustering values.
+     *
+     * @param insert the insert to apply
+     */
+    public void update(Mutation.Insert insert)
+    {
+        Clustering<ByteBuffer> partitionKey = pd(insert);
+        BytesPartitionState state = partitions.get(factory.createRef(partitionKey));
+        if (state == null)
+        {
+            state = factory.create(partitionKey);
+            partitions.put(state.ref(), state);
+        }
+
+        Map<Symbol, Expression> values = insert.values;
+
+        // Static columns named by the insert; empty when the table has none or the insert touches none.
+        var staticColumns = Sets.intersection(factory.staticPositions.asSet(), values.keySet());
+        if (!staticColumns.isEmpty())
+        {
+            Map<Symbol, ByteBuffer> staticWrite = new HashMap<>();
+            for (Symbol column : staticColumns)
+                staticWrite.put(column, eval(values.get(column)));
+            state.setStaticColumns(staticWrite);
+        }
+
+        Map<Symbol, ByteBuffer> rowWrite = new HashMap<>();
+        for (Symbol column : Sets.intersection(factory.regularPositions.asSet(), values.keySet()))
+            rowWrite.put(column, eval(values.get(column)));
+
+        Clustering<ByteBuffer> rowKey = key(insert.values, factory.ckPositions);
+        state.setColumns(rowKey, rowWrite, true);
+    }
+
+    /**
+     * Applies an UPDATE to the model.
+     * <p>
+     * The simplified WHERE clause is split into partition keys and the remaining conditionals.  For every
+     * partition key the partition is created when missing, static columns in the SET clause are written
+     * to the partition, and the regular columns in the SET clause are written to each row selected by the
+     * remaining (clustering) conditionals.
+     *
+     * @param update the update to apply
+     */
+    public void update(Mutation.Update update)
+    {
+        Pair<List<Clustering<ByteBuffer>>, List<Conditional>> parts = splitOnPartition(update.where.simplify());
+        List<Conditional> rowConditions = parts.right;
+        Map<Symbol, Expression> assignments = update.set;
+        for (Clustering<ByteBuffer> partitionKey : parts.left)
+        {
+            BytesPartitionState state = partitions.get(factory.createRef(partitionKey));
+            if (state == null)
+            {
+                state = factory.create(partitionKey);
+                partitions.put(state.ref(), state);
+            }
+
+            // Statics are written even when the targeted row does not exist.
+            var staticColumns = Sets.intersection(factory.staticPositions.asSet(), assignments.keySet());
+            if (!staticColumns.isEmpty())
+            {
+                Map<Symbol, ByteBuffer> staticWrite = new HashMap<>();
+                for (Symbol column : staticColumns)
+                    staticWrite.put(column, eval(assignments.get(column)));
+                state.setStaticColumns(staticWrite);
+            }
+
+            for (Clustering<ByteBuffer> rowKey : clustering(rowConditions))
+            {
+                Map<Symbol, ByteBuffer> rowWrite = new HashMap<>();
+                for (Symbol column : Sets.intersection(factory.regularPositions.asSet(), assignments.keySet()))
+                    rowWrite.put(column, eval(assignments.get(column)));
+                state.setColumns(rowKey, rowWrite, false);
+            }
+        }
+    }
+
+    private enum DeleteKind
+    {PARTITION, ROW, COLUMN}
+
+    /**
+     * Applies a DELETE to the model.
+     * <p>
+     * The simplified WHERE clause is split into partition keys and, when present, clustering keys.  The
+     * granularity of the delete is derived from the statement:
+     * <ul>
+     *   <li>columns listed: {@link DeleteKind#COLUMN} (static columns when no clustering is given,
+     *       otherwise the columns of each selected row)</li>
+     *   <li>no columns but clusterings given: {@link DeleteKind#ROW}</li>
+     *   <li>otherwise: {@link DeleteKind#PARTITION}</li>
+     * </ul>
+     * Partitions that are not present in the model are skipped.
+     *
+     * @param delete the delete to apply
+     */
+    public void update(Mutation.Delete delete)
+    {
+        //TODO (coverage): range deletes
+        var split = splitOnPartition(delete.where.simplify());
+        List<Clustering<ByteBuffer>> pks = split.left;
+        List<Clustering<ByteBuffer>> clusterings = split.right.isEmpty() ? Collections.emptyList() : clustering(split.right);
+        HashSet<Symbol> columns = delete.columns.isEmpty() ? null : new HashSet<>(delete.columns);
+
+        // The kind depends only on the statement, so it is computed once rather than per partition.
+        final DeleteKind kind;
+        if (!delete.columns.isEmpty())
+            kind = DeleteKind.COLUMN;
+        else if (!clusterings.isEmpty())
+            kind = DeleteKind.ROW;
+        else
+            kind = DeleteKind.PARTITION;
+
+        for (Clustering<ByteBuffer> pd : pks)
+        {
+            BytesPartitionState partition = partitions.get(factory.createRef(pd));
+            // can't delete a partition that doesn't exist... but a missing partition must not stop the
+            // delete from being applied to the remaining partition keys, so skip rather than return
+            if (partition == null) continue;
+
+            switch (kind)
+            {
+                case PARTITION:
+                    partitions.remove(partition.ref());
+                    break;
+                case ROW:
+                    for (Clustering<ByteBuffer> cd : clusterings)
+                    {
+                        partition.deleteRow(cd);
+                        // drop the partition from the model once it reports it should be deleted
+                        if (partition.shouldDelete())
+                            partitions.remove(partition.ref());
+                    }
+                    break;
+                case COLUMN:
+                    if (clusterings.isEmpty())
+                    {
+                        partition.deleteStaticColumns(columns);
+                    }
+                    else
+                    {
+                        for (Clustering<ByteBuffer> cd : clusterings)
+                        {
+                            partition.deleteColumns(cd, columns);
+                            if (partition.shouldDelete())
+                                partitions.remove(partition.ref());
+                        }
+                    }
+                    break;
+//                case SLICE:
+//                case RANGE:
+                default:
+                    throw new UnsupportedOperationException();
+            }
+        }
+    }
+
+    /**
+     * Resolves the clustering keys selected by the given conditionals.
+     *
+     * @param conditionals the conditionals left over after the partition key was extracted
+     * @return the selected clusterings; a single {@link Clustering#EMPTY} when there are no conditionals
+     *         and the table has no clustering columns
+     * @throws IllegalArgumentException if there are no conditionals but the table has clustering columns,
+     *                                  or if conditionals remain after the clustering columns were extracted
+     */
+    private List<Clustering<ByteBuffer>> clustering(List<Conditional> conditionals)
+    {
+        if (conditionals.isEmpty())
+        {
+            if (!factory.ckPositions.isEmpty())
+                throw new IllegalArgumentException("No clustering columns defined in the WHERE clause, but clustering columns exist; expected " + factory.ckPositions);
+            return Collections.singletonList(Clustering.EMPTY);
+        }
+
+        Pair<List<Clustering<ByteBuffer>>, List<Conditional>> parts = splitOnClustering(conditionals);
+        List<Conditional> leftover = parts.right;
+        if (leftover.isEmpty())
+            return parts.left;
+
+        String cql = leftover.stream().map(Element::toCQL).collect(Collectors.joining(", "));
+        throw new IllegalArgumentException("Non Partition/Clustering columns found in WHERE clause; " + cql);
+    }
+
+    /**
+     * Splits the conditionals into the keys built from the partition key columns and everything else.
+     */
+    private Pair<List<Clustering<ByteBuffer>>, List<Conditional>> splitOnPartition(List<Conditional> conditionals)
+    {
+        var partitionColumns = factory.pkPositions.asSet();
+        return splitOn(partitionColumns, conditionals);
+    }
+
+    /**
+     * Splits the conditionals into the keys built from the clustering columns and everything else.
+     */
+    private Pair<List<Clustering<ByteBuffer>>, List<Conditional>> splitOnClustering(List<Conditional> conditionals)
+    {
+        var clusteringColumns = factory.ckPositions.asSet();
+        return splitOn(clusteringColumns, conditionals);
+    }
+
+    /**
+     * Splits the conditionals into the keys formed by {@code columns} and the conditionals that do not
+     * constrain those columns.
+     * <p>
+     * A column is considered constrained when it is the left-hand side of an equality or the reference of
+     * an IN; each column may be constrained only once.  This method serves both partition key and
+     * clustering columns, so its error messages do not name a specific column kind.
+     *
+     * @param columns      the key columns to extract
+     * @param conditionals the WHERE clause conditionals to inspect
+     * @return the keys built from the extracted values, paired with the untouched conditionals
+     * @throws IllegalArgumentException if a column is constrained more than once
+     * @throws AssertionError           if any of {@code columns} is not constrained
+     */
+    private Pair<List<Clustering<ByteBuffer>>, List<Conditional>> splitOn(ImmutableUniqueList<Symbol>.AsSet columns, List<Conditional> conditionals)
+    {
+        // key columns require equality (or IN)
+        Map<Symbol, Set<ByteBuffer>> pks = new HashMap<>();
+        List<Conditional> other = new ArrayList<>();
+        for (Conditional c : conditionals)
+        {
+            if (c instanceof Conditional.Where)
+            {
+                Conditional.Where w = (Conditional.Where) c;
+                if (w.kind == Conditional.Where.Inequality.EQUAL && columns.contains(w.lhs))
+                {
+                    Symbol col = (Symbol) w.lhs;
+                    ByteBuffer bb = eval(w.rhs);
+                    if (pks.containsKey(col))
+                        throw new IllegalArgumentException("Column " + col + " was defined multiple times in the WHERE clause");
+                    pks.put(col, Collections.singleton(bb));
+                }
+                else
+                {
+                    other.add(c);
+                }
+            }
+            else if (c instanceof Conditional.In)
+            {
+                Conditional.In i = (Conditional.In) c;
+                if (columns.contains(i.ref))
+                {
+                    Symbol col = (Symbol) i.ref;
+                    if (pks.containsKey(col))
+                        throw new IllegalArgumentException("Column " + col + " was defined multiple times in the WHERE clause");
+                    var set = i.expressions.stream().map(ASTSingleTableModel::eval).collect(Collectors.toSet());
+                    pks.put(col, set);
+                }
+                else
+                {
+                    other.add(c);
+                }
+            }
+            else
+            {
+                other.add(c);
+            }
+        }
+        if (!columns.equals(pks.keySet()))
+        {
+            var missing = Sets.difference(columns, pks.keySet());
+            throw new AssertionError("Unable to find expected columns " + missing);
+        }
+
+        List<Clustering<ByteBuffer>> keys = keys(columns, pks);
+        return Pair.create(keys, other);
+    }
+
+    /**
+     * Builds the key formed by taking, for each column in iteration order, its single value from
+     * {@code pks}.
+     *
+     * @param columns the key columns, in key order
+     * @param pks     the values each column was constrained to
+     * @return a single-element list holding the resulting key
+     * @throws UnsupportedOperationException if any column has more than one value (IN is not supported yet)
+     */
+    private List<Clustering<ByteBuffer>> keys(Collection<Symbol> columns, Map<Symbol, Set<ByteBuffer>> pks)
+    {
+        //TODO (coverage): handle IN
+        ByteBuffer[] components = new ByteBuffer[columns.size()];
+        int next = 0;
+        for (Symbol column : columns)
+        {
+            Set<ByteBuffer> candidates = pks.get(column);
+            if (candidates.size() > 1)
+                throw new UnsupportedOperationException("IN clause is currently unsupported... its on the backlog!");
+            components[next] = Iterables.getFirst(candidates, null);
+            next++;
+        }
+        Clustering<ByteBuffer> key = BufferClustering.make(components);
+        return Collections.singletonList(key);
+    }
+
+    /**
+     * Builds the partition key of an insert from its values, using the partition key column positions.
+     */
+    private Clustering<ByteBuffer> pd(Mutation.Insert mutation)
+    {
+        Clustering<ByteBuffer> partitionKey = key(mutation.values, factory.pkPositions);
+        return partitionKey;
+    }
+
+    /**
+     * Looks up a partition by its ref.
+     *
+     * @param ref the partition to look up
+     * @return the partition state, or {@code null} when the model does not track that partition
+     */
+    public BytesPartitionState get(BytesPartitionState.Ref ref)
+    {
+        BytesPartitionState state = partitions.get(ref);
+        return state;
+    }
+
+    public List<BytesPartitionState> getByToken(long token)

Review Comment:
   I added this for debugger support, but I can just create the token directly, so 
there is no need to add this to the public API.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] To improve accord interoperability test coverage, need to extend the harry model domain to handle more possible CQL states [cassandra]

Reply via email to