belliottsmith commented on code in PR #6:
URL: https://github.com/apache/cassandra-accord/pull/6#discussion_r942385285
##########
accord-core/src/main/java/accord/primitives/Deps.java:
##########
@@ -0,0 +1,978 @@
+package accord.primitives;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.function.Predicate;
+
+import com.google.common.base.Preconditions;
+
+import accord.api.Key;
+import accord.local.Command;
+import accord.local.CommandStore;
+import accord.utils.InlineHeap;
+import accord.utils.SortedArrays;
+
+import static accord.utils.SortedArrays.remap;
+import static accord.utils.SortedArrays.remapper;
+
+// TODO (now): switch to RoutingKey
+public class Deps implements Iterable<Map.Entry<Key, TxnId>>
+{
+ private static final TxnId[] NO_TXNIDS = new TxnId[0];
+ private static final int[] NO_INTS = new int[0];
+ public static final Deps NONE = new Deps(Keys.EMPTY, NO_TXNIDS, NO_INTS);
+
+ public static class Builder
+ {
+ final Keys keys;
+ final Map<TxnId, Integer> txnIdLookup = new HashMap<>(); // TODO:
primitive map
+ TxnId[] txnIds = new TxnId[4];
+ final int[][] keysToTxnId;
+ final int[] keysToTxnIdCounts;
+
+ public Builder(Keys keys)
+ {
+ this.keys = keys;
+ this.keysToTxnId = new int[keys.size()][4];
+ this.keysToTxnIdCounts = new int[keys.size()];
+ }
+
+ public boolean isEmpty()
+ {
+ return Arrays.stream(keysToTxnIdCounts).allMatch(i -> i == 0);
+ }
+
+ public void add(Command command)
+ {
+ int idx = ensureTxnIdx(command.txnId());
+ keys.foldlIntersect(command.txn().keys, (li, ri, k, p, v) -> {
+ if (keysToTxnId[li].length == keysToTxnIdCounts[li])
+ keysToTxnId[li] = Arrays.copyOf(keysToTxnId[li],
keysToTxnId[li].length * 2);
+ keysToTxnId[li][keysToTxnIdCounts[li]++] = idx;
+ return 0;
+ }, 0, 0, 1);
+ }
+
+ public void add(Key key, TxnId txnId)
+ {
+ int txnIdx = ensureTxnIdx(txnId);
+ int keyIdx = keys.indexOf(key);
+ if (keysToTxnIdCounts[keyIdx] == keysToTxnId[keyIdx].length)
+ keysToTxnId[keyIdx] = Arrays.copyOf(keysToTxnId[keyIdx],
Math.max(4, keysToTxnIdCounts[keyIdx] * 2));
+ keysToTxnId[keyIdx][keysToTxnIdCounts[keyIdx]++] = txnIdx;
+ }
+
+ public boolean contains(TxnId txnId)
+ {
+ return txnIdx(txnId) >= 0;
+ }
+
+ private int txnIdx(TxnId txnId)
+ {
+ return txnIdLookup.getOrDefault(txnId, -1);
+ }
+
+ private int ensureTxnIdx(TxnId txnId)
+ {
+ return txnIdLookup.computeIfAbsent(txnId, ignore -> {
+ if (txnIds.length == txnIdLookup.size())
+ txnIds = Arrays.copyOf(txnIds, txnIds.length * 2);
+ return txnIdLookup.size();
+ });
+ }
+
+ public Deps build()
+ {
+ TxnId[] txnIds = txnIdLookup.keySet().toArray(TxnId[]::new);
+ Arrays.sort(txnIds, TxnId::compareTo);
+ int[] txnIdMap = new int[txnIds.length];
+ for (int i = 0 ; i < txnIdMap.length ; i++)
+ txnIdMap[txnIdLookup.get(txnIds[i])] = i;
+
+ int keyCount = 0;
+ int[] result; {
+ int count = 0;
+ for (int i = 0 ; i < keys.size() ; ++i)
+ {
+ keyCount += keysToTxnIdCounts[i] > 0 ? 1 : 0;
+ count += keysToTxnIdCounts[i];
+ }
+ result = new int[count + keyCount];
+ }
+
+ int keyIndex = 0;
+ int offset = keyCount;
+ for (int i = 0 ; i < keys.size() ; ++i)
+ {
+ if (keysToTxnIdCounts[i] > 0)
+ {
+ int count = keysToTxnIdCounts[i];
+ int[] src = keysToTxnId[i];
+ for (int j = 0 ; j < count ; ++j)
+ result[j + offset] = txnIdMap[src[j]];
+ Arrays.sort(result, offset, count + offset);
+ int dups = 0;
+ for (int j = offset + 1 ; j < offset + count ; ++j)
+ {
+ if (result[j] == result[j - 1]) ++dups;
+ else if (dups > 0) result[j - dups] = result[j];
+ }
+ result[keyIndex] = offset += count - dups;
+ ++keyIndex;
+ }
+ }
+ if (offset < result.length)
+ result = Arrays.copyOf(result, offset);
+
+ Keys keys = this.keys;
+ if (keyCount < keys.size())
+ {
+ keyIndex = 0;
+ Key[] newKeys = new Key[keyCount];
+ for (int i = 0 ; i < keys.size() ; ++i)
+ {
+ if (keysToTxnIdCounts[i] > 0)
+ newKeys[keyIndex++] = keys.get(i);
+ }
+ keys = new Keys(newKeys);
+ }
+
+ return new Deps(keys, txnIds, result);
+ }
+ }
+
+ public static Builder builder(Keys keys)
+ {
+ return new Builder(keys);
+ }
+
+ static class MergeStream
+ {
+ final Deps source;
+ // TODO: could share backing array for all of these if we want, with
an additional offset
+ final int[] input;
+ final int keyCount;
+ int[] remap; // TODO: use cached backing array
+ int[] keys; // TODO: use cached backing array
+ int keyIndex;
+ int index;
+ int endIndex;
+
+ MergeStream(Deps source)
+ {
+ this.source = source;
+ this.input = source.keyToTxnId;
+ this.keyCount = source.keys.size();
+ }
+
+ private void init(Keys keys, TxnId[] txnIds)
+ {
+ this.remap = remapper(source.txnIds, txnIds, true);
+ this.keys = source.keys.remapper(keys, true);
+ while (input[keyIndex] == keyCount)
+ ++keyIndex;
+ this.index = keyCount;
+ this.endIndex = input[keyIndex];
+ }
+ }
+
+ public static <T> Deps merge(Keys keys, List<T> merge, Function<T, Deps>
getter)
Review Comment:
> my tests bias towards large number of keys (200+) but when smaller there
really isn't a difference between the two.
Interesting. I just checked out your PR and made some tweaks to improve
`remapper`, use `MergeIterator` and cache `int[]`, and while the performance is
relatively stable for large numbers of keys (I haven't compared for small
numbers of keys), I see a performance delta that is much larger between the two
approaches for smaller numbers of keys.
```
13:22:43.148 [main] INFO accord.txn.DepsTest - Benchmark[name=merge(No.=2,
keys=7, size=904)] avg 7.65ms, 0.21ms GC, allocated 594 MiB
13:22:45.846 [main] INFO accord.txn.DepsTest - Benchmark[name=builder
forEach(No.=2, keys=7, size=904)] avg 63.79ms, 0.22ms GC, allocated 3289 MiB
13:22:45.872 [main] INFO accord.txn.DepsTest - Benchmark[name=merge(No.=1,
keys=14, size=149)] avg 0.06ms, 0.17ms GC, allocated 4 MiB
13:22:46.145 [main] INFO accord.txn.DepsTest - Benchmark[name=builder
forEach(No.=1, keys=14, size=149)] avg 6.13ms, 0.21ms GC, allocated 340 MiB
13:22:57.425 [main] INFO accord.txn.DepsTest - Benchmark[name=merge(No.=11,
keys=21, size=3863)] avg 249.98ms, 0.29ms GC, allocated 3297 MiB
13:23:12.559 [main] INFO accord.txn.DepsTest - Benchmark[name=builder
forEach(No.=11, keys=21, size=3863)] avg 359.93ms, 0.23ms GC, allocated 11533
MiB
13:23:15.202 [main] INFO accord.txn.DepsTest - Benchmark[name=merge(No.=3,
keys=13, size=1512)] avg 56.32ms, 0.23ms GC, allocated 1400 MiB
13:23:21.237 [main] INFO accord.txn.DepsTest - Benchmark[name=builder
forEach(No.=3, keys=13, size=1512)] avg 143.27ms, 0.22ms GC, allocated 5531 MiB
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]