dcapwell commented on code in PR #6:
URL: https://github.com/apache/cassandra-accord/pull/6#discussion_r943748108
##########
accord-core/src/main/java/accord/utils/SortedArrays.java:
##########
@@ -0,0 +1,535 @@
+package accord.utils;
+
+import java.util.Arrays;
+import java.util.function.IntFunction;
+
+import org.apache.cassandra.utils.concurrent.Inline;
+
+import static java.util.Arrays.*;
+
+public class SortedArrays
+{
+ /**
+ * Given two sorted arrays, return an array containing the elements
present in either, preferentially returning one
+ * of the inputs if it contains all elements of the other.
+ *
+ * TODO: introduce exponential search optimised version
+ */
+ public static <T extends Comparable<? super T>> T[] linearUnion(T[] left,
T[] right, IntFunction<T[]> allocate)
+ {
+ int leftIdx = 0;
+ int rightIdx = 0;
+
+ T[] result = null;
+ int resultSize = 0;
+
+ // first, pick the superset candidate and merge the two until we find
the first missing item
+ // if none found, return the superset candidate
+ if (left.length >= right.length)
+ {
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp <= 0)
+ {
+ leftIdx += 1;
+ rightIdx += cmp == 0 ? 1 : 0;
+ }
+ else
+ {
+ resultSize = leftIdx;
+ result = allocate.apply(resultSize + (left.length -
leftIdx) + (right.length - (rightIdx - 1)));
+ System.arraycopy(left, 0, result, 0, resultSize);
+ result[resultSize++] = right[rightIdx++];
+ break;
+ }
+ }
+
+ if (result == null)
+ {
+ if (rightIdx == right.length)
+ return left;
+ result = allocate.apply(left.length + (right.length -
rightIdx));
+ resultSize = leftIdx;
+ System.arraycopy(left, 0, result, 0, resultSize);
+ }
+ }
+ else
+ {
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp >= 0)
+ {
+ rightIdx += 1;
+ leftIdx += cmp == 0 ? 1 : 0;
+ }
+ else
+ {
+ resultSize = rightIdx;
+ result = allocate.apply(resultSize + (left.length -
(leftIdx - 1)) + (right.length - rightIdx));
+ System.arraycopy(right, 0, result, 0, resultSize);
+ result[resultSize++] = left[leftIdx++];
+ break;
+ }
+ }
+
+ if (result == null)
+ {
+ if (leftIdx == left.length)
+ return right;
+ result = allocate.apply(right.length + (left.length -
leftIdx));
+ resultSize = rightIdx;
+ System.arraycopy(right, 0, result, 0, resultSize);
+ }
+ }
+
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ T leftKey = left[leftIdx];
+ T rightKey = right[rightIdx];
+ int cmp = leftKey.compareTo(rightKey);
+ T minKey;
+ if (cmp == 0)
+ {
+ leftIdx++;
+ rightIdx++;
+ minKey = leftKey;
+ }
+ else if (cmp < 0)
+ {
+ leftIdx++;
+ minKey = leftKey;
+ }
+ else
+ {
+ rightIdx++;
+ minKey = rightKey;
+ }
+ result[resultSize++] = minKey;
+ }
+
+ while (leftIdx < left.length)
+ result[resultSize++] = left[leftIdx++];
+
+ while (rightIdx < right.length)
+ result[resultSize++] = right[rightIdx++];
+
+ if (resultSize < result.length)
+ result = copyOf(result, resultSize);
+
+ return result;
+ }
+
+ /**
+ * Given two sorted arrays, return the elements present only in both,
preferentially returning one of the inputs if
+ * it contains no elements not present in the other.
+ *
+ * TODO: introduce exponential search optimised version
+ */
+ @SuppressWarnings("unused") // was used until recently, might be used
again?
+ public static <T extends Comparable<? super T>> T[] linearIntersection(T[]
left, T[] right, IntFunction<T[]> allocate)
+ {
+ int leftIdx = 0;
+ int rightIdx = 0;
+
+ T[] result = null;
+ int resultSize = 0;
+
+ // first pick a subset candidate, and merge both until we encounter an
element not present in the other array
+ if (left.length <= right.length)
+ {
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp >= 0)
+ {
+ rightIdx += 1;
+ leftIdx += cmp == 0 ? 1 : 0;
+ }
+ else
+ {
+ resultSize = leftIdx++;
+ result = allocate.apply(resultSize + Math.min(left.length
- leftIdx, right.length - rightIdx));
+ System.arraycopy(left, 0, result, 0, resultSize);
+ break;
+ }
+ }
+
+ if (result == null)
+ return left;
+ }
+ else
+ {
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp <= 0)
+ {
+ leftIdx += 1;
+ rightIdx += cmp == 0 ? 1 : 0;
+ }
+ else
+ {
+ resultSize = rightIdx++;
+ result = allocate.apply(resultSize + Math.min(left.length
- leftIdx, right.length - rightIdx));
+ System.arraycopy(right, 0, result, 0, resultSize);
+ break;
+ }
+ }
+
+ if (result == null)
+ return right;
+ }
+
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ T leftKey = left[leftIdx];
+ int cmp = leftKey.compareTo(right[rightIdx]);
+ if (cmp == 0)
+ {
+ leftIdx++;
+ rightIdx++;
+ result[resultSize++] = leftKey;
+ }
+ else
+ {
+ if (cmp < 0) leftIdx++;
+ else rightIdx++;
+ }
+ }
+
+ if (resultSize < result.length)
+ result = Arrays.copyOf(result, resultSize);
+
+ return result;
+ }
+
+ /**
+ * Given two sorted arrays, return the elements present only in the first,
preferentially returning the first array
+ * itself if possible
+ */
+ @SuppressWarnings("unused") // was used until recently, might be used
again?
+ public static <T extends Comparable<? super T>> T[] linearDifference(T[]
left, T[] right, IntFunction<T[]> allocate)
+ {
+ int rightIdx = 0;
+ int leftIdx = 0;
+
+ T[] result = null;
+ int resultSize = 0;
+
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp == 0)
+ {
+ resultSize = leftIdx++;
+ ++rightIdx;
+ result = allocate.apply(resultSize + left.length - leftIdx);
+ System.arraycopy(left, 0, result, 0, resultSize);
+ break;
+ }
+ else if (cmp < 0)
+ {
+ ++leftIdx;
+ }
+ else
+ {
+ ++rightIdx;
+ }
+ }
+
+ if (result == null)
+ return left;
+
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp > 0)
+ {
+ result[resultSize++] = left[leftIdx++];
+ }
+ else if (cmp < 0)
+ {
+ ++rightIdx;
+ }
+ else
+ {
+ ++leftIdx;
+ ++rightIdx;
+ }
+ }
+
+ if (resultSize < result.length)
+ result = Arrays.copyOf(result, resultSize);
+
+ return result;
+ }
+
+ public static <A, R> A[] sliceWithOverlaps(A[] slice, R[] select,
IntFunction<A[]> factory, AsymmetricComparator<A, R> cmp1,
AsymmetricComparator<R, A> cmp2)
+ {
+ A[] result;
+ int resultCount;
+ int ai = 0, ri = 0;
+ while (true)
+ {
+ long ari = findNextIntersection(slice, ai, select, ri, cmp1, cmp2,
Search.CEIL);
+ if (ari < 0)
+ {
+ if (ai == slice.length)
+ return slice;
+
+ return Arrays.copyOf(slice, ai);
+ }
+
+ int nextai = (int)(ari >>> 32);
+ if (ai != nextai)
+ {
+ resultCount = ai;
+ result = factory.apply(ai + (slice.length - nextai));
+ System.arraycopy(slice, 0, result, 0, resultCount);
+ ai = nextai;
Review Comment:
why don't we update `ri`? I am in a debugger trying to see the reason and
not seeing a clear reason, setting `ri = (int)ari;` is passing in my tests.
The side effect I see by not setting it is that
```
int nextai = exponentialSearch(slice, ai, slice.length,
select[ri], cmp2, Search.FLOOR) + 1;
while (ai < nextai)
result[resultCount++] = slice[ai++];
```
is always a wasted operation on the first call, this is because `ri` wasn't
updated so we *know* a new `ri` is the next match so we search for a match that
doesn't exist.
##########
accord-core/src/main/java/accord/utils/SortedArrays.java:
##########
@@ -0,0 +1,535 @@
+package accord.utils;
+
+import java.util.Arrays;
+import java.util.function.IntFunction;
+
+import org.apache.cassandra.utils.concurrent.Inline;
+
+import static java.util.Arrays.*;
+
+public class SortedArrays
+{
+ /**
+ * Given two sorted arrays, return an array containing the elements
present in either, preferentially returning one
+ * of the inputs if it contains all elements of the other.
+ *
+ * TODO: introduce exponential search optimised version
+ */
+ public static <T extends Comparable<? super T>> T[] linearUnion(T[] left,
T[] right, IntFunction<T[]> allocate)
+ {
+ int leftIdx = 0;
+ int rightIdx = 0;
+
+ T[] result = null;
+ int resultSize = 0;
+
+ // first, pick the superset candidate and merge the two until we find
the first missing item
+ // if none found, return the superset candidate
+ if (left.length >= right.length)
+ {
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp <= 0)
+ {
+ leftIdx += 1;
+ rightIdx += cmp == 0 ? 1 : 0;
+ }
+ else
+ {
+ resultSize = leftIdx;
+ result = allocate.apply(resultSize + (left.length -
leftIdx) + (right.length - (rightIdx - 1)));
+ System.arraycopy(left, 0, result, 0, resultSize);
+ result[resultSize++] = right[rightIdx++];
+ break;
+ }
+ }
+
+ if (result == null)
+ {
+ if (rightIdx == right.length)
+ return left;
+ result = allocate.apply(left.length + (right.length -
rightIdx));
+ resultSize = leftIdx;
+ System.arraycopy(left, 0, result, 0, resultSize);
+ }
+ }
+ else
+ {
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp >= 0)
+ {
+ rightIdx += 1;
+ leftIdx += cmp == 0 ? 1 : 0;
+ }
+ else
+ {
+ resultSize = rightIdx;
+ result = allocate.apply(resultSize + (left.length -
(leftIdx - 1)) + (right.length - rightIdx));
+ System.arraycopy(right, 0, result, 0, resultSize);
+ result[resultSize++] = left[leftIdx++];
+ break;
+ }
+ }
+
+ if (result == null)
+ {
+ if (leftIdx == left.length)
+ return right;
+ result = allocate.apply(right.length + (left.length -
leftIdx));
+ resultSize = rightIdx;
+ System.arraycopy(right, 0, result, 0, resultSize);
+ }
+ }
+
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ T leftKey = left[leftIdx];
+ T rightKey = right[rightIdx];
+ int cmp = leftKey.compareTo(rightKey);
+ T minKey;
+ if (cmp == 0)
+ {
+ leftIdx++;
+ rightIdx++;
+ minKey = leftKey;
+ }
+ else if (cmp < 0)
+ {
+ leftIdx++;
+ minKey = leftKey;
+ }
+ else
+ {
+ rightIdx++;
+ minKey = rightKey;
+ }
+ result[resultSize++] = minKey;
+ }
+
+ while (leftIdx < left.length)
+ result[resultSize++] = left[leftIdx++];
+
+ while (rightIdx < right.length)
+ result[resultSize++] = right[rightIdx++];
+
+ if (resultSize < result.length)
+ result = copyOf(result, resultSize);
+
+ return result;
+ }
+
+ /**
+ * Given two sorted arrays, return the elements present only in both,
preferentially returning one of the inputs if
+ * it contains no elements not present in the other.
+ *
+ * TODO: introduce exponential search optimised version
+ */
+ @SuppressWarnings("unused") // was used until recently, might be used
again?
+ public static <T extends Comparable<? super T>> T[] linearIntersection(T[]
left, T[] right, IntFunction<T[]> allocate)
+ {
+ int leftIdx = 0;
+ int rightIdx = 0;
+
+ T[] result = null;
+ int resultSize = 0;
+
+ // first pick a subset candidate, and merge both until we encounter an
element not present in the other array
+ if (left.length <= right.length)
+ {
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp >= 0)
+ {
+ rightIdx += 1;
+ leftIdx += cmp == 0 ? 1 : 0;
+ }
+ else
+ {
+ resultSize = leftIdx++;
+ result = allocate.apply(resultSize + Math.min(left.length
- leftIdx, right.length - rightIdx));
+ System.arraycopy(left, 0, result, 0, resultSize);
+ break;
+ }
+ }
+
+ if (result == null)
+ return left;
+ }
+ else
+ {
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp <= 0)
+ {
+ leftIdx += 1;
+ rightIdx += cmp == 0 ? 1 : 0;
+ }
+ else
+ {
+ resultSize = rightIdx++;
+ result = allocate.apply(resultSize + Math.min(left.length
- leftIdx, right.length - rightIdx));
+ System.arraycopy(right, 0, result, 0, resultSize);
+ break;
+ }
+ }
+
+ if (result == null)
+ return right;
+ }
+
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ T leftKey = left[leftIdx];
+ int cmp = leftKey.compareTo(right[rightIdx]);
+ if (cmp == 0)
+ {
+ leftIdx++;
+ rightIdx++;
+ result[resultSize++] = leftKey;
+ }
+ else
+ {
+ if (cmp < 0) leftIdx++;
+ else rightIdx++;
+ }
+ }
+
+ if (resultSize < result.length)
+ result = Arrays.copyOf(result, resultSize);
+
+ return result;
+ }
+
+ /**
+ * Given two sorted arrays, return the elements present only in the first,
preferentially returning the first array
+ * itself if possible
+ */
+ @SuppressWarnings("unused") // was used until recently, might be used
again?
+ public static <T extends Comparable<? super T>> T[] linearDifference(T[]
left, T[] right, IntFunction<T[]> allocate)
+ {
+ int rightIdx = 0;
+ int leftIdx = 0;
+
+ T[] result = null;
+ int resultSize = 0;
+
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp == 0)
+ {
+ resultSize = leftIdx++;
+ ++rightIdx;
+ result = allocate.apply(resultSize + left.length - leftIdx);
+ System.arraycopy(left, 0, result, 0, resultSize);
+ break;
+ }
+ else if (cmp < 0)
+ {
+ ++leftIdx;
+ }
+ else
+ {
+ ++rightIdx;
+ }
+ }
+
+ if (result == null)
+ return left;
+
+ while (leftIdx < left.length && rightIdx < right.length)
+ {
+ int cmp = left[leftIdx].compareTo(right[rightIdx]);
+ if (cmp > 0)
+ {
+ result[resultSize++] = left[leftIdx++];
+ }
+ else if (cmp < 0)
+ {
+ ++rightIdx;
+ }
+ else
+ {
+ ++leftIdx;
+ ++rightIdx;
+ }
+ }
+
+ if (resultSize < result.length)
+ result = Arrays.copyOf(result, resultSize);
+
+ return result;
+ }
+
+ public static <A, R> A[] sliceWithOverlaps(A[] slice, R[] select,
IntFunction<A[]> factory, AsymmetricComparator<A, R> cmp1,
AsymmetricComparator<R, A> cmp2)
+ {
+ A[] result;
+ int resultCount;
+ int ai = 0, ri = 0;
+ while (true)
+ {
+ long ari = findNextIntersection(slice, ai, select, ri, cmp1, cmp2,
Search.CEIL);
+ if (ari < 0)
+ {
+ if (ai == slice.length)
+ return slice;
+
+ return Arrays.copyOf(slice, ai);
+ }
+
+ int nextai = (int)(ari >>> 32);
+ if (ai != nextai)
+ {
+ resultCount = ai;
+ result = factory.apply(ai + (slice.length - nextai));
+ System.arraycopy(slice, 0, result, 0, resultCount);
+ ai = nextai;
Review Comment:
I added it in cd51a71795d48010be07aab7c583ac9bfd19913f with docs for this
method
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]