bereng commented on code in PR #1891:
URL: https://github.com/apache/cassandra/pull/1891#discussion_r1129240955
##########
src/java/org/apache/cassandra/utils/streamhist/StreamingTombstoneHistogramBuilder.java:
##########
@@ -242,47 +249,52 @@ void mergeNearestPoints()
{
assert isFull() : "DataHolder must be full in order to merge two
points";
- final int[] smallestDifference =
findPointPairWithSmallestDistance();
+ final long[] smallestDifference =
findPointPairWithSmallestDistance();
- final int point1 = smallestDifference[0];
- final int point2 = smallestDifference[1];
+ final long point1 = smallestDifference[0];
+ final long point2 = smallestDifference[1];
- long key = wrap(point1, 0);
- int index = Arrays.binarySearch(data, key);
+ int index = Arrays.binarySearch(points, point1);
if (index < 0)
{
index = -index - 1;
- assert (index < data.length) : "Not found in array";
- assert (unwrapPoint(data[index]) == point1) : "Not found in
array";
+ assert (index < points.length) : "Not found in array";
+ assert (points[index] == point1) : "Not found in array";
}
- long value1 = unwrapValue(data[index]);
- long value2 = unwrapValue(data[index + 1]);
+ long value1 = values[index];
+ long value2 = values[index + 1];
- assert (unwrapPoint(data[index + 1]) == point2) : "point2 should
follow point1";
+ assert (points[index + 1] == point2) : "point2 should follow
point1";
long sum = value1 + value2;
//let's evaluate in long values to handle overflow in
multiplication
- int newPoint = saturatingCastToInt((point1 * value1 + point2 *
value2) / sum);
+ //long newPoint = saturatingCastToMaxDeletionTime((point1 * value1
+ point2 * value2) / sum); // TODO 14227 Better weighted merged point possible?
+ long newPoint = (point2 - point1) >> 1; // approx division by 2
for perf reasons
Review Comment:
It's not equivalent. That is the mid point that is later used for a simple
average. At the time of writing that was the best that didn't break stuff. But
you triggered me to revisit this again and with fresh eyes I pushed a weighted
average version that works. I think when I originally tried that long ago I
wasn't accounting for merge boundaries correctly if my memory serves me right.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]