abstractdog commented on a change in pull request #2225: URL: https://github.com/apache/hive/pull/2225#discussion_r680944608
########## File path: ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/ValueBoundaryScanner.java ########## @@ -664,6 +632,87 @@ public Object computeValue(Object row) throws HiveException { */ public abstract boolean isEqual(Object v1, Object v2); + protected int binarySearchBack(int r, PTFPartition p, Object sortKey, int amt, + Order order) throws HiveException { + boolean isOrderDesc = order.equals(Order.DESC); + Object rowVal = sortKey; + + int rMin = -1; // tracks lowest possible number fulfilling the range requirement + int rMax = r; // tracks highest possible number fulfilling the range requirement + + boolean isDistanceGreater = true; + while (true) { + if (rMin == rMax) { + return rMin; + } + isDistanceGreater = + isDistanceGreater(isOrderDesc ? rowVal : sortKey, isOrderDesc ? sortKey : rowVal, amt); + if (!isDistanceGreater && r == 0) { + return 0; + } + if (isDistanceGreater) { + rMin = r + 1; + } else { + rMax = r; + } + r = rMin + (rMax - rMin) / 2; + rowVal = computeValueUseCache(r, p); + } + } + + private int linearSearchBack(int r, PTFPartition p, Object sortKey, int amt, + Order order) throws HiveException { + boolean isOrderDesc = order.equals(Order.DESC); + Object rowVal = sortKey; + while (r >= 0 && !isDistanceGreater(isOrderDesc ? rowVal : sortKey, + isOrderDesc ? sortKey : rowVal, amt)) { + Pair<Integer, Object> stepResult = skipOrStepBack(r, p); + r = stepResult.getLeft(); + rowVal = stepResult.getRight(); + } + return r + 1; + } + + protected int binarySearchForward(int r, PTFPartition p, Object sortKey, int amt, + Order order) throws HiveException { + boolean isOrderDesc = order.equals(Order.DESC); + Object rowVal = sortKey; + int rMin = r; // tracks lowest possible number fulfilling the range requirement + int rMax = p.size(); // tracks highest possible number fulfilling the range requirement + + boolean isDistanceGreater = true; + while (true) { + if (rMin == rMax) { + return rMin; + } + isDistanceGreater = + isDistanceGreater(isOrderDesc ? sortKey : rowVal, isOrderDesc ? rowVal : sortKey, amt); + if (!isDistanceGreater && r == p.size() - 1) { Review comment: to stay in line with the current linear implementation, we need to be aware that this range end is exclusive changing this to p.size()-1 will make TestBoundaryCache fail, showing that the binary search led to different results than the linear actually it's because p.size() means range will contain every row until p.size() exclusive == p.size() - 1 inclusive (== every row in the partition) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org