abstractdog commented on a change in pull request #2225:
URL: https://github.com/apache/hive/pull/2225#discussion_r682423839



##########
File path: 
ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/ValueBoundaryScanner.java
##########
@@ -664,6 +629,84 @@ public Object computeValue(Object row) throws 
HiveException {
    */
   public abstract boolean isEqual(Object v1, Object v2);
 
+  protected int binarySearchBack(int rowId, PTFPartition p, Object sortKey, 
int amt,
+      Order order) throws HiveException {
+    boolean isOrderDesc = order.equals(Order.DESC);
+    Object rowVal = sortKey;
+
+    int rMin = 0;  // tracks lowest possible number fulfilling the range 
requirement
+    int rMax = rowId; // tracks highest possible number fulfilling the range 
requirement
+
+    boolean isDistanceGreater = true;
+    while (rMin < rMax) {
+      isDistanceGreater = isDistanceGreater(isOrderDesc ? rowVal : sortKey, 
isOrderDesc ? sortKey : rowVal, amt);
+      if (isDistanceGreater) {
+        rMin = rowId + 1;
+      } else {
+        rMax = rowId;
+      }
+      rowId = rMin + (rMax - rMin) / 2;
+      rowVal = computeValueUseCache(rowId, p);
+    }
+    return rMin;
+  }
+
+  private int linearSearchBack(int r, PTFPartition p, Object sortKey, int amt,
+      Order order) throws HiveException {
+    boolean isOrderDesc = order.equals(Order.DESC);
+    Object rowVal = sortKey;
+    while (r >= 0 && !isDistanceGreater(isOrderDesc ? rowVal : sortKey,
+        isOrderDesc ? sortKey : rowVal, amt)) {
+      Pair<Integer, Object> stepResult = skipOrStepBack(r, p);
+      r = stepResult.getLeft();
+      rowVal = stepResult.getRight();
+    }
+    return r + 1;
+  }
+
+  protected int binarySearchForward(int rowId, PTFPartition p, Object sortKey, 
int amt,
+      Order order) throws HiveException {
+    boolean isOrderDesc = order.equals(Order.DESC);
+    Object rowVal = sortKey;
+    int rMin = rowId;  // tracks lowest possible number fulfilling the range 
requirement
+    int rMax = p.size(); // tracks highest possible number fulfilling the 
range requirement
+
+    boolean isDistanceGreater = true;
+    while (rMin < rMax) {
+      isDistanceGreater =
+          isDistanceGreater(isOrderDesc ? sortKey : rowVal, isOrderDesc ? 
rowVal : sortKey, amt);
+      /*
+       * if the currently inspected row is the last (p.size() - 1) but we're 
not far enough to fulfill
+       * the range requirement (!isDistanceGreater), we cannot move forward, 
let's simply return p.size(),
+       * range end is exclusive
+       */
+      if (!isDistanceGreater && rowId == p.size() - 1) {

Review comment:
       you're right! changed accordingly and it seems to be working, the 
algorithm now is as clear as possible, thanks!
   I'm uploading a new commit

##########
File path: 
ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/ValueBoundaryScanner.java
##########
@@ -664,6 +629,84 @@ public Object computeValue(Object row) throws 
HiveException {
    */
   public abstract boolean isEqual(Object v1, Object v2);
 
+  protected int binarySearchBack(int rowId, PTFPartition p, Object sortKey, 
int amt,
+      Order order) throws HiveException {
+    boolean isOrderDesc = order.equals(Order.DESC);
+    Object rowVal = sortKey;
+
+    int rMin = 0;  // tracks lowest possible number fulfilling the range 
requirement
+    int rMax = rowId; // tracks highest possible number fulfilling the range 
requirement
+
+    boolean isDistanceGreater = true;
+    while (rMin < rMax) {
+      isDistanceGreater = isDistanceGreater(isOrderDesc ? rowVal : sortKey, 
isOrderDesc ? sortKey : rowVal, amt);
+      if (isDistanceGreater) {
+        rMin = rowId + 1;
+      } else {
+        rMax = rowId;
+      }
+      rowId = rMin + (rMax - rMin) / 2;
+      rowVal = computeValueUseCache(rowId, p);
+    }
+    return rMin;
+  }
+
+  private int linearSearchBack(int r, PTFPartition p, Object sortKey, int amt,
+      Order order) throws HiveException {
+    boolean isOrderDesc = order.equals(Order.DESC);
+    Object rowVal = sortKey;
+    while (r >= 0 && !isDistanceGreater(isOrderDesc ? rowVal : sortKey,
+        isOrderDesc ? sortKey : rowVal, amt)) {
+      Pair<Integer, Object> stepResult = skipOrStepBack(r, p);
+      r = stepResult.getLeft();
+      rowVal = stepResult.getRight();
+    }
+    return r + 1;
+  }
+
+  protected int binarySearchForward(int rowId, PTFPartition p, Object sortKey, 
int amt,
+      Order order) throws HiveException {
+    boolean isOrderDesc = order.equals(Order.DESC);
+    Object rowVal = sortKey;
+    int rMin = rowId;  // tracks lowest possible number fulfilling the range 
requirement
+    int rMax = p.size(); // tracks highest possible number fulfilling the 
range requirement
+
+    boolean isDistanceGreater = true;
+    while (rMin < rMax) {
+      isDistanceGreater =
+          isDistanceGreater(isOrderDesc ? sortKey : rowVal, isOrderDesc ? 
rowVal : sortKey, amt);
+      /*
+       * if the currently inspected row is the last (p.size() - 1) but we're 
not far enough to fulfill
+       * the range requirement (!isDistanceGreater), we cannot move forward, 
let's simply return p.size(),
+       * range end is exclusive
+       */
+      if (!isDistanceGreater && rowId == p.size() - 1) {

Review comment:
       you're right! changed accordingly and it seems to be working, the 
algorithm now is as clear as possible, thanks!
   I'm pushing a new commit




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to