HIVE-10985 : Value based windowing on timestamp and double can't handle NULL 
value (Aihua Xu via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b86728de
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b86728de
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b86728de

Branch: refs/heads/spark
Commit: b86728dea0132238d819ad5c7d3855f3aa65157b
Parents: 670f57c
Author: Aihua Xu <aihu...@gmail.com>
Authored: Fri Jun 12 07:21:00 2015 -0700
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Mon Jun 15 10:33:01 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/udf/ptf/WindowingTableFunction.java | 56 +++++++++-----
 .../clientpositive/windowing_windowspec3.q      | 20 +++++
 .../clientpositive/windowing_windowspec3.q.out  | 80 ++++++++++++++++++++
 3 files changed, 136 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b86728de/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java
index 4635ba2..32471f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java
@@ -1161,20 +1161,28 @@ public class WindowingTableFunction extends 
TableFunctionEvaluator {
 
     @Override
     public boolean isGreater(Object v1, Object v2, int amt) {
-      long l1 = PrimitiveObjectInspectorUtils.getLong(v1,
-          (PrimitiveObjectInspector) expressionDef.getOI());
-      long l2 = PrimitiveObjectInspectorUtils.getLong(v2,
-          (PrimitiveObjectInspector) expressionDef.getOI());
-      return (l1 -l2) > amt;
+      if (v1 != null && v2 != null) {
+        long l1 = PrimitiveObjectInspectorUtils.getLong(v1,
+            (PrimitiveObjectInspector) expressionDef.getOI());
+        long l2 = PrimitiveObjectInspectorUtils.getLong(v2,
+            (PrimitiveObjectInspector) expressionDef.getOI());
+        return (l1 -l2) > amt;
+      }
+
+      return v1 != null || v2 != null; // True if only one value is null
     }
 
     @Override
     public boolean isEqual(Object v1, Object v2) {
-      long l1 = PrimitiveObjectInspectorUtils.getLong(v1,
-          (PrimitiveObjectInspector) expressionDef.getOI());
-      long l2 = PrimitiveObjectInspectorUtils.getLong(v2,
-          (PrimitiveObjectInspector) expressionDef.getOI());
-      return l1 == l2;
+      if (v1 != null && v2 != null) {
+        long l1 = PrimitiveObjectInspectorUtils.getLong(v1,
+            (PrimitiveObjectInspector) expressionDef.getOI());
+        long l2 = PrimitiveObjectInspectorUtils.getLong(v2,
+            (PrimitiveObjectInspector) expressionDef.getOI());
+        return l1 == l2;
+      }
+
+      return v1 == null && v2 == null; // True if both are null
     }
   }
 
@@ -1186,20 +1194,28 @@ public class WindowingTableFunction extends 
TableFunctionEvaluator {
 
     @Override
     public boolean isGreater(Object v1, Object v2, int amt) {
-      double d1 = PrimitiveObjectInspectorUtils.getDouble(v1,
-          (PrimitiveObjectInspector) expressionDef.getOI());
-      double d2 = PrimitiveObjectInspectorUtils.getDouble(v2,
-          (PrimitiveObjectInspector) expressionDef.getOI());
-      return (d1 -d2) > amt;
+      if (v1 != null && v2 != null) {
+        double d1 = PrimitiveObjectInspectorUtils.getDouble(v1,
+            (PrimitiveObjectInspector) expressionDef.getOI());
+        double d2 = PrimitiveObjectInspectorUtils.getDouble(v2,
+            (PrimitiveObjectInspector) expressionDef.getOI());
+        return (d1 -d2) > amt;
+      }
+
+      return v1 != null || v2 != null; // True if only one value is null
     }
 
     @Override
     public boolean isEqual(Object v1, Object v2) {
-      double d1 = PrimitiveObjectInspectorUtils.getDouble(v1,
-          (PrimitiveObjectInspector) expressionDef.getOI());
-      double d2 = PrimitiveObjectInspectorUtils.getDouble(v2,
-          (PrimitiveObjectInspector) expressionDef.getOI());
-      return d1 == d2;
+      if (v1 != null && v2 != null) {
+        double d1 = PrimitiveObjectInspectorUtils.getDouble(v1,
+            (PrimitiveObjectInspector) expressionDef.getOI());
+        double d2 = PrimitiveObjectInspectorUtils.getDouble(v2,
+            (PrimitiveObjectInspector) expressionDef.getOI());
+        return d1 == d2;
+      }
+
+      return v1 == null && v2 == null; // True if both are null
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b86728de/ql/src/test/queries/clientpositive/windowing_windowspec3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_windowspec3.q 
b/ql/src/test/queries/clientpositive/windowing_windowspec3.q
index c87aaff..608a6cf 100644
--- a/ql/src/test/queries/clientpositive/windowing_windowspec3.q
+++ b/ql/src/test/queries/clientpositive/windowing_windowspec3.q
@@ -29,3 +29,23 @@ select deptno, empno, hiredate, salary,
     sum(salary) over (partition by deptno order by hiredate range between 10 
following and unbounded following),
     sum(salary) over (partition by deptno order by hiredate range between 
unbounded preceding and 10 following)
 from emp;
+
+-- Support timestamp datatype. Value in seconds (90days = 90 * 24 * 3600 
seconds)
+select deptno, empno, hirets, salary,
+    sum(salary) over (partition by deptno order by hirets range 7776000 
preceding),
+    sum(salary) over (partition by deptno order by hirets range between 
7776000 preceding and 7776000 following),
+    sum(salary) over (partition by deptno order by hirets range between 
7776000 preceding and 864000 preceding),
+    sum(salary) over (partition by deptno order by hirets range between 864000 
following and 7776000 following),
+    sum(salary) over (partition by deptno order by hirets range between 864000 
following and unbounded following),
+    sum(salary) over (partition by deptno order by hirets range between 
unbounded preceding and 864000 following)
+from emp;
+
+-- Support double datatype
+select deptno, empno, bonus,
+    avg(bonus) over (partition by deptno order by bonus range 200 preceding),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
preceding and 200 following),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
preceding and 100 preceding),
+    avg(bonus) over (partition by deptno order by bonus range between 100 
following and 200 following),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
following and unbounded following),
+    avg(bonus) over (partition by deptno order by bonus range between 
unbounded preceding and 200 following)
+from emp;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/b86728de/ql/src/test/results/clientpositive/windowing_windowspec3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/windowing_windowspec3.q.out 
b/ql/src/test/results/clientpositive/windowing_windowspec3.q.out
index bf7797a..42c042f 100644
--- a/ql/src/test/results/clientpositive/windowing_windowspec3.q.out
+++ b/ql/src/test/results/clientpositive/windowing_windowspec3.q.out
@@ -108,3 +108,83 @@ POSTHOOK: Input: default@emp
 30     7844    1981-09-08      1500.0  1500.0  3700.0  NULL    2200.0  2200.0  
7200.0
 30     7654    1981-09-28      1250.0  2750.0  3700.0  1500.0  950.0   950.0   
8450.0
 30     7900    1981-12-03      950.0   3700.0  3700.0  2750.0  NULL    NULL    
9400.0
+PREHOOK: query: -- Support timestamp datatype. Value in seconds (90days = 90 * 
24 * 3600 seconds)
+select deptno, empno, hirets, salary,
+    sum(salary) over (partition by deptno order by hirets range 7776000 
preceding),
+    sum(salary) over (partition by deptno order by hirets range between 
7776000 preceding and 7776000 following),
+    sum(salary) over (partition by deptno order by hirets range between 
7776000 preceding and 864000 preceding),
+    sum(salary) over (partition by deptno order by hirets range between 864000 
following and 7776000 following),
+    sum(salary) over (partition by deptno order by hirets range between 864000 
following and unbounded following),
+    sum(salary) over (partition by deptno order by hirets range between 
unbounded preceding and 864000 following)
+from emp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp
+#### A masked pattern was here ####
+POSTHOOK: query: -- Support timestamp datatype. Value in seconds (90days = 90 
* 24 * 3600 seconds)
+select deptno, empno, hirets, salary,
+    sum(salary) over (partition by deptno order by hirets range 7776000 
preceding),
+    sum(salary) over (partition by deptno order by hirets range between 
7776000 preceding and 7776000 following),
+    sum(salary) over (partition by deptno order by hirets range between 
7776000 preceding and 864000 preceding),
+    sum(salary) over (partition by deptno order by hirets range between 864000 
following and 7776000 following),
+    sum(salary) over (partition by deptno order by hirets range between 864000 
following and unbounded following),
+    sum(salary) over (partition by deptno order by hirets range between 
unbounded preceding and 864000 following)
+from emp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp
+#### A masked pattern was here ####
+10     7988    NULL    1500.0  3000.0  3000.0  NULL    NULL    8750.0  3000.0
+10     7987    NULL    1500.0  3000.0  3000.0  NULL    NULL    8750.0  3000.0
+10     7782    1981-06-09 00:00:00     2450.0  2450.0  2450.0  NULL    NULL    
6300.0  5450.0
+10     7839    1981-11-17 00:00:00     5000.0  5000.0  6300.0  NULL    1300.0  
1300.0  10450.0
+10     7934    1982-01-23 00:00:00     1300.0  6300.0  6300.0  5000.0  NULL    
NULL    11750.0
+20     7369    1980-12-17 00:00:00     800.0   800.0   800.0   NULL    NULL    
10075.0 800.0
+20     7566    1981-04-02 00:00:00     2975.0  2975.0  2975.0  NULL    NULL    
7100.0  3775.0
+20     7902    1981-12-03 00:00:00     3000.0  3000.0  3000.0  NULL    NULL    
4100.0  6775.0
+20     7788    1982-12-09 00:00:00     3000.0  3000.0  4100.0  NULL    1100.0  
1100.0  9775.0
+20     7876    1983-01-12 00:00:00     1100.0  4100.0  4100.0  3000.0  NULL    
NULL    10875.0
+30     7499    1981-02-20 00:00:00     1600.0  1600.0  5700.0  NULL    2850.0  
6550.0  2850.0
+30     7521    1981-02-22 00:00:00     1250.0  2850.0  5700.0  NULL    2850.0  
6550.0  2850.0
+30     7698    1981-05-01 00:00:00     2850.0  5700.0  5700.0  2850.0  NULL    
3700.0  5700.0
+30     7844    1981-09-08 00:00:00     1500.0  1500.0  3700.0  NULL    2200.0  
2200.0  7200.0
+30     7654    1981-09-28 00:00:00     1250.0  2750.0  3700.0  1500.0  950.0   
950.0   8450.0
+30     7900    1981-12-03 00:00:00     950.0   3700.0  3700.0  2750.0  NULL    
NULL    9400.0
+PREHOOK: query: -- Support double datatype
+select deptno, empno, bonus,
+    avg(bonus) over (partition by deptno order by bonus range 200 preceding),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
preceding and 200 following),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
preceding and 100 preceding),
+    avg(bonus) over (partition by deptno order by bonus range between 100 
following and 200 following),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
following and unbounded following),
+    avg(bonus) over (partition by deptno order by bonus range between 
unbounded preceding and 200 following)
+from emp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp
+#### A masked pattern was here ####
+POSTHOOK: query: -- Support double datatype
+select deptno, empno, bonus,
+    avg(bonus) over (partition by deptno order by bonus range 200 preceding),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
preceding and 200 following),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
preceding and 100 preceding),
+    avg(bonus) over (partition by deptno order by bonus range between 100 
following and 200 following),
+    avg(bonus) over (partition by deptno order by bonus range between 200 
following and unbounded following),
+    avg(bonus) over (partition by deptno order by bonus range between 
unbounded preceding and 200 following)
+from emp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp
+#### A masked pattern was here ####
+10     7934    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+10     7987    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+10     7839    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+10     7988    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+10     7782    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+20     7788    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+20     7902    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+20     7876    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+20     7566    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+20     7369    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+30     7698    NULL    NULL    NULL    NULL    NULL    550.0   NULL
+30     7900    NULL    NULL    NULL    NULL    NULL    550.0   NULL
+30     7844    0.0     0.0     0.0     NULL    NULL    733.3333333333334       
0.0
+30     7499    300.0   300.0   400.0   NULL    500.0   1400.0  
266.6666666666667
+30     7521    500.0   400.0   400.0   300.0   NULL    1400.0  
266.6666666666667
+30     7654    1400.0  1400.0  1400.0  NULL    NULL    NULL    550.0

Reply via email to