[5/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

2018-02-20 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index e50f3e2..23914f8 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -148,31 +148,15 @@ STAGE PLANS:
 partitionColumnCount: 0
 scratchColumnTypeNames: [double, decimal(11,4)]
 Reducer 2 
-Execution mode: vectorized, llap
+Execution mode: llap
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
-reduceColumnSortOrder: +
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
-rowBatchContext:
-dataColumnCount: 11
-dataColumns: KEY._col0:boolean, KEY._col1:tinyint, 
KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, 
VALUE._col1:double, 
VALUE._col2:struct, 
VALUE._col3:struct, VALUE._col4:float, 
VALUE._col5:tinyint
-partitionColumnCount: 0
-scratchColumnTypeNames: []
+notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+vectorized: false
 Reduce Operator Tree:
   Group By Operator
 aggregations: max(VALUE._col0), sum(VALUE._col1), 
stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), 
min(VALUE._col5)
-Group By Vectorization:
-aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, 
VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 
7:struct) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
8:struct) -> double aggregation: 
stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 
10:tinyint) -> tinyint
-className: VectorGroupByOperator
-groupByMode: MERGEPARTIAL
-keyExpressions: col 0:boolean, col 1:tinyint, col 
2:timestamp, col 3:float, col 4:string
-native: false
-vectorProcessingMode: MERGE_PARTIAL
-projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
 keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), 
KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
@@ -180,21 +164,10 @@ STAGE PLANS:
 Select Operator
   expressions: _col0 (type: boolean), _col1 (type: tinyint), 
_col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) 
(type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), 
_col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: 
double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: 
double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + 
_col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: 
double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 
* UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), 
_col10 (type: tinyint)
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 
16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
-  selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 
11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: 
LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, 
DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: 
CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, 
col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 
14:tinyint) -> 

[6/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

2018-02-20 Thread mmccline
HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt 
McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8975924e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8975924e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8975924e

Branch: refs/heads/master
Commit: 8975924ec070380069d71d325a3358fe9932befb
Parents: 3df6bc2
Author: Matt McCline 
Authored: Tue Feb 20 22:33:59 2018 -0800
Committer: Matt McCline 
Committed: Tue Feb 20 22:33:59 2018 -0800

--
 .../UDAFTemplates/VectorUDAFVarMerge.txt|   5 +
 .../llap/parquet_types_vectorization.q.out  |   2 +-
 .../llap/vector_decimal_aggregate.q.out |  66 +--
 .../llap/vector_decimal_udf.q.out   | 100 ++
 .../llap/vector_reuse_scratchcols.q.out |  58 +-
 .../llap/vector_string_decimal.q.out| 137 ++
 .../llap/vector_udf_string_to_boolean.q.out | 189 +++
 .../clientpositive/llap/vectorization_0.q.out   |  50 ++---
 .../clientpositive/llap/vectorization_1.q.out   |  29 +--
 .../clientpositive/llap/vectorization_12.q.out  |  33 +---
 .../clientpositive/llap/vectorization_13.q.out  |  57 +-
 .../clientpositive/llap/vectorization_14.q.out  |  33 +---
 .../clientpositive/llap/vectorization_16.q.out  |  30 +--
 .../clientpositive/llap/vectorization_2.q.out   |  29 +--
 .../clientpositive/llap/vectorization_3.q.out   |  29 +--
 .../clientpositive/llap/vectorization_4.q.out   |  29 +--
 .../clientpositive/llap/vectorization_9.q.out   |  30 +--
 .../vectorization_input_format_excludes.q.out   |  28 ++-
 .../llap/vectorization_part_project.q.out   |  12 +-
 .../llap/vectorization_short_regress.q.out  | 186 +++---
 .../llap/vectorized_mapjoin3.q.out  | 100 --
 .../llap/vectorized_parquet.q.out   |   7 +-
 .../llap/vectorized_parquet_types.q.out |  19 +-
 .../llap/vectorized_timestamp.q.out |  24 +--
 .../llap/vectorized_timestamp_funcs.q.out   |  22 +--
 .../spark/parquet_vectorization_0.q.out |  46 +
 .../spark/parquet_vectorization_1.q.out |  28 +--
 .../spark/parquet_vectorization_12.q.out|  32 +---
 .../spark/parquet_vectorization_13.q.out|  55 +-
 .../spark/parquet_vectorization_14.q.out|  32 +---
 .../spark/parquet_vectorization_16.q.out|  29 +--
 .../spark/parquet_vectorization_2.q.out |  28 +--
 .../spark/parquet_vectorization_3.q.out |  28 +--
 .../spark/parquet_vectorization_4.q.out |  28 +--
 .../spark/parquet_vectorization_9.q.out |  29 +--
 .../spark/vector_decimal_aggregate.q.out|  64 +--
 .../clientpositive/spark/vectorization_0.q.out  |  46 +
 .../clientpositive/spark/vectorization_1.q.out  |  28 +--
 .../clientpositive/spark/vectorization_12.q.out |  32 +---
 .../clientpositive/spark/vectorization_13.q.out |  55 +-
 .../clientpositive/spark/vectorization_14.q.out |  32 +---
 .../clientpositive/spark/vectorization_16.q.out |  29 +--
 .../clientpositive/spark/vectorization_2.q.out  |  28 +--
 .../clientpositive/spark/vectorization_3.q.out  |  28 +--
 .../clientpositive/spark/vectorization_4.q.out  |  28 +--
 .../clientpositive/spark/vectorization_9.q.out  |  29 +--
 .../vectorization_input_format_excludes.q.out   |  24 +--
 .../spark/vectorization_short_regress.q.out | 178 ++---
 .../spark/vectorized_timestamp_funcs.q.out  |  21 +--
 49 files changed, 575 insertions(+), 1686 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
--
diff --git a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt 
b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
index 9b1c1cd..ccc5a22 100644
--- a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
+++ b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
@@ -487,6 +487,9 @@ public class <ClassName> extends VectorAggregateExpression {
  * Mode FINAL.
 #ENDIF FINAL
  */
+
+/*
+There seems to be a Wrong Results bug in VectorUDAFVarFinal -- disabling 
vectorization for now...
 return
 GenericUDAFVariance.isVarianceFamilyName(name) &&
 inputColVectorType == ColumnVector.Type.STRUCT &&
@@ -498,6 +501,8 @@ public class <ClassName> extends VectorAggregateExpression {
 outputColVectorType == ColumnVector.Type.DOUBLE &&
 mode == Mode.FINAL;
 #ENDIF FINAL
+*/
+return false;
   }
 
   @Override

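For context on what the disabled path computes: a FINAL-mode variance evaluator merges partial (count, sum, variance) structs from the map side, where the "variance" field holds the running sum of squared deviations (M2). The following standalone sketch -- illustrative only, not Hive's implementation -- shows the standard parallel merge (Chan et al.) that such an evaluator must perform; an error in this step yields exactly the kind of wrong stddev_pop/stddev_samp results this patch works around.

// Illustrative sketch only -- NOT Hive's code. Merges variance partials
// (count n, sum, and M2 = sum of squared deviations; Hive's partial struct
// calls the M2 field "variance").
public final class VarianceMergeSketch {

  // Merge two partials (n, sum, m2) into one, per the parallel formula.
  static double[] merge(double[] a, double[] b) {
    double n1 = a[0], n2 = b[0];
    if (n1 == 0) return b;
    if (n2 == 0) return a;
    double n = n1 + n2;
    double delta = b[1] / n2 - a[1] / n1;                  // difference of partial means
    double m2 = a[2] + b[2] + delta * delta * n1 * n2 / n;
    return new double[] { n, a[1] + b[1], m2 };
  }

  public static void main(String[] args) {
    // Partition 1: {1,2,3} -> n=3, sum=6, m2=2. Partition 2: {3,7} -> n=2, sum=10, m2=8.
    double[] all = merge(new double[]{3, 6, 2}, new double[]{2, 10, 8});
    double stddevPop = Math.sqrt(all[2] / all[0]);         // final step for stddev_pop
    double stddevSamp = Math.sqrt(all[2] / (all[0] - 1));  // final step for stddev_samp
    System.out.printf("stddev_pop=%.6f stddev_samp=%.6f%n", stddevPop, stddevSamp);
  }
}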

[1/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

2018-02-20 Thread mmccline
Repository: hive
Updated Branches:
  refs/heads/master 3df6bc28b -> 8975924ec


http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index 6b63764..9683efa 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -140,40 +140,23 @@ STAGE PLANS:
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2 
-Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
+notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+vectorized: false
 Reduce Operator Tree:
   Group By Operator
 aggregations: avg(VALUE._col0), sum(VALUE._col1), 
stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), 
avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
-Group By Vectorization:
-aggregators: VectorUDAFAvgFinal(col 
0:struct) -> double, VectorUDAFSumDouble(col 
1:double) -> double, VectorUDAFVarFinal(col 
2:struct) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
3:struct) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
4:struct) -> double aggregation: 
var_samp, VectorUDAFAvgFinal(col 5:struct) 
-> double, VectorUDAFVarFinal(col 
6:struct) -> double aggregation: 
stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, 
VectorUDAFCountMerge(col 8:bigint) -> bigint
-className: VectorGroupByOperator
-groupByMode: MERGEPARTIAL
-native: false
-vectorProcessingMode: GLOBAL
-projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: _col0 (type: double), (_col0 + -3728.0) (type: 
double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: 
double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 
(type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 
+ -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0 (type: double), 
_col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + 
-3728.0 (type: double), ((_col2 - (- (- (_col0 + -3728.0 * _col2) 
(type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) 
(type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) 
(type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), 
(_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), 
(UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) 
(type: double)
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 
15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
-  selectExpressions: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 
10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: 
DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, 
DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: 
DoubleColUnaryMinus(col 

[2/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

2018-02-20 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
index f4a5b55..591de4b 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
@@ -144,31 +144,14 @@ STAGE PLANS:
 partitionColumnCount: 0
 scratchColumnTypeNames: []
 Reducer 2 
-Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-reduceColumnNullOrder: 
-reduceColumnSortOrder: 
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
-rowBatchContext:
-dataColumnCount: 9
-dataColumns: KEY._col0:double, KEY._col1:bigint, 
KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, 
VALUE._col1:struct, 
VALUE._col2:struct, VALUE._col3:bigint, 
VALUE._col4:struct
-partitionColumnCount: 0
-scratchColumnTypeNames: []
+notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: 
FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+vectorized: false
 Reduce Operator Tree:
   Group By Operator
 aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), 
avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
-Group By Vectorization:
-aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, 
VectorUDAFVarFinal(col 5:struct) -> 
double aggregation: stddev_samp, VectorUDAFAvgFinal(col 
6:struct) -> double, 
VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 
8:struct) -> double aggregation: 
stddev_pop
-className: VectorGroupByOperator
-groupByMode: MERGEPARTIAL
-keyExpressions: col 0:double, col 1:bigint, col 2:string, 
col 3:boolean
-native: false
-vectorProcessingMode: MERGE_PARTIAL
-projectedOutputColumnNums: [0, 1, 2, 3, 4]
 keys: KEY._col0 (type: double), KEY._col1 (type: bigint), 
KEY._col2 (type: string), KEY._col3 (type: boolean)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
@@ -176,21 +159,10 @@ STAGE PLANS:
 Select Operator
   expressions: _col1 (type: bigint), _col3 (type: boolean), 
_col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), 
(- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), 
_col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- 
((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- 
(-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) 
(type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: 
double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * 
_col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col17, _col18, _col19
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumnNums: [1, 3, 2, 0, 9, 10, 4, 11, 5, 
13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
-  selectExpressions: DoubleScalarMultiplyDoubleColumn(val 
-6432.0, col 0:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 
10:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 11:bigint, 
DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: 
DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 
13:double, DoubleColUnaryMinus(col 14:double)(children: 
DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: 
DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 
14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: 

[3/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

2018-02-20 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out 
b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
index 59a58e7..34b273c 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
@@ -117,47 +117,23 @@ STAGE PLANS:
 partitionColumnCount: 0
 scratchColumnTypeNames: [double]
 Reducer 2 
-Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-reduceColumnNullOrder: 
-reduceColumnSortOrder: 
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
-rowBatchContext:
-dataColumnCount: 6
-dataColumns: 
VALUE._col0:struct, 
VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, 
VALUE._col4:struct, VALUE._col5:bigint
-partitionColumnCount: 0
-scratchColumnTypeNames: []
+notVectorizedReason: GROUPBY operator: Vector aggregation : 
"var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFVarianceEvaluator
+vectorized: false
 Reduce Operator Tree:
   Group By Operator
 aggregations: var_pop(VALUE._col0), sum(VALUE._col1), 
max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5)
-Group By Vectorization:
-aggregators: VectorUDAFVarFinal(col 
0:struct) -> double aggregation: 
var_pop, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFMaxLong(col 
2:tinyint) -> tinyint, VectorUDAFMaxLong(col 3:int) -> int, 
VectorUDAFVarFinal(col 4:struct) -> 
double aggregation: var_samp, VectorUDAFCountMerge(col 5:bigint) -> bigint
-className: VectorGroupByOperator
-groupByMode: MERGEPARTIAL
-native: false
-vectorProcessingMode: GLOBAL
-projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: _col0 (type: double), (_col0 / -26.28) (type: 
double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * 
(-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + 
_col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 
79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * 
(-1.389 + _col1 (type: double), _col5 (type: bigint), (-563 % _col3) (type: 
int)
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumnNums: [0, 6, 1, 7, 9, 2, 8, 3, 12, 
4, 13, 5, 14]
-  selectExpressions: DoubleColDivideDoubleScalar(col 
0:double, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 
1:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 1:double, col 
8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 
8:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: 
DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: 
DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 
10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11:decimal(10,0), 
val 79.553)(children: CastLongToDecimal(col 3:int) -> 11:decimal(10,0)) -> 
12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 
10:double)(children: DoubleColUnaryMinus(col 13:double)(children: 
DoubleColMultiplyDoubleColumn(col 1:double, col 10:double)(children: 
DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 10:double) -> 
13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(val -563, col 3:int) -> 14:int
   Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
   File Output Operator
   

[4/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

2018-02-20 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 0027ab5..bd5e284 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -142,40 +142,24 @@ STAGE PLANS:
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2 
-Execution mode: vectorized, llap
+Execution mode: llap
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
+notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+vectorized: false
 Reduce Operator Tree:
   Group By Operator
 aggregations: avg(VALUE._col0), sum(VALUE._col1), 
stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), 
avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
-Group By Vectorization:
-aggregators: VectorUDAFAvgFinal(col 
0:struct) -> double, VectorUDAFSumDouble(col 
1:double) -> double, VectorUDAFVarFinal(col 
2:struct) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
3:struct) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
4:struct) -> double aggregation: 
var_samp, VectorUDAFAvgFinal(col 5:struct) 
-> double, VectorUDAFVarFinal(col 
6:struct) -> double aggregation: 
stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, 
VectorUDAFCountMerge(col 8:bigint) -> bigint
-className: VectorGroupByOperator
-groupByMode: MERGEPARTIAL
-native: false
-vectorProcessingMode: GLOBAL
-projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
 Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE 
Column stats: COMPLETE
 Select Operator
   expressions: _col0 (type: double), (_col0 + -3728.0) (type: 
double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: 
double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 
(type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 
+ -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0 (type: double), 
_col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + 
-3728.0 (type: double), ((_col2 - (- (- (_col0 + -3728.0 * _col2) 
(type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) 
(type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) 
(type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), 
(_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), 
(UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) 
(type: double)
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 
15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
-  selectExpressions: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 
10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: 
DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, 
DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: 
DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 

[1/2] hive git commit: HIVE-18737 : add an option to disable LLAP IO ACID for non-original files (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)

2018-02-20 Thread sershe
Repository: hive
Updated Branches:
  refs/heads/master e51f7c9d2 -> 3df6bc28b


HIVE-18737 : add an option to disable LLAP IO ACID for non-original files 
(Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dbf38ed1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dbf38ed1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dbf38ed1

Branch: refs/heads/master
Commit: dbf38ed1434656f487f942b2f9df4fea3e29e44a
Parents: e51f7c9
Author: sergey 
Authored: Tue Feb 20 17:21:00 2018 -0800
Committer: sergey 
Committed: Tue Feb 20 17:21:00 2018 -0800

--
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java  | 1 +
 ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java | 8 +---
 2 files changed, 6 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/dbf38ed1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b51dc7e..38f6430 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3120,6 +3120,7 @@ public class HiveConf extends Configuration {
 false, "Use Tez cartesian product edge to speed up cross product"),
 // The default is different on the client and server, so it's null here.
 LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer 
is enabled."),
+LLAP_IO_ACID_ENABLED("hive.llap.io.acid", true, "Whether the LLAP IO layer 
is enabled for ACID."),
 LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb",
 new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false),
 "The buffer size for a per-fragment LLAP debug trace. 0 to disable."),

http://git-wip-us.apache.org/repos/asf/hive/blob/dbf38ed1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
index 91d855b..96c5916 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
@@ -29,6 +29,7 @@ import java.util.List;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.io.AcidInputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.ColumnarSplit;
@@ -234,13 +235,13 @@ public class OrcSplit extends FileSplit implements 
ColumnarSplit, LlapAwareSplit
   public boolean canUseLlapIo(Configuration conf) {
 final boolean hasDelta = deltas != null && !deltas.isEmpty();
 final boolean isAcidRead = AcidUtils.isFullAcidScan(conf);
-final boolean isVectorized = HiveConf.getBoolVar(conf,
-HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
+final boolean isVectorized = HiveConf.getBoolVar(conf, 
ConfVars.HIVE_VECTORIZATION_ENABLED);
 Boolean isSplitUpdate = null;
 if (isAcidRead) {
   final AcidUtils.AcidOperationalProperties acidOperationalProperties
   = AcidUtils.getAcidOperationalProperties(conf);
   isSplitUpdate = acidOperationalProperties.isSplitUpdate();
+  // TODO: this is brittle. Who said everyone has to upgrade using upgrade 
process?
   assert isSplitUpdate : "should be true in Hive 3.0";
 }
 
@@ -250,7 +251,8 @@ public class OrcSplit extends FileSplit implements 
ColumnarSplit, LlapAwareSplit
 return true;
   }
 } else {
-  if (isAcidRead && hasBase && isVectorized) {
+  boolean isAcidEnabled = HiveConf.getBoolVar(conf, 
ConfVars.LLAP_IO_ACID_ENABLED);
+  if (isAcidEnabled && isAcidRead && hasBase && isVectorized) {
 if (hasDelta) {
   if (isSplitUpdate) {
 // Base with delete deltas


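A minimal usage sketch for the new flag; the flag name and default are taken from the HiveConf diff above, the rest is illustrative:

// Minimal sketch: reading/overriding the new flag the same way OrcSplit does.
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class LlapIoAcidFlag {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setBoolVar(ConfVars.LLAP_IO_ACID_ENABLED, false);  // same as: set hive.llap.io.acid=false;
    boolean isAcidEnabled = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ACID_ENABLED);
    System.out.println("LLAP IO over ACID reads: " + isAcidEnabled);  // false
  }
}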

[2/2] hive git commit: HIVE-18658 : WM: allow not specifying scheduling policy when creating a pool (Sergey Shelukhin, reviewed by Prasanth Jayachandran)

2018-02-20 Thread sershe
HIVE-18658 : WM: allow not specifying scheduling policy when creating a pool 
(Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3df6bc28
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3df6bc28
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3df6bc28

Branch: refs/heads/master
Commit: 3df6bc28b7c4a3223a2771caa0058a63941cec39
Parents: dbf38ed
Author: sergey 
Authored: Tue Feb 20 17:25:38 2018 -0800
Committer: sergey 
Committed: Tue Feb 20 17:25:38 2018 -0800

--
 .../hive/ql/parse/DDLSemanticAnalyzer.java  |   7 +-
 .../test/queries/clientpositive/resourceplan.q  |   6 +-
 .../clientpositive/llap/resourceplan.q.out  | 256 +++
 .../results/clientpositive/llap/sysdb.q.out |   8 +-
 .../hive/metastore/MetaStoreDirectSql.java  |   2 +
 5 files changed, 227 insertions(+), 52 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/3df6bc28/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 8ad6fce..718faff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -1135,8 +1135,8 @@ public class DDLSemanticAnalyzer extends 
BaseSemanticAnalyzer {
 
   private void analyzeCreatePool(ASTNode ast) throws SemanticException {
 // TODO: allow defaults for e.g. scheduling policy.
-if (ast.getChildCount() != 5) {
-  throw new SemanticException("Invalid syntax for create pool.");
+if (ast.getChildCount() < 3) {
+  throw new SemanticException("Expected more arguments: " + 
ast.toStringTree());
 }
 String rpName = unescapeIdentifier(ast.getChild(0).getText());
 String poolPath = poolPath(ast.getChild(1));
@@ -1168,6 +1168,9 @@ public class DDLSemanticAnalyzer extends 
BaseSemanticAnalyzer {
 if (!pool.isSetAllocFraction()) {
   throw new SemanticException("alloc_fraction should be specified for a 
pool");
 }
+if (!pool.isSetQueryParallelism()) {
+  throw new SemanticException("query_parallelism should be specified for a 
pool");
+}
 CreateOrAlterWMPoolDesc desc = new CreateOrAlterWMPoolDesc(pool, poolPath, 
false);
 addServiceOutput();
 rootTasks.add(TaskFactory.get(

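Sketched outside Hive for clarity (illustrative names; only the two error messages come from the diff above), the relaxed rule is: scheduling policy may now be omitted, while alloc_fraction and query_parallelism stay mandatory.

// Illustrative sketch of the new validation rules -- not Hive's actual code.
public class PoolValidationSketch {
  static void validate(Double allocFraction, Integer queryParallelism, String schedulingPolicy) {
    if (allocFraction == null) {
      throw new IllegalArgumentException("alloc_fraction should be specified for a pool");
    }
    if (queryParallelism == null) {
      throw new IllegalArgumentException("query_parallelism should be specified for a pool");
    }
    // schedulingPolicy may be null now -- CREATE POOL no longer requires it.
  }

  public static void main(String[] args) {
    validate(1.0, 5, null);  // accepted after this patch: scheduling policy omitted
    try {
      validate(1.0, null, "default");
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage());  // query_parallelism should be specified for a pool
    }
  }
}
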
http://git-wip-us.apache.org/repos/asf/hive/blob/3df6bc28/ql/src/test/queries/clientpositive/resourceplan.q
--
diff --git a/ql/src/test/queries/clientpositive/resourceplan.q 
b/ql/src/test/queries/clientpositive/resourceplan.q
index 7314585..009816d 100644
--- a/ql/src/test/queries/clientpositive/resourceplan.q
+++ b/ql/src/test/queries/clientpositive/resourceplan.q
@@ -1,5 +1,6 @@
 -- Continue on errors, we do check some error conditions below.
 set hive.cli.errors.ignore=true;
+set hive.test.authz.sstd.hs2.mode=true;
 
 -- Prevent NPE in calcite.
 set hive.cbo.enable=false;
@@ -210,8 +211,9 @@ SELECT * FROM SYS.WM_TRIGGERS;
 CREATE POOL plan_1.default WITH
ALLOC_FRACTION=1.0, QUERY_PARALLELISM=5, SCHEDULING_POLICY='default';
 
-CREATE POOL plan_2.default WITH
-   ALLOC_FRACTION=1.0, QUERY_PARALLELISM=5, SCHEDULING_POLICY='default';
+CREATE POOL plan_2.default WITH QUERY_PARALLELISM=5, 
SCHEDULING_POLICY='default';
+CREATE POOL plan_2.default WITH ALLOC_FRACTION=1.0;
+CREATE POOL plan_2.default WITH ALLOC_FRACTION=1.0, QUERY_PARALLELISM=5;
 SELECT * FROM SYS.WM_POOLS;
 
 CREATE POOL plan_2.default.c1 WITH

http://git-wip-us.apache.org/repos/asf/hive/blob/3df6bc28/ql/src/test/results/clientpositive/llap/resourceplan.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/resourceplan.q.out 
b/ql/src/test/results/clientpositive/llap/resourceplan.q.out
index b23720d..d790b44 100644
--- a/ql/src/test/results/clientpositive/llap/resourceplan.q.out
+++ b/ql/src/test/results/clientpositive/llap/resourceplan.q.out
@@ -66,9 +66,9 @@ default	srcpart	hive_test_user	USER	DELETE	true	-1	hive_test_user
 default	srcpart	hive_test_user	USER	INSERT	true	-1	hive_test_user
 default	srcpart	hive_test_user	USER	SELECT	true	-1	hive_test_user
 default	srcpart	hive_test_user	USER	UPDATE	true	-1	hive_test_user
-PREHOOK: query: DROP DATABASE IF EXISTS SYS
+PREHOOK: query: DROP DATABASE IF EXISTS SYS CASCADE
 PREHOOK: type: DROPDATABASE

hive git commit: HIVE-18742: Vectorization acid/inputformat check should allow NullRowsInputFormat/OneNullRowInputFormat (Jason Dere, reviewed by Sergey Shelukhin)

2018-02-20 Thread jdere
Repository: hive
Updated Branches:
  refs/heads/master 111ed0964 -> e51f7c9d2


HIVE-18742: Vectorization acid/inputformat check should allow 
NullRowsInputFormat/OneNullRowInputFormat (Jason Dere, reviewed by Sergey 
Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e51f7c9d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e51f7c9d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e51f7c9d

Branch: refs/heads/master
Commit: e51f7c9d277c8a1a7a289063b9bcf43ad6de8e99
Parents: 111ed09
Author: Jason Dere 
Authored: Tue Feb 20 12:49:16 2018 -0800
Committer: Jason Dere 
Committed: Tue Feb 20 12:49:16 2018 -0800

--
 .../hive/ql/optimizer/physical/Vectorizer.java  |  12 +-
 .../test/queries/clientpositive/acid_nullscan.q |  17 ++
 .../results/clientpositive/acid_nullscan.q.out  | 162 +++
 3 files changed, 190 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 27b53b8..52ef2d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -101,6 +101,8 @@ import 
org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport.Support;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
+import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -353,6 +355,14 @@ public class Vectorizer implements PhysicalPlanResolver {
 vectorDeserializeTextSupportSet.addAll(Arrays.asList(Support.values()));
   }
 
+  private static final Set<String> supportedAcidInputFormats = new TreeSet<String>();
+  static {
+supportedAcidInputFormats.add(OrcInputFormat.class.getName());
+// For metadataonly or empty rows optimizations, null/onerow input format 
can be selected.
+supportedAcidInputFormats.add(NullRowsInputFormat.class.getName());
+supportedAcidInputFormats.add(OneNullRowInputFormat.class.getName());
+  }
+
   private BaseWork currentBaseWork;
  private Operator<? extends OperatorDesc> currentOperator;
  private Collection<Class<?>> vectorizedInputFormatExcludes;
@@ -1201,7 +1211,7 @@ public class Vectorizer implements PhysicalPlanResolver {
 // Today, ACID tables are only ORC and that format is vectorizable.  
Verify these
 // assumptions.
 Preconditions.checkState(isInputFileFormatVectorized);
-Preconditions.checkState(inputFileFormatClassName.equals(OrcInputFormat.class.getName()));
+Preconditions.checkState(supportedAcidInputFormats.contains(inputFileFormatClassName));
 
 if (!useVectorizedInputFileFormat) {
   enabledConditionsNotMetList.add("Vectorizing ACID tables requires "

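Restated as a standalone sketch (not the actual Vectorizer code; class names are taken from the imports above), the new check accepts the two null-row formats alongside ORC:

// Standalone sketch of the whitelist check added above.
import java.util.Set;
import java.util.TreeSet;

public class AcidInputFormatWhitelist {
  private static final Set<String> SUPPORTED = new TreeSet<String>();
  static {
    SUPPORTED.add("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");
    // For metadataonly or empty-rows optimizations, a null/one-row format may be selected.
    SUPPORTED.add("org.apache.hadoop.hive.ql.io.NullRowsInputFormat");
    SUPPORTED.add("org.apache.hadoop.hive.ql.io.OneNullRowInputFormat");
  }

  public static void main(String[] args) {
    System.out.println(SUPPORTED.contains("org.apache.hadoop.hive.ql.io.NullRowsInputFormat")); // true
  }
}
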
http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/test/queries/clientpositive/acid_nullscan.q
--
diff --git a/ql/src/test/queries/clientpositive/acid_nullscan.q 
b/ql/src/test/queries/clientpositive/acid_nullscan.q
new file mode 100644
index 000..d048231
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/acid_nullscan.q
@@ -0,0 +1,17 @@
+
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.vectorized.execution.enabled=true;
+
+CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS 
STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized select cint, cstring1 from alltypesorc where 
cint is not null order by cint limit 10;
+insert into table acid_vectorized values (1, 'bar');
+
+explain extended
+select sum(a) from acid_vectorized where false;
+
+select sum(a) from acid_vectorized where false;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/test/results/clientpositive/acid_nullscan.q.out
--
diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out 

hive git commit: HIVE-18625: SessionState Not Checking For Directory Creation Result (Andrew Sherman, reviewed by Sahil Takiar)

2018-02-20 Thread stakiar
Repository: hive
Updated Branches:
  refs/heads/master 03a1e6247 -> 111ed0964


HIVE-18625: SessionState Not Checking For Directory Creation Result (Andrew 
Sherman, reviewed by Sahil Takiar)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/111ed096
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/111ed096
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/111ed096

Branch: refs/heads/master
Commit: 111ed096496c37674601173cfeaa40cbf948f878
Parents: 03a1e62
Author: Andrew Sherman 
Authored: Tue Feb 20 11:28:20 2018 -0800
Committer: Sahil Takiar 
Committed: Tue Feb 20 11:28:34 2018 -0800

--
 .../hadoop/hive/ql/session/SessionState.java|  7 ++-
 .../hive/ql/session/TestSessionState.java   | 53 
 2 files changed, 58 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/111ed096/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java 
b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index dfc2dfa..0071a9a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -740,7 +740,8 @@ public class SessionState {
* @return
* @throws IOException
*/
-  private static void createPath(HiveConf conf, Path path, String permission, 
boolean isLocal,
+  @VisibleForTesting
+  static void createPath(HiveConf conf, Path path, String permission, boolean 
isLocal,
   boolean isCleanUp) throws IOException {
 FsPermission fsPermission = new FsPermission(permission);
 FileSystem fs;
@@ -750,7 +751,9 @@ public class SessionState {
   fs = path.getFileSystem(conf);
 }
 if (!fs.exists(path)) {
-  fs.mkdirs(path, fsPermission);
+  if (!fs.mkdirs(path, fsPermission)) {
+throw new IOException("Failed to create directory " + path + " on fs " 
+ fs.getUri());
+  }
   String dirType = isLocal ? "local" : "HDFS";
   LOG.info("Created " + dirType + " directory: " + path.toString());
 }

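The corrected pattern, as a self-contained sketch against any Hadoop FileSystem (the message text mirrors the diff above): mkdirs can report failure by returning false rather than throwing, so the result must be checked explicitly.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

// Self-contained sketch of the corrected pattern -- not SessionState itself.
public class CheckedMkdirs {
  static void createDir(Configuration conf, Path path, String permission) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path) && !fs.mkdirs(path, new FsPermission(permission))) {
      // mkdirs can return false without throwing; surface that as an error.
      throw new IOException("Failed to create directory " + path + " on fs " + fs.getUri());
    }
  }
}
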
http://git-wip-us.apache.org/repos/asf/hive/blob/111ed096/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java 
b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
index 8750196..0fa1c81 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.session;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.IOException;
@@ -27,6 +29,10 @@ import java.util.Arrays;
 import java.util.Collection;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.ParentNotDirectoryException;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -257,4 +263,51 @@ public class TestSessionState {
   }
 }
   }
+
+  /**
+   * Unit test for SessionState.createPath().
+   */
+  @Test
+  public void testCreatePath() throws Exception {
+HiveConf conf = new HiveConf();
+LocalFileSystem localFileSystem = FileSystem.getLocal(conf);
+
+Path repeatedCreate = new Path("repeatedCreate");
+SessionState.createPath(conf, repeatedCreate, "700", true, true);
+assertTrue(localFileSystem.exists(repeatedCreate));
+// second time will complete silently
+SessionState.createPath(conf, repeatedCreate, "700", true, true);
+
+Path fileNotDirectory = new Path("fileNotDirectory");
+localFileSystem.create(fileNotDirectory);
+localFileSystem.deleteOnExit(fileNotDirectory);
+
+// Show we cannot create a child of a file
+try {
+  SessionState.createPath(conf, new Path(fileNotDirectory, "child"), 
"700", true, true);
+  fail("did not get expected exception creating a child of a file");
+} catch (ParentNotDirectoryException e) {
+  assertTrue(e.getMessage().contains("Parent path is not a directory"));
+}
+
+// Show we cannot create a child of a null directory
+try {
+  //noinspection ConstantConditions
+  SessionState.createPath(conf, new Path((String) null, "child"), "700", 
true, true);
+  fail("did 

hive git commit: HIVE-18541 : Secure HS2 web UI with PAM (Oleksiy Sayankin via Szehon)

2018-02-20 Thread szehon
Repository: hive
Updated Branches:
  refs/heads/master e05e0fa19 -> 03a1e6247


HIVE-18541 : Secure HS2 web UI with PAM (Oleksiy Sayankin via Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/03a1e624
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/03a1e624
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/03a1e624

Branch: refs/heads/master
Commit: 03a1e624760f15c57bed04cba9ff6f3a5f1846c4
Parents: e05e0fa
Author: Szehon Ho 
Authored: Tue Feb 20 20:12:59 2018 +0100
Committer: Szehon Ho 
Committed: Tue Feb 20 20:12:59 2018 +0100

--
 common/pom.xml  |  20 ++
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +
 .../java/org/apache/hive/http/HttpServer.java   |  45 -
 .../hive/http/security/PamAuthenticator.java| 140 ++
 .../hive/http/security/PamConstraint.java   |  34 
 .../http/security/PamConstraintMapping.java |  27 +++
 .../hive/http/security/PamLoginService.java |  78 
 .../hive/http/security/PamUserIdentity.java |  38 
 .../apache/hive/service/server/HiveServer2.java |  25 +++
 .../service/server/TestHS2HttpServerPam.java| 182 +++
 .../TestHS2HttpServerPamConfiguration.java  | 128 +
 11 files changed, 718 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/03a1e624/common/pom.xml
--
diff --git a/common/pom.xml b/common/pom.xml
index aaeecc0..4da46f2 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -79,6 +79,11 @@
     </dependency>
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-http</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-rewrite</artifactId>
     </dependency>
     <dependency>
@@ -160,6 +165,21 @@
       </exclusions>
     </dependency>
     <dependency>
+      <groupId>net.sf.jpam</groupId>
+      <artifactId>jpam</artifactId>
+      <version>${jpam.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commmons-logging</groupId>
+          <artifactId>commons-logging</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-mapreduce-client-core</artifactId>
       <version>${hadoop.version}</version>

http://git-wip-us.apache.org/repos/asf/hive/blob/03a1e624/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 3d777f9..b51dc7e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2450,6 +2450,8 @@ public class HiveConf extends Configuration {
 "the value of hive.server2.webui.host or the correct host name."),
 
HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES("hive.server2.webui.max.historic.queries",
 25,
 "The maximum number of past queries to show in HiverSever2 WebUI."),
+HIVE_SERVER2_WEBUI_USE_PAM("hive.server2.webui.use.pam", false,
+"If true, the HiveServer2 WebUI will be secured with PAM."),
 
 // Tez session settings
 HIVE_SERVER2_TEZ_INTERACTIVE_QUEUE("hive.server2.tez.interactive.queue", 
"",

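An assumed usage sketch for the new property; only the name and default come from the diff above, the rest is illustrative:

import org.apache.hadoop.hive.conf.HiveConf;

// Illustrative: toggling PAM protection for the HS2 web UI before server start.
public class WebUiPamFlag {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setBoolean("hive.server2.webui.use.pam", true);  // or set it in hive-site.xml
    System.out.println(conf.getBoolean("hive.server2.webui.use.pam", false));  // true
  }
}
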
http://git-wip-us.apache.org/repos/asf/hive/blob/03a1e624/common/src/java/org/apache/hive/http/HttpServer.java
--
diff --git a/common/src/java/org/apache/hive/http/HttpServer.java 
b/common/src/java/org/apache/hive/http/HttpServer.java
index 2a8f7ae..71b2668 100644
--- a/common/src/java/org/apache/hive/http/HttpServer.java
+++ b/common/src/java/org/apache/hive/http/HttpServer.java
@@ -21,6 +21,7 @@ package org.apache.hive.http;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URL;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -45,6 +46,10 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authentication.server.AuthenticationFilter;
 import org.apache.hadoop.security.authorize.AccessControlList;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hive.http.security.PamAuthenticator;
+import org.apache.hive.http.security.PamConstraint;
+import org.apache.hive.http.security.PamConstraintMapping;
+import org.apache.hive.http.security.PamLoginService;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.core.Appender;
 import org.apache.logging.log4j.core.Logger;
@@ -54,7 +59,11 @@ import org.apache.logging.log4j.core.appender.FileManager;
 import org.apache.logging.log4j.core.appender.OutputStreamManager;
 import org.eclipse.jetty.rewrite.handler.RewriteHandler;
 import 

hive git commit: HIVE-16125 : Split work between reducers. (Slim Bouguerra via Ashutosh Chauhan)

2018-02-20 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/master e0bf12d98 -> e05e0fa19


HIVE-16125 : Split work between reducers. (Slim Bouguerra via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e05e0fa1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e05e0fa1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e05e0fa1

Branch: refs/heads/master
Commit: e05e0fa19d7fd7c48617c4a770fa579b7f01f40e
Parents: e0bf12d
Author: Slim Bouguerra 
Authored: Thu Feb 8 20:46:00 2018 -0800
Committer: Ashutosh Chauhan 
Committed: Tue Feb 20 10:34:11 2018 -0800

--
 .../org/apache/hadoop/hive/conf/Constants.java  |   3 +
 .../hadoop/hive/druid/io/DruidOutputFormat.java |  12 +-
 .../hadoop/hive/druid/io/DruidRecordWriter.java |  72 ++-
 .../hadoop/hive/druid/serde/DruidSerDe.java |  26 +-
 .../test/resources/testconfiguration.properties |   3 +-
 ...tedDynPartitionTimeGranularityOptimizer.java | 237 ---
 .../druidmini_dynamic_partition.q   | 170 +
 .../druid/druidmini_dynamic_partition.q.out | 625 +++
 8 files changed, 1038 insertions(+), 110 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e05e0fa1/common/src/java/org/apache/hadoop/hive/conf/Constants.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java 
b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
index 51408b1..10aaee1 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
@@ -33,7 +33,10 @@ public class Constants {
   public static final String DRUID_DATA_SOURCE = "druid.datasource";
   public static final String DRUID_SEGMENT_GRANULARITY = 
"druid.segment.granularity";
   public static final String DRUID_QUERY_GRANULARITY = 
"druid.query.granularity";
+  public static final String DRUID_TARGET_SHARDS_PER_GRANULARITY =
+  "druid.segment.targetShardsPerGranularity";
   public static final String DRUID_TIMESTAMP_GRANULARITY_COL_NAME = 
"__time_granularity";
+  public static final String DRUID_SHARD_KEY_COL_NAME = 
"__druid_extra_partition_key";
   public static final String DRUID_QUERY_JSON = "druid.query.json";
   public static final String DRUID_QUERY_TYPE = "druid.query.type";
   public static final String DRUID_QUERY_FETCH = "druid.query.fetch";

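The two new constants, exercised in isolation (a sketch; the TBLPROPERTIES usage mentioned in the comment is an assumption based on the property name):

import org.apache.hadoop.hive.conf.Constants;

// Prints the new property/column names added above. Setting
// "druid.segment.targetShardsPerGranularity" in a table's TBLPROPERTIES is the
// assumed way to request a fixed shard count per granularity bucket.
public class DruidShardConstants {
  public static void main(String[] args) {
    System.out.println(Constants.DRUID_TARGET_SHARDS_PER_GRANULARITY); // druid.segment.targetShardsPerGranularity
    System.out.println(Constants.DRUID_SHARD_KEY_COL_NAME);            // __druid_extra_partition_key
  }
}
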
http://git-wip-us.apache.org/repos/asf/hive/blob/e05e0fa1/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java
--
diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java
index 8c25d62..b758efd 100644
--- 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java
+++ 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java
@@ -92,6 +92,10 @@ public class DruidOutputFormat implements HiveOutputFormat
+final int maxPartitionSize = targetNumShardsPerGranularity > 0 ? -1 : HiveConf
+.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
 // If datasource is in the table properties, it is an INSERT/INSERT OVERWRITE as the datasource
 // name was already persisted. Otherwise, it is a CT/CTAS and we need to get the name from the
 // job properties that are set by configureOutputJobProperties in the DruidStorageHandler
@@ -191,8 +195,10 @@ public class DruidOutputFormat implements HiveOutputFormat