Repository: hive Updated Branches: refs/heads/master ee5566b75 -> 9b376a7b0
http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out index 9e59594..07616bb 100644 --- a/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out @@ -80,7 +80,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1 - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -91,7 +91,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8] selectExpressions: DateColSubtractIntervalYearMonthScalar(col 0:date, val 2-2) -> 3:date, DateColSubtractIntervalYearMonthScalar(col 0:date, val -2-2) -> 4:date, DateColAddIntervalYearMonthScalar(col 0:date, val 2-2) -> 5:date, DateColAddIntervalYearMonthScalar(col 0:date, val -2-2) -> 6:date, IntervalYearMonthScalarAddDateColumn(val -2-2, col 0:interval_year_month) -> 7:date, IntervalYearMonthScalarAddDateColumn(val 2-2, col 0:interval_year_month) -> 8:date - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 19600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) sort order: + @@ -99,7 +99,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 19600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs @@ -128,13 +128,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 19600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 19600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -262,7 +262,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1 - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -273,7 +273,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 4, 5] selectExpressions: DateColSubtractDateScalar(col 0:date, val 1999-06-07) -> 3:interval_day_time, DateScalarSubtractDateColumn(val 1999-06-07, col 0:date) -> 4:interval_day_time, DateColSubtractDateColumn(col 0:date, col 0:date) -> 5:interval_day_time - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 4600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) sort order: + @@ -281,7 +281,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 4600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs @@ -310,13 +310,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 4600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 4600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -444,7 +444,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1 - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -455,7 +455,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 4, 5, 6, 7, 8] selectExpressions: TimestampColSubtractIntervalYearMonthScalar(col 1:timestamp, val 2-2) -> 3:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 1:timestamp, val -2-2) -> 4:timestamp, TimestampColAddIntervalYearMonthScalar(col 1:timestamp, val 2-2) -> 5:timestamp, TimestampColAddIntervalYearMonthScalar(col 1:timestamp, val -2-2) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val -2-2, col 1:interval_year_month) -> 7:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 2-2, col 1:interval_year_month) -> 8:timestamp - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + @@ -463,7 +463,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs @@ -492,13 +492,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -733,7 +733,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1 - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -744,7 +744,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8] selectExpressions: DateColSubtractIntervalDayTimeScalar(col 0:date, val 99 11:22:33.123456789) -> 3:timestamp, DateColSubtractIntervalDayTimeScalar(col 0:date, val -99 11:22:33.123456789) -> 4:timestamp, DateColAddIntervalDayTimeScalar(col 0:date, val 99 11:22:33.123456789) -> 5:timestamp, DateColAddIntervalDayTimeScalar(col 0:date, val -99 11:22:33.123456789) -> 6:timestamp, IntervalDayTimeScalarAddDateColumn(val -99 11:22:33.123456789, col 0:date) -> 7:timestamp, IntervalDayTimeScalarAddDateColumn(val 99 11:22:33.123456789, col 0:date) -> 8:timestamp - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) sort order: + @@ -752,7 +752,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs @@ -781,13 +781,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 2744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -917,7 +917,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1 - Statistics: Num rows: 50 Data size: 4704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -928,7 +928,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 3, 4, 5] selectExpressions: DateColSubtractTimestampColumn(col 0:date, col 1:timestamp) -> 3:interval_day_time, TimestampColSubtractDateColumn(col 1:timestamp, col 0:date) -> 4:interval_day_time, TimestampColSubtractTimestampColumn(col 1:timestamp, col 1:timestamp) -> 5:interval_day_time - Statistics: Num rows: 50 Data size: 4704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 6600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) sort order: + @@ -936,7 +936,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 50 Data size: 4704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 6600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: timestamp), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time) Execution mode: vectorized, llap LLAP IO: all inputs @@ -965,13 +965,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4] - Statistics: Num rows: 50 Data size: 4704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 6600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 4704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 6600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1101,7 +1101,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1 - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1112,7 +1112,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 4, 5, 6, 7, 8] selectExpressions: TimestampColSubtractIntervalDayTimeScalar(col 1:timestamp, val 99 11:22:33.123456789) -> 3:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 1:timestamp, val -99 11:22:33.123456789) -> 4:timestamp, TimestampColAddIntervalDayTimeScalar(col 1:timestamp, val 99 11:22:33.123456789) -> 5:timestamp, TimestampColAddIntervalDayTimeScalar(col 1:timestamp, val -99 11:22:33.123456789) -> 6:timestamp, IntervalDayTimeScalarAddTimestampColumn(val -99 11:22:33.123456789, col 1:timestamp) -> 7:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 99 11:22:33.123456789, col 1:timestamp) -> 8:timestamp - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + @@ -1120,7 +1120,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1149,13 +1149,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 1960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 14000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out b/ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index 0de0c33..db98194 100644 --- a/ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -128,7 +128,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vectortab2korc_n6 - Statistics: Num rows: 2000 Data size: 22812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 22860 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -138,7 +138,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3, 7] - Statistics: Num rows: 2000 Data size: 22812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 22860 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(b) Group By Vectorization: @@ -152,7 +152,7 @@ STAGE PLANS: keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 22812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -161,7 +161,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2000 Data size: 22812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -196,7 +196,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 11406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: - @@ -204,7 +204,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 11406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -222,13 +222,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 1000 Data size: 11406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1000 Data size: 11406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 57a1ea7..5dd8c98 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -1898,7 +1898,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_timestamp - Statistics: Num rows: 137 Data size: 96472 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1908,13 +1908,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2133,7 +2133,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_timestamp - Statistics: Num rows: 137 Data size: 96472 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -2143,7 +2143,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 96472 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -2151,7 +2151,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 137 Data size: 96472 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap @@ -2181,13 +2181,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -2195,7 +2195,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Reducer 3 @@ -2214,19 +2214,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4323,7 +4323,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -4333,13 +4333,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4558,7 +4558,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -4568,7 +4568,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -4576,7 +4576,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap @@ -4606,13 +4606,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -4620,7 +4620,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Reducer 3 @@ -4639,19 +4639,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out index a3ad696..f8e2e5c 100644 --- a/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out @@ -127,7 +127,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vectortab2korc_n7 - Statistics: Num rows: 2000 Data size: 15208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 15344 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -137,7 +137,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3] - Statistics: Num rows: 2000 Data size: 15208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 15344 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -145,7 +145,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2000 Data size: 15208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 15344 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -173,13 +173,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 2000 Data size: 15208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 15344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2000 Data size: 15208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 15344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/llap/vector_reduce2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce2.q.out b/ql/src/test/results/clientpositive/llap/vector_reduce2.q.out index 8ff51ac..fb30ae1 100644 --- a/ql/src/test/results/clientpositive/llap/vector_reduce2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_reduce2.q.out @@ -127,7 +127,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vectortab2korc_n5 - Statistics: Num rows: 2000 Data size: 707172 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 387636 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -137,7 +137,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 2, 9] - Statistics: Num rows: 2000 Data size: 707172 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 387636 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ @@ -145,7 +145,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2000 Data size: 707172 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 387636 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -173,13 +173,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 2000 Data size: 707172 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 387636 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2000 Data size: 707172 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 387636 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/llap/vector_reduce3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce3.q.out b/ql/src/test/results/clientpositive/llap/vector_reduce3.q.out index e26c8b2..03fa93e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_reduce3.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_reduce3.q.out @@ -127,7 +127,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vectortab2korc_n2 - Statistics: Num rows: 2000 Data size: 349784 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 188000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -137,7 +137,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8] - Statistics: Num rows: 2000 Data size: 349784 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 188000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -145,7 +145,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2000 Data size: 349784 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 188000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -173,13 +173,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 2000 Data size: 349784 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 188000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2000 Data size: 349784 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 188000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index 39578d2..f938a09 100644 --- a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -340,7 +340,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vectortab2korc_n0 - Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 106288 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -351,11 +351,11 @@ STAGE PLANS: native: true projectedOutputColumnNums: [20] selectExpressions: StringGroupConcatColCol(col 18:string, col 19:string)(children: StringGroupColConcatStringScalar(col 19:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 18:string)(children: CastLongToString(col 14:int)(children: CastDoubleToLong(col 16:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 16:double) -> 14:int) -> 18:string) -> 19:string) -> 18:string, CastLongToString(col 14:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 14:int) -> 19:string) -> 20:string - Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 106288 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: + keys: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 106288 Basic stats: COMPLETE Column stats: COMPLETE top n: 50 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -372,7 +372,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -381,7 +381,7 @@ STAGE PLANS: className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -414,7 +414,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 53228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -422,7 +422,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 53228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -440,19 +440,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1000 Data size: 53228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 50 Data size: 2650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 2650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index 15cd648..41f1a5e 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -261,7 +261,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 5480 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -272,13 +272,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 3:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 4:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 7:int, VectorUDFDayOfWeekTimestamp(col 1:timestamp, field DAY_OF_WEEK) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, CastTimestampToDate(col 1:timestamp) -> 10:date, VectorUDFDateTimestamp(col 1:timestamp) -> 11:date, VectorUDFDateAddColScalar(col 1:timestamp, val 2) -> 12:date, VectorUDFDateSubColScalar(col 1:timestamp, val 2) -> 13:date, VectorUDFDateDiffColScalar(col 1:timestamp, val 2000-01-01) -> 14:int, VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 15:int, VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 16:int, VectorUDFDateDiffColScalar(col 1:times tamp, val NULL) -> 17:int, VectorUDFDateDiffColScalar(col 1:timestamp, val 2007-03-14) -> 18:int, VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 19:int, VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 20:int, VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 21:int - Statistics: Num rows: 137 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 44936 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 44936 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -553,7 +553,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -564,13 +564,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 0, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] selectExpressions: VectorUDFUnixTimeStampDate(col 0) -> 3:bigint, VectorUDFYearDate(col 0, field YEAR) -> 4:int, VectorUDFMonthDate(col 0, field MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 7:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 8:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 9:int, VectorUDFDateAddColScalar(col 0:date, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 11:date, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 12:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 13:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 14:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 15:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 16:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 17:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 18:int, VectorUDFDateDiffColSca lar(col 0:date, val NULL) -> 19:int - Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 47128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 47128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -849,7 +849,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 12672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -860,13 +860,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 0, 5, 6, 7, 8, 9, 10, 4, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] selectExpressions: LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 3:int, VectorUDFYearDate(col 0, field YEAR) -> 4:int) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 3:int, VectorUDFMonthDate(col 0, field MONTH) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfWeekTimestamp(col 1:timestamp, field DAY_OF_WEEK) -> 3:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 4 :int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:date, col 0:date)(children: CastTimestampToDate(col 1:timestamp) -> 3:date) -> 4:boolean, LongColEqualLongColumn(col 3:date, col 0:date)(children: VectorUDFDateTimestamp(col 1:timestamp) -> 3:date, col 0:date) -> 11:boolean, LongColEqualLongColumn(col 3:date, col 12:date)(children: VectorUDFDateAddColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateAddColScalar(col 0:date, val 2) -> 12:date) -> 13:boolean, LongColEqualLongColumn(col 3:date, col 12:date)(children: VectorUDFDateSubColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 12:date) -> 14:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2000-01-01) -> 3:int, VectorUD FDateDiffColScalar(col 0:date, val 2000-01-01) -> 12:int) -> 15:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 16:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 17:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 18:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 12:int) -> 19:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDat eDiffColScalar(col 0:date, val NULL) -> 12:int) -> 20:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 21:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 22:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 23:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 24:boolean - Statistics: Num rows: 137 Data size: 12672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 24112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 12672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 24112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1117,7 +1117,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1128,19 +1128,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 4, 6, 7, 9] selectExpressions: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 4:date, VectorUDFDateDiffColCol(col 0:date, col 5:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 5:date) -> 6:int, VectorUDFDateDiffColCol(col 0:date, col 5:date)(children: VectorUDFDateSubColScalar(col 0:date, val 2) -> 5:date) -> 7:int, VectorUDFDateDiffColCol(col 5:date, col 8:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 5:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 8:date) -> 9:int - Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 24660 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1244,7 +1244,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_udf_flight_orc - Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1254,7 +1254,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(fl_date), max(fl_date), count(fl_date), count() Group By Vectorization: @@ -1266,14 +1266,14 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1306,7 +1306,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) sort order: + @@ -1314,7 +1314,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1332,13 +1332,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out b/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out index 9db1372..a6ba774 100644 --- a/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out +++ b/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out @@ -444,7 +444,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"test_date\":\"true\",\"test_decimal\":\"true\",\"test_timestamp\":\"true\",\"test_tinyint\":\"true\"}} bucketing_version 2 numFiles 1 numRows 50 http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java index 3e0ce07..bcdb56d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java @@ -19,23 +19,22 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; +import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.dateInspectorFromStats; + import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; -import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.dateInspectorFromStats; - public class DateColumnStatsMerger extends ColumnStatsMerger { @Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { DateColumnStatsDataInspector aggregateData = dateInspectorFromStats(aggregateColStats); DateColumnStatsDataInspector newData = dateInspectorFromStats(newColStats); - Date lowValue = aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData - .getLowValue() : newData.getLowValue(); + + Date lowValue = min(aggregateData.getLowValue(), newData.getLowValue()); aggregateData.setLowValue(lowValue); - Date highValue = aggregateData.getHighValue().compareTo(newData.getHighValue()) >= 0 ? aggregateData - .getHighValue() : newData.getHighValue(); + Date highValue = max(aggregateData.getHighValue(), newData.getHighValue()); aggregateData.setHighValue(highValue); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { @@ -56,4 +55,26 @@ public class DateColumnStatsMerger extends ColumnStatsMerger { aggregateData.setNumDVs(ndv); } } + + private Date min(Date v1, Date v2) { + if (v1 == null || v2 == null) { + if (v1 != null) { + return v1; + } else { + return v2; + } + } + return v1.compareTo(v2) < 0 ? v1 : v2; + } + + private Date max(Date v1, Date v2) { + if (v1 == null || v2 == null) { + if (v1 != null) { + return v1; + } else { + return v2; + } + } + return v1.compareTo(v2) > 0 ? v1 : v2; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMergerTest.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMergerTest.java new file mode 100644 index 0000000..e41339d --- /dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMergerTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hive.metastore.columnstats.merge; + +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Date; +import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(MetastoreUnitTest.class) +public class DateColumnStatsMergerTest { + + private static final Date DATE_1 = new Date(1); + private static final Date DATE_2 = new Date(2); + private static final Date DATE_3 = new Date(3); + + private ColumnStatsMerger merger = new DateColumnStatsMerger(); + + @Test + public void testMergeNullMinMaxValues() { + ColumnStatisticsObj old = new ColumnStatisticsObj(); + createData(old, null, null); + + merger.merge(old, old); + + Assert.assertNull(old.getStatsData().getDateStats().getLowValue()); + Assert.assertNull(old.getStatsData().getDateStats().getHighValue()); + } + + @Test + public void testMergeNulls() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, null, null); + + ColumnStatisticsObj newObj; + + newObj = new ColumnStatisticsObj(); + createData(newObj, null, null); + merger.merge(oldObj, newObj); + + Assert.assertEquals(null, oldObj.getStatsData().getDateStats().getLowValue()); + Assert.assertEquals(null, oldObj.getStatsData().getDateStats().getHighValue()); + + newObj = new ColumnStatisticsObj(); + createData(newObj, DATE_1, DATE_3); + merger.merge(oldObj, newObj); + + newObj = new ColumnStatisticsObj(); + createData(newObj, null, null); + merger.merge(oldObj, newObj); + + Assert.assertEquals(DATE_1, oldObj.getStatsData().getDateStats().getLowValue()); + Assert.assertEquals(DATE_3, oldObj.getStatsData().getDateStats().getHighValue()); + } + + @Test + public void testMergeNonNullAndNullLowerValuesNewIsNull() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, DATE_2, DATE_2); + + ColumnStatisticsObj newObj; + + newObj = new ColumnStatisticsObj(); + createData(newObj, DATE_3, DATE_3); + merger.merge(oldObj, newObj); + + newObj = new ColumnStatisticsObj(); + createData(newObj, DATE_1, DATE_1); + merger.merge(oldObj, newObj); + + Assert.assertEquals(DATE_1, oldObj.getStatsData().getDateStats().getLowValue()); + Assert.assertEquals(DATE_3, oldObj.getStatsData().getDateStats().getHighValue()); + } + + private DateColumnStatsDataInspector createData(ColumnStatisticsObj objNulls, Date lowValue, + Date highValue) { + ColumnStatisticsData statisticsData = new ColumnStatisticsData(); + DateColumnStatsDataInspector data = new DateColumnStatsDataInspector(); + + statisticsData.setDateStats(data); + objNulls.setStatsData(statisticsData); + + data.setLowValue(lowValue); + data.setHighValue(highValue); + return data; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9b376a7b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java index 8ec3a2f..ca0a6c0 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java @@ -19,8 +19,6 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; -import java.nio.ByteBuffer; - import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;