Repository: hive Updated Branches: refs/heads/master 64c96e1e9 -> 4156c5da5
HIVE-13621: compute stats in certain cases fails with NPE (Vikram Dixit K, Pengcheng Xiong, reviewed by Gunther Hagleitner) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4156c5da Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4156c5da Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4156c5da Branch: refs/heads/master Commit: 4156c5da5099e3fa9b220229fe99ef0d609cd7ac Parents: 64c96e1 Author: Pengcheng Xiong <[email protected]> Authored: Thu May 12 10:17:21 2016 -0700 Committer: Pengcheng Xiong <[email protected]> Committed: Thu May 12 10:17:21 2016 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/metastore/hbase/HBaseUtils.java | 27 +-- .../apache/hadoop/hive/ql/exec/Operator.java | 2 +- .../stats/annotation/StatsRulesProcFactory.java | 1 + .../test/queries/clientpositive/deleteAnalyze.q | 31 ++++ .../results/clientpositive/deleteAnalyze.q.out | 173 +++++++++++++++++++ .../clientpositive/tez/deleteAnalyze.q.out | 140 +++++++++++++++ 7 files changed, 363 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 5aedd10..c891d40 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -70,6 +70,7 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\ smb_mapjoin_8.q minitez.query.files.shared=acid_globallimit.q,\ + deleteAnalyze.q,\ empty_join.q,\ alter_merge_2_orc.q,\ alter_merge_orc.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java index e0b449b..d1cff06 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java @@ -1205,17 +1205,22 @@ public class HBaseUtils { if (decimalData.isSetBitVectors()) { builder.setBitVectors(decimalData.getBitVectors()); } - builder.setDecimalStats( - HbaseMetastoreProto.ColumnStats.DecimalStats - .newBuilder() - .setLowValue( - HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder() - .setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled())) - .setScale(decimalData.getLowValue().getScale()).build()) - .setHighValue( - HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder() - .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled())) - .setScale(decimalData.getHighValue().getScale()).build())).build(); + if (decimalData.getLowValue() != null && decimalData.getHighValue() != null) { + builder.setDecimalStats( + HbaseMetastoreProto.ColumnStats.DecimalStats + .newBuilder() + .setLowValue( + HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder() + .setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled())) + .setScale(decimalData.getLowValue().getScale()).build()) + .setHighValue( + HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder() + .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled())) + .setScale(decimalData.getHighValue().getScale()).build())).build(); + } else { + builder.setDecimalStats(HbaseMetastoreProto.ColumnStats.DecimalStats.newBuilder().clear() + .build()); + } break; default: http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index f330564..636f079 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -412,7 +412,7 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C } /** - * This metod can be used to retrieve the results from async operations + * This method can be used to retrieve the results from async operations * started at init time - before the operator pipeline is started. * * @param os http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 320dc10..3944e10 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1792,6 +1792,7 @@ public class StatsRulesProcFactory { } } + denom = denom == 0 ? 1 : denom; factor = (double) max / (double) denom; for (int i = 0; i < rowCountParents.size(); i++) { http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/test/queries/clientpositive/deleteAnalyze.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/deleteAnalyze.q b/ql/src/test/queries/clientpositive/deleteAnalyze.q new file mode 100644 index 0000000..7e5371c --- /dev/null +++ b/ql/src/test/queries/clientpositive/deleteAnalyze.q @@ -0,0 +1,31 @@ +set hive.stats.autogather=true; + +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/testdeci2; + +create table testdeci2( +id int, +amount decimal(10,3), +sales_tax decimal(10,3), +item string) +stored as orc location '${system:test.tmp.dir}/testdeci2'; + +insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2'); + +describe formatted testdeci2; + +dfs -rmr ${system:test.tmp.dir}/testdeci2/000000_0; + +describe formatted testdeci2 amount; + +analyze table testdeci2 compute statistics for columns; + +set hive.stats.fetch.column.stats=true; + +analyze table testdeci2 compute statistics for columns; + +explain +select s.id, +coalesce(d.amount,0) as sales, +coalesce(d.sales_tax,0) as tax +from testdeci2 s join testdeci2 d +on s.item=d.item and d.id=2; http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/test/results/clientpositive/deleteAnalyze.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out new file mode 100644 index 0000000..7b9391b --- /dev/null +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -0,0 +1,173 @@ +PREHOOK: query: create table testdeci2( +id int, +amount decimal(10,3), +sales_tax decimal(10,3), +item string) +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@testdeci2 +POSTHOOK: query: create table testdeci2( +id int, +amount decimal(10,3), +sales_tax decimal(10,3), +item string) +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testdeci2 +PREHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@testdeci2 +POSTHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@testdeci2 +POSTHOOK: Lineage: testdeci2.amount EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: testdeci2.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: testdeci2.item SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: testdeci2.sales_tax EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: describe formatted testdeci2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testdeci2 +POSTHOOK: query: describe formatted testdeci2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testdeci2 +# col_name data_type comment + +id int +amount decimal(10,3) +sales_tax decimal(10,3) +item string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + totalSize 578 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +#### A masked pattern was here #### +PREHOOK: query: describe formatted testdeci2 amount +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testdeci2 +POSTHOOK: query: describe formatted testdeci2 amount +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testdeci2 +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +amount decimal(10,3) from deserializer +PREHOOK: query: analyze table testdeci2 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@testdeci2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table testdeci2 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testdeci2 +#### A masked pattern was here #### +PREHOOK: query: analyze table testdeci2 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@testdeci2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table testdeci2 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testdeci2 +#### A masked pattern was here #### +PREHOOK: query: explain +select s.id, +coalesce(d.amount,0) as sales, +coalesce(d.sales_tax,0) as tax +from testdeci2 s join testdeci2 d +on s.item=d.item and d.id=2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select s.id, +coalesce(d.amount,0) as sales, +coalesce(d.sales_tax,0) as tax +from testdeci2 s join testdeci2 d +on s.item=d.item and d.id=2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: item is not null (type: boolean) + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: int), item (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: s + Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((id = 2) and item is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: amount (type: decimal(10,3)), sales_tax (type: decimal(10,3)), item (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(10,3)), _col2 (type: decimal(10,3)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col3, _col4 + Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), COALESCE(_col3,0) (type: decimal(13,3)), COALESCE(_col4,0) (type: decimal(13,3)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out new file mode 100644 index 0000000..47f2a20 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out @@ -0,0 +1,140 @@ +PREHOOK: query: create table testdeci2( +id int, +amount decimal(10,3), +sales_tax decimal(10,3), +item string) +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@testdeci2 +POSTHOOK: query: create table testdeci2( +id int, +amount decimal(10,3), +sales_tax decimal(10,3), +item string) +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testdeci2 +PREHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@testdeci2 +POSTHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@testdeci2 +POSTHOOK: Lineage: testdeci2.amount EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: testdeci2.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: testdeci2.item SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: testdeci2.sales_tax EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: describe formatted testdeci2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testdeci2 +POSTHOOK: query: describe formatted testdeci2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testdeci2 +# col_name data_type comment + +id int +amount decimal(10,3) +sales_tax decimal(10,3) +item string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + totalSize 578 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +#### A masked pattern was here #### +PREHOOK: query: describe formatted testdeci2 amount +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testdeci2 +POSTHOOK: query: describe formatted testdeci2 amount +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testdeci2 +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +amount decimal(10,3) from deserializer +PREHOOK: query: analyze table testdeci2 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@testdeci2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table testdeci2 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testdeci2 +#### A masked pattern was here #### +PREHOOK: query: analyze table testdeci2 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@testdeci2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table testdeci2 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testdeci2 +#### A masked pattern was here #### +PREHOOK: query: explain +select s.id, +coalesce(d.amount,0) as sales, +coalesce(d.sales_tax,0) as tax +from testdeci2 s join testdeci2 d +on s.item=d.item and d.id=2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select s.id, +coalesce(d.amount,0) as sales, +coalesce(d.sales_tax,0) as tax +from testdeci2 s join testdeci2 d +on s.item=d.item and d.id=2 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_10] + Select Operator [SEL_9] (rows=5 width=228) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_15] (rows=5 width=228) + Conds:RS_6._col1=RS_7._col3(Inner),Output:["_col0","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_6] + PartitionCols:_col1 + Select Operator [SEL_2] (rows=5 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_13] (rows=5 width=88) + predicate:item is not null + TableScan [TS_0] (rows=5 width=88) + default@testdeci2,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","item"] + <-Map 3 [SIMPLE_EDGE] + SHUFFLE [RS_7] + PartitionCols:_col3 + Select Operator [SEL_5] (rows=1 width=312) + Output:["_col1","_col2","_col3"] + Filter Operator [FIL_14] (rows=1 width=312) + predicate:((id = 2) and item is not null) + TableScan [TS_3] (rows=1 width=312) + default@testdeci2,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","amount","sales_tax","item"] +
