This is an automated email from the ASF dual-hosted git repository. dkuzmenko pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 29eff98 HIVE-22945: Hive ACID Data Corruption: Update command mess the other column data and produces incorrect result (Denys Kuzmenko, reviewed by Peter Vary) 29eff98 is described below commit 29eff986b74eb87c7040ca65b0d2de91488c93f6 Author: Denys Kuzmenko <dkuzme...@apache.org> AuthorDate: Wed Mar 11 12:44:23 2020 +0100 HIVE-22945: Hive ACID Data Corruption: Update command mess the other column data and produces incorrect result (Denys Kuzmenko, reviewed by Peter Vary) --- .../ql/optimizer/ConstantPropagateProcFactory.java | 6 +- .../constant_prop_timestamp_date_cast.q | 10 +++ .../constant_prop_timestamp_date_cast.q.out | 74 ++++++++++++++++++++++ 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index 1a26ca5..3346f41 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -192,9 +192,9 @@ public final class ConstantPropagateProcFactory { // We shouldn't cast strings to other types because that can break original data in cases of // leading zeros or zeros trailing after decimal point. // Example: "000126" => 126 => "126" - boolean brokenDataTypesCombination = unsafeConversionTypes.contains( - priti.getPrimitiveCategory()) && !unsafeConversionTypes.contains( - descti.getPrimitiveCategory()); + boolean brokenDataTypesCombination = unsafeConversionTypes.contains(priti.getPrimitiveCategory()) + && !unsafeConversionTypes.contains(descti.getPrimitiveCategory()) + || priti.getPrimitiveCategory() == PrimitiveCategory.TIMESTAMP && descti.getPrimitiveCategory() == PrimitiveCategory.DATE; if (performSafeTypeCast && brokenDataTypesCombination) { if (LOG.isDebugEnabled()) { LOG.debug("Unsupported cast " + priti + "; " + descti); diff --git a/ql/src/test/queries/clientpositive/constant_prop_timestamp_date_cast.q b/ql/src/test/queries/clientpositive/constant_prop_timestamp_date_cast.q new file mode 100644 index 0000000..5a5c7a9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/constant_prop_timestamp_date_cast.q @@ -0,0 +1,10 @@ +set hive.mapred.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE constant_prop(start_dt timestamp, stop_dt timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +set hive.test.currenttimestamp=2020-03-05 14:16:57; +set hive.cbo.enable=false; + +explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP WHERE CAST(start_dt AS DATE) = CURRENT_DATE; diff --git a/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out b/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out new file mode 100644 index 0000000..cf04d30 --- /dev/null +++ b/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out @@ -0,0 +1,74 @@ +PREHOOK: query: CREATE TABLE constant_prop(start_dt timestamp, stop_dt timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@constant_prop +POSTHOOK: query: CREATE TABLE constant_prop(start_dt timestamp, stop_dt timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@constant_prop +PREHOOK: query: explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP WHERE CAST(start_dt AS DATE) = CURRENT_DATE +PREHOOK: type: QUERY +PREHOOK: Input: default@constant_prop +PREHOOK: Output: default@constant_prop +POSTHOOK: query: explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP WHERE CAST(start_dt AS DATE) = CURRENT_DATE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@constant_prop +POSTHOOK: Output: default@constant_prop +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: constant_prop + filterExpr: (CAST( start_dt AS DATE) = DATE'2020-03-05') (type: boolean) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CAST( start_dt AS DATE) = DATE'2020-03-05') (type: boolean) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), start_dt (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: timestamp), TIMESTAMP'2020-03-05 14:16:57' (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.constant_prop + Write Type: UPDATE + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.constant_prop + Write Type: UPDATE + + Stage: Stage-2 + Stats Work + Basic Stats Work: +