This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 29eff98  HIVE-22945: Hive ACID Data Corruption: Update command mess the other column data and produces incorrect result (Denys Kuzmenko, reviewed by Peter Vary)
29eff98 is described below

commit 29eff986b74eb87c7040ca65b0d2de91488c93f6
Author: Denys Kuzmenko <dkuzme...@apache.org>
AuthorDate: Wed Mar 11 12:44:23 2020 +0100

    HIVE-22945: Hive ACID Data Corruption: Update command mess the other column data and produces incorrect result (Denys Kuzmenko, reviewed by Peter Vary)
---
 .../ql/optimizer/ConstantPropagateProcFactory.java |  6 +-
 .../constant_prop_timestamp_date_cast.q            | 10 +++
 .../constant_prop_timestamp_date_cast.q.out        | 74 ++++++++++++++++++++++
 3 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index 1a26ca5..3346f41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -192,9 +192,9 @@ public final class ConstantPropagateProcFactory {
     // We shouldn't cast strings to other types because that can break original data in cases of
     // leading zeros or zeros trailing after decimal point.
     // Example: "000126" => 126 => "126"
-    boolean brokenDataTypesCombination = unsafeConversionTypes.contains(
-        priti.getPrimitiveCategory()) && !unsafeConversionTypes.contains(
-            descti.getPrimitiveCategory());
+    boolean brokenDataTypesCombination = 
unsafeConversionTypes.contains(priti.getPrimitiveCategory())
+        && !unsafeConversionTypes.contains(descti.getPrimitiveCategory())
+        || priti.getPrimitiveCategory() == PrimitiveCategory.TIMESTAMP && 
descti.getPrimitiveCategory() == PrimitiveCategory.DATE;
     if (performSafeTypeCast && brokenDataTypesCombination) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Unsupported cast " + priti + "; " + descti);
diff --git a/ql/src/test/queries/clientpositive/constant_prop_timestamp_date_cast.q b/ql/src/test/queries/clientpositive/constant_prop_timestamp_date_cast.q
new file mode 100644
index 0000000..5a5c7a9
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constant_prop_timestamp_date_cast.q
@@ -0,0 +1,10 @@
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+CREATE TABLE constant_prop(start_dt timestamp, stop_dt timestamp) STORED AS 
ORC TBLPROPERTIES ('transactional'='true');
+
+set hive.test.currenttimestamp=2020-03-05 14:16:57;
+set hive.cbo.enable=false;
+
+explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP WHERE 
CAST(start_dt AS DATE) = CURRENT_DATE;
diff --git a/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out b/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out
new file mode 100644
index 0000000..cf04d30
--- /dev/null
+++ b/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out
@@ -0,0 +1,74 @@
+PREHOOK: query: CREATE TABLE constant_prop(start_dt timestamp, stop_dt 
timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@constant_prop
+POSTHOOK: query: CREATE TABLE constant_prop(start_dt timestamp, stop_dt 
timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@constant_prop
+PREHOOK: query: explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP 
WHERE CAST(start_dt AS DATE) = CURRENT_DATE
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constant_prop
+PREHOOK: Output: default@constant_prop
+POSTHOOK: query: explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP 
WHERE CAST(start_dt AS DATE) = CURRENT_DATE
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constant_prop
+POSTHOOK: Output: default@constant_prop
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: constant_prop
+            filterExpr: (CAST( start_dt AS DATE) = DATE'2020-03-05') (type: 
boolean)
+            Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column 
stats: NONE
+            Filter Operator
+              predicate: (CAST( start_dt AS DATE) = DATE'2020-03-05') (type: 
boolean)
+              Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: ROW__ID (type: 
struct<writeid:bigint,bucketid:int,rowid:bigint>), start_dt (type: timestamp)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: 
struct<writeid:bigint,bucketid:int,rowid:bigint>)
+                  null sort order: z
+                  sort order: +
+                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+                  Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col1 (type: timestamp)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: 
struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: 
timestamp), TIMESTAMP'2020-03-05 14:16:57' (type: timestamp)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                name: default.constant_prop
+            Write Type: UPDATE
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.constant_prop
+          Write Type: UPDATE
+
+  Stage: Stage-2
+    Stats Work
+      Basic Stats Work:
+

Reply via email to