[06/31] hive git commit: HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin)

2017-09-15 Thread sershe
HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth 
Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1acaf153
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1acaf153
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1acaf153

Branch: refs/heads/hive-14535
Commit: 1acaf1534769b149ee34e79208c324fe519e9990
Parents: 307a7cd
Author: Prasanth Jayachandran 
Authored: Mon Sep 11 13:18:36 2017 -0700
Committer: Prasanth Jayachandran 
Committed: Mon Sep 11 13:18:36 2017 -0700

--
 .../org/apache/hadoop/hive/ql/ErrorMsg.java |   9 +-
 .../hive/ql/exec/AbstractFileMergeOperator.java |  34 ++-
 .../apache/hadoop/hive/ql/exec/Utilities.java   |   5 +
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java |   4 +-
 .../hive/ql/parse/DDLSemanticAnalyzer.java  |  25 +-
 .../queries/clientnegative/merge_negative_4.q   |   6 +
 .../queries/clientnegative/merge_negative_5.q   |  14 ++
 .../test/queries/clientpositive/orc_merge13.q   |  44 
 .../clientnegative/merge_negative_3.q.out   |   2 +-
 .../clientnegative/merge_negative_4.q.out   |  19 ++
 .../clientnegative/merge_negative_5.q.out   |  67 +
 .../results/clientpositive/orc_merge13.q.out| 248 +++
 12 files changed, 462 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java 
b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index b3ef916..6da8304 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -562,7 +562,14 @@ public enum ErrorMsg {
   "are set.  Table schema information is required to read ACID 
tables"),
   ACID_TABLES_MUST_BE_READ_WITH_ACID_READER(30021, "An ORC ACID reader 
required to read ACID tables"),
   ACID_TABLES_MUST_BE_READ_WITH_HIVEINPUTFORMAT(30022, "Must use 
HiveInputFormat to read ACID tables " +
-  "(set 
hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)")
+  "(set 
hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)"),
+
+  CONCATENATE_UNSUPPORTED_FILE_FORMAT(30030, "Concatenate/Merge only supported 
for RCFile and ORCFile formats"),
+  CONCATENATE_UNSUPPORTED_TABLE_BUCKETED(30031, "Concatenate/Merge can not be 
performed on bucketed tables"),
+  CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED(30032, "Concatenate/Merge can not 
be performed on archived partitions"),
+  CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE(30033, "Concatenate/Merge can not 
be performed on non-native tables"),
+  CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED(30034, "Concatenate/Merge can only 
be performed on managed tables"),
+  CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL(30035, "Concatenate/Merge can 
not be performed on transactional tables")
   ;
 
   private int errorCode;

http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index dfad6c1..71fb11f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -223,16 +223,40 @@ public abstract class AbstractFileMergeOperator
   + fss.getLen());
 }
 
+Path destDir = finalPath.getParent();
+Path destPath = destDir;
 // move any incompatible files to final path
 if (incompatFileSet != null && !incompatFileSet.isEmpty()) {
   for (Path incompatFile : incompatFileSet) {
-Path destDir = finalPath.getParent();
+// check if path conforms to Hive's file name convention. Hive 
expects filenames to be in specific format
+// like 000000_0, but "LOAD DATA" commands can let you add any 
files to any partitions/tables without
+// renaming. This can cause MoveTask to remove files in some cases 
where MoveTask assumes the files are
+// generated by speculatively executed tasks.
+// Example: MoveTask thinks the following files are same
+// part-m-00000_1417075294718
+// part-m-00001_1417075294718
+// Assumes 1417075294718 as taskId and retains only large file 
supposedly generated by speculative execution.
+// This can result in data 

hive git commit: HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin)

2017-09-11 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/branch-2 988c491dd -> de9ae6a9e


HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth 
Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de9ae6a9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de9ae6a9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de9ae6a9

Branch: refs/heads/branch-2
Commit: de9ae6a9e43fdde05bbc394898b8f056c38ac4ab
Parents: 988c491
Author: Prasanth Jayachandran 
Authored: Mon Sep 11 13:18:36 2017 -0700
Committer: Prasanth Jayachandran 
Committed: Mon Sep 11 13:20:05 2017 -0700

--
 .../org/apache/hadoop/hive/ql/ErrorMsg.java |   9 +-
 .../hive/ql/exec/AbstractFileMergeOperator.java |  34 ++-
 .../apache/hadoop/hive/ql/exec/Utilities.java   |   5 +
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java |   4 +-
 .../hive/ql/parse/DDLSemanticAnalyzer.java  |  25 +-
 .../queries/clientnegative/merge_negative_4.q   |   6 +
 .../queries/clientnegative/merge_negative_5.q   |  14 ++
 .../test/queries/clientpositive/orc_merge13.q   |  44 
 .../clientnegative/merge_negative_3.q.out   |   2 +-
 .../clientnegative/merge_negative_4.q.out   |  19 ++
 .../clientnegative/merge_negative_5.q.out   |  67 +
 .../results/clientpositive/orc_merge13.q.out| 248 +++
 12 files changed, 462 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/de9ae6a9/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java 
b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 7087022..b12dd93 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -537,7 +537,14 @@ public enum ErrorMsg {
   "are set.  Table schema information is required to read ACID 
tables"),
   ACID_TABLES_MUST_BE_READ_WITH_ACID_READER(30021, "An ORC ACID reader 
required to read ACID tables"),
   ACID_TABLES_MUST_BE_READ_WITH_HIVEINPUTFORMAT(30022, "Must use 
HiveInputFormat to read ACID tables " +
-  "(set 
hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)")
+  "(set 
hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)"),
+
+  CONCATENATE_UNSUPPORTED_FILE_FORMAT(30030, "Concatenate/Merge only supported 
for RCFile and ORCFile formats"),
+  CONCATENATE_UNSUPPORTED_TABLE_BUCKETED(30031, "Concatenate/Merge can not be 
performed on bucketed tables"),
+  CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED(30032, "Concatenate/Merge can not 
be performed on archived partitions"),
+  CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE(30033, "Concatenate/Merge can not 
be performed on non-native tables"),
+  CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED(30034, "Concatenate/Merge can only 
be performed on managed tables"),
+  CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL(30035, "Concatenate/Merge can 
not be performed on transactional tables")
   ;
 
   private int errorCode;

http://git-wip-us.apache.org/repos/asf/hive/blob/de9ae6a9/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index dfad6c1..71fb11f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -223,16 +223,40 @@ public abstract class AbstractFileMergeOperator
   + fss.getLen());
 }
 
+Path destDir = finalPath.getParent();
+Path destPath = destDir;
 // move any incompatible files to final path
 if (incompatFileSet != null && !incompatFileSet.isEmpty()) {
   for (Path incompatFile : incompatFileSet) {
-Path destDir = finalPath.getParent();
+// check if path conforms to Hive's file name convention. Hive 
expects filenames to be in specific format
+// like 000000_0, but "LOAD DATA" commands can let you add any 
files to any partitions/tables without
+// renaming. This can cause MoveTask to remove files in some cases 
where MoveTask assumes the files are
+// generated by speculatively executed tasks.
+// Example: MoveTask thinks the following files are same
+// part-m-00000_1417075294718
+// part-m-00001_1417075294718
+// Assumes 1417075294718 as taskId and retains only large file 

hive git commit: HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin)

2017-09-11 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/master 307a7cda3 -> 1acaf1534


HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth 
Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1acaf153
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1acaf153
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1acaf153

Branch: refs/heads/master
Commit: 1acaf1534769b149ee34e79208c324fe519e9990
Parents: 307a7cd
Author: Prasanth Jayachandran 
Authored: Mon Sep 11 13:18:36 2017 -0700
Committer: Prasanth Jayachandran 
Committed: Mon Sep 11 13:18:36 2017 -0700

--
 .../org/apache/hadoop/hive/ql/ErrorMsg.java |   9 +-
 .../hive/ql/exec/AbstractFileMergeOperator.java |  34 ++-
 .../apache/hadoop/hive/ql/exec/Utilities.java   |   5 +
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java |   4 +-
 .../hive/ql/parse/DDLSemanticAnalyzer.java  |  25 +-
 .../queries/clientnegative/merge_negative_4.q   |   6 +
 .../queries/clientnegative/merge_negative_5.q   |  14 ++
 .../test/queries/clientpositive/orc_merge13.q   |  44 
 .../clientnegative/merge_negative_3.q.out   |   2 +-
 .../clientnegative/merge_negative_4.q.out   |  19 ++
 .../clientnegative/merge_negative_5.q.out   |  67 +
 .../results/clientpositive/orc_merge13.q.out| 248 +++
 12 files changed, 462 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java 
b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index b3ef916..6da8304 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -562,7 +562,14 @@ public enum ErrorMsg {
   "are set.  Table schema information is required to read ACID 
tables"),
   ACID_TABLES_MUST_BE_READ_WITH_ACID_READER(30021, "An ORC ACID reader 
required to read ACID tables"),
   ACID_TABLES_MUST_BE_READ_WITH_HIVEINPUTFORMAT(30022, "Must use 
HiveInputFormat to read ACID tables " +
-  "(set 
hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)")
+  "(set 
hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)"),
+
+  CONCATENATE_UNSUPPORTED_FILE_FORMAT(30030, "Concatenate/Merge only supported 
for RCFile and ORCFile formats"),
+  CONCATENATE_UNSUPPORTED_TABLE_BUCKETED(30031, "Concatenate/Merge can not be 
performed on bucketed tables"),
+  CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED(30032, "Concatenate/Merge can not 
be performed on archived partitions"),
+  CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE(30033, "Concatenate/Merge can not 
be performed on non-native tables"),
+  CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED(30034, "Concatenate/Merge can only 
be performed on managed tables"),
+  CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL(30035, "Concatenate/Merge can 
not be performed on transactional tables")
   ;
 
   private int errorCode;

http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index dfad6c1..71fb11f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -223,16 +223,40 @@ public abstract class AbstractFileMergeOperator
   + fss.getLen());
 }
 
+Path destDir = finalPath.getParent();
+Path destPath = destDir;
 // move any incompatible files to final path
 if (incompatFileSet != null && !incompatFileSet.isEmpty()) {
   for (Path incompatFile : incompatFileSet) {
-Path destDir = finalPath.getParent();
+// check if path conforms to Hive's file name convention. Hive 
expects filenames to be in specific format
+// like 000000_0, but "LOAD DATA" commands can let you add any 
files to any partitions/tables without
+// renaming. This can cause MoveTask to remove files in some cases 
where MoveTask assumes the files are
+// generated by speculatively executed tasks.
+// Example: MoveTask thinks the following files are same
+// part-m-00000_1417075294718
+// part-m-00001_1417075294718
+// Assumes 1417075294718 as taskId and retains only large file 
supposedly