[06/31] hive git commit: HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin)
HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1acaf153 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1acaf153 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1acaf153 Branch: refs/heads/hive-14535 Commit: 1acaf1534769b149ee34e79208c324fe519e9990 Parents: 307a7cd Author: Prasanth Jayachandran Authored: Mon Sep 11 13:18:36 2017 -0700 Committer: Prasanth Jayachandran Committed: Mon Sep 11 13:18:36 2017 -0700 -- .../org/apache/hadoop/hive/ql/ErrorMsg.java | 9 +- .../hive/ql/exec/AbstractFileMergeOperator.java | 34 ++- .../apache/hadoop/hive/ql/exec/Utilities.java | 5 + .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 4 +- .../hive/ql/parse/DDLSemanticAnalyzer.java | 25 +- .../queries/clientnegative/merge_negative_4.q | 6 + .../queries/clientnegative/merge_negative_5.q | 14 ++ .../test/queries/clientpositive/orc_merge13.q | 44 .../clientnegative/merge_negative_3.q.out | 2 +- .../clientnegative/merge_negative_4.q.out | 19 ++ .../clientnegative/merge_negative_5.q.out | 67 + .../results/clientpositive/orc_merge13.q.out| 248 +++ 12 files changed, 462 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index b3ef916..6da8304 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -562,7 +562,14 @@ public enum ErrorMsg { "are set. 
Table schema information is required to read ACID tables"), ACID_TABLES_MUST_BE_READ_WITH_ACID_READER(30021, "An ORC ACID reader required to read ACID tables"), ACID_TABLES_MUST_BE_READ_WITH_HIVEINPUTFORMAT(30022, "Must use HiveInputFormat to read ACID tables " + - "(set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)") + "(set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)"), + + CONCATENATE_UNSUPPORTED_FILE_FORMAT(30030, "Concatenate/Merge only supported for RCFile and ORCFile formats"), + CONCATENATE_UNSUPPORTED_TABLE_BUCKETED(30031, "Concatenate/Merge can not be performed on bucketed tables"), + CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED(30032, "Concatenate/Merge can not be performed on archived partitions"), + CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE(30033, "Concatenate/Merge can not be performed on non-native tables"), + CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED(30034, "Concatenate/Merge can only be performed on managed tables"), + CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL(30035, "Concatenate/Merge can not be performed on transactional tables") ; private int errorCode; http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java index dfad6c1..71fb11f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java @@ -223,16 +223,40 @@ public abstract class AbstractFileMergeOperator + fss.getLen()); } +Path destDir = finalPath.getParent(); +Path destPath = destDir; // move any incompatible files to final path if (incompatFileSet != null && !incompatFileSet.isEmpty()) { for (Path incompatFile : incompatFileSet) { -Path destDir = finalPath.getParent(); +// check if path conforms to Hive's file 
name convention. Hive expects filenames to be in specific format +// like 000000_0, but "LOAD DATA" commands can let you add any files to any partitions/tables without +// renaming. This can cause MoveTask to remove files in some cases where MoveTask assumes the files are +// are generated by speculatively executed tasks. +// Example: MoveTask thinks the following files are same +// part-m-00000_1417075294718 +// part-m-00001_1417075294718 +// Assumes 1417075294718 as taskId and retains only large file supposedly generated by speculative execution. +// This can result in data
hive git commit: HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/branch-2 988c491dd -> de9ae6a9e HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de9ae6a9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de9ae6a9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de9ae6a9 Branch: refs/heads/branch-2 Commit: de9ae6a9e43fdde05bbc394898b8f056c38ac4ab Parents: 988c491 Author: Prasanth Jayachandran Authored: Mon Sep 11 13:18:36 2017 -0700 Committer: Prasanth Jayachandran Committed: Mon Sep 11 13:20:05 2017 -0700 -- .../org/apache/hadoop/hive/ql/ErrorMsg.java | 9 +- .../hive/ql/exec/AbstractFileMergeOperator.java | 34 ++- .../apache/hadoop/hive/ql/exec/Utilities.java | 5 + .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 4 +- .../hive/ql/parse/DDLSemanticAnalyzer.java | 25 +- .../queries/clientnegative/merge_negative_4.q | 6 + .../queries/clientnegative/merge_negative_5.q | 14 ++ .../test/queries/clientpositive/orc_merge13.q | 44 .../clientnegative/merge_negative_3.q.out | 2 +- .../clientnegative/merge_negative_4.q.out | 19 ++ .../clientnegative/merge_negative_5.q.out | 67 + .../results/clientpositive/orc_merge13.q.out| 248 +++ 12 files changed, 462 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/de9ae6a9/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 7087022..b12dd93 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -537,7 +537,14 @@ public enum ErrorMsg { "are set. 
Table schema information is required to read ACID tables"), ACID_TABLES_MUST_BE_READ_WITH_ACID_READER(30021, "An ORC ACID reader required to read ACID tables"), ACID_TABLES_MUST_BE_READ_WITH_HIVEINPUTFORMAT(30022, "Must use HiveInputFormat to read ACID tables " + - "(set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)") + "(set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)"), + + CONCATENATE_UNSUPPORTED_FILE_FORMAT(30030, "Concatenate/Merge only supported for RCFile and ORCFile formats"), + CONCATENATE_UNSUPPORTED_TABLE_BUCKETED(30031, "Concatenate/Merge can not be performed on bucketed tables"), + CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED(30032, "Concatenate/Merge can not be performed on archived partitions"), + CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE(30033, "Concatenate/Merge can not be performed on non-native tables"), + CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED(30034, "Concatenate/Merge can only be performed on managed tables"), + CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL(30035, "Concatenate/Merge can not be performed on transactional tables") ; private int errorCode; http://git-wip-us.apache.org/repos/asf/hive/blob/de9ae6a9/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java index dfad6c1..71fb11f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java @@ -223,16 +223,40 @@ public abstract class AbstractFileMergeOperator + fss.getLen()); } +Path destDir = finalPath.getParent(); +Path destPath = destDir; // move any incompatible files to final path if (incompatFileSet != null && !incompatFileSet.isEmpty()) { for (Path incompatFile : incompatFileSet) { -Path destDir = finalPath.getParent(); +// check if path conforms to Hive's file 
name convention. Hive expects filenames to be in specific format +// like 000000_0, but "LOAD DATA" commands can let you add any files to any partitions/tables without +// renaming. This can cause MoveTask to remove files in some cases where MoveTask assumes the files are +// are generated by speculatively executed tasks. +// Example: MoveTask thinks the following files are same +// part-m-00000_1417075294718 +// part-m-00001_1417075294718 +// Assumes 1417075294718 as taskId and retains only large file
hive git commit: HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 307a7cda3 -> 1acaf1534 HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1acaf153 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1acaf153 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1acaf153 Branch: refs/heads/master Commit: 1acaf1534769b149ee34e79208c324fe519e9990 Parents: 307a7cd Author: Prasanth Jayachandran Authored: Mon Sep 11 13:18:36 2017 -0700 Committer: Prasanth Jayachandran Committed: Mon Sep 11 13:18:36 2017 -0700 -- .../org/apache/hadoop/hive/ql/ErrorMsg.java | 9 +- .../hive/ql/exec/AbstractFileMergeOperator.java | 34 ++- .../apache/hadoop/hive/ql/exec/Utilities.java | 5 + .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 4 +- .../hive/ql/parse/DDLSemanticAnalyzer.java | 25 +- .../queries/clientnegative/merge_negative_4.q | 6 + .../queries/clientnegative/merge_negative_5.q | 14 ++ .../test/queries/clientpositive/orc_merge13.q | 44 .../clientnegative/merge_negative_3.q.out | 2 +- .../clientnegative/merge_negative_4.q.out | 19 ++ .../clientnegative/merge_negative_5.q.out | 67 + .../results/clientpositive/orc_merge13.q.out| 248 +++ 12 files changed, 462 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index b3ef916..6da8304 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -562,7 +562,14 @@ public enum ErrorMsg { "are set. 
Table schema information is required to read ACID tables"), ACID_TABLES_MUST_BE_READ_WITH_ACID_READER(30021, "An ORC ACID reader required to read ACID tables"), ACID_TABLES_MUST_BE_READ_WITH_HIVEINPUTFORMAT(30022, "Must use HiveInputFormat to read ACID tables " + - "(set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)") + "(set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)"), + + CONCATENATE_UNSUPPORTED_FILE_FORMAT(30030, "Concatenate/Merge only supported for RCFile and ORCFile formats"), + CONCATENATE_UNSUPPORTED_TABLE_BUCKETED(30031, "Concatenate/Merge can not be performed on bucketed tables"), + CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED(30032, "Concatenate/Merge can not be performed on archived partitions"), + CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE(30033, "Concatenate/Merge can not be performed on non-native tables"), + CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED(30034, "Concatenate/Merge can only be performed on managed tables"), + CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL(30035, "Concatenate/Merge can not be performed on transactional tables") ; private int errorCode; http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java index dfad6c1..71fb11f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java @@ -223,16 +223,40 @@ public abstract class AbstractFileMergeOperator + fss.getLen()); } +Path destDir = finalPath.getParent(); +Path destPath = destDir; // move any incompatible files to final path if (incompatFileSet != null && !incompatFileSet.isEmpty()) { for (Path incompatFile : incompatFileSet) { -Path destDir = finalPath.getParent(); +// check if path conforms to Hive's file 
name convention. Hive expects filenames to be in specific format +// like 000000_0, but "LOAD DATA" commands can let you add any files to any partitions/tables without +// renaming. This can cause MoveTask to remove files in some cases where MoveTask assumes the files are +// are generated by speculatively executed tasks. +// Example: MoveTask thinks the following files are same +// part-m-00000_1417075294718 +// part-m-00001_1417075294718 +// Assumes 1417075294718 as taskId and retains only large file supposedly