Repository: hive
Updated Branches:
  refs/heads/master 307a7cda3 -> 1acaf1534
HIVE-17403: Fail concatenation for unmanaged and transactional tables (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1acaf153
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1acaf153
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1acaf153

Branch: refs/heads/master
Commit: 1acaf1534769b149ee34e79208c324fe519e9990
Parents: 307a7cd
Author: Prasanth Jayachandran <prasan...@apache.org>
Authored: Mon Sep 11 13:18:36 2017 -0700
Committer: Prasanth Jayachandran <prasan...@apache.org>
Committed: Mon Sep 11 13:18:36 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/ErrorMsg.java     |   9 +-
 .../hive/ql/exec/AbstractFileMergeOperator.java |  34 ++-
 .../apache/hadoop/hive/ql/exec/Utilities.java   |   5 +
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java |   4 +-
 .../hive/ql/parse/DDLSemanticAnalyzer.java      |  25 +-
 .../queries/clientnegative/merge_negative_4.q   |   6 +
 .../queries/clientnegative/merge_negative_5.q   |  14 ++
 .../test/queries/clientpositive/orc_merge13.q   |  44 ++++
 .../clientnegative/merge_negative_3.q.out       |   2 +-
 .../clientnegative/merge_negative_4.q.out       |  19 ++
 .../clientnegative/merge_negative_5.q.out       |  67 +++++
 .../results/clientpositive/orc_merge13.q.out    | 248 +++++++++++++++++++
 12 files changed, 462 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index b3ef916..6da8304 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -562,7 +562,14 @@ public enum ErrorMsg {
       "are set. Table schema information is required to read ACID tables"),
   ACID_TABLES_MUST_BE_READ_WITH_ACID_READER(30021, "An ORC ACID reader required to read ACID tables"),
   ACID_TABLES_MUST_BE_READ_WITH_HIVEINPUTFORMAT(30022, "Must use HiveInputFormat to read ACID tables " +
-      "(set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)")
+      "(set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat)"),
+
+  CONCATENATE_UNSUPPORTED_FILE_FORMAT(30030, "Concatenate/Merge only supported for RCFile and ORCFile formats"),
+  CONCATENATE_UNSUPPORTED_TABLE_BUCKETED(30031, "Concatenate/Merge can not be performed on bucketed tables"),
+  CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED(30032, "Concatenate/Merge can not be performed on archived partitions"),
+  CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE(30033, "Concatenate/Merge can not be performed on non-native tables"),
+  CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED(30034, "Concatenate/Merge can only be performed on managed tables"),
+  CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL(30035, "Concatenate/Merge can not be performed on transactional tables")
   ;

   private int errorCode;


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index dfad6c1..71fb11f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -223,16 +223,40 @@ public abstract class AbstractFileMergeOperator<T extends FileMergeDesc>
             + fss.getLen());
       }

+      Path destDir = finalPath.getParent();
+      Path destPath = destDir;
       // move any incompatible files to final path
       if (incompatFileSet != null && !incompatFileSet.isEmpty()) {
         for (Path incompatFile : incompatFileSet) {
-          Path destDir = finalPath.getParent();
+          // Check if the path conforms to Hive's file name convention. Hive expects filenames to be in a specific
+          // format like 000000_0, but "LOAD DATA" commands let users add arbitrary files to partitions/tables
+          // without renaming. This can cause MoveTask to remove files in some cases where MoveTask assumes the
+          // files are generated by speculatively executed tasks.
+          // Example: MoveTask thinks the following files are the same
+          // part-m-00000_1417075294718
+          // part-m-00001_1417075294718
+          // It assumes 1417075294718 is the taskId and retains only the larger file, supposedly generated by
+          // speculative execution. This can result in data loss in case of CONCATENATE/merging. Filter out files
+          // that do not match Hive's filename convention.
+          if (!Utilities.isHiveManagedFile(incompatFile)) {
+            // rename un-managed files to conform to Hive's naming standard
+            // Example:
+            // /warehouse/table/part-m-00000_1417075294718 will get renamed to /warehouse/table/.hive-staging/000000_0
+            // If the staging directory already contains the file, taskId_copy_N naming will be used.
+            final String taskId = Utilities.getTaskId(jc);
+            Path destFilePath = new Path(destDir, new Path(taskId));
+            for (int counter = 1; fs.exists(destFilePath); counter++) {
+              destFilePath = new Path(destDir, taskId + (Utilities.COPY_KEYWORD + counter));
+            }
+            LOG.warn("Path doesn't conform to Hive's expectation. Renaming {} to {}", incompatFile, destFilePath);
+            destPath = destFilePath;
+          }
+
           try {
-            Utilities.renameOrMoveFiles(fs, incompatFile, destDir);
-            LOG.info("Moved incompatible file " + incompatFile + " to " +
-                destDir);
+            Utilities.renameOrMoveFiles(fs, incompatFile, destPath);
+            LOG.info("Moved incompatible file " + incompatFile + " to " + destPath);
           } catch (HiveException e) {
-            LOG.error("Unable to move " + incompatFile + " to " + destDir);
+            LOG.error("Unable to move " + incompatFile + " to " + destPath);
             throw new IOException(e);
           }
         }


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index aca99f2..4322cc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -3954,4 +3954,9 @@ public final class Utilities {
     }
     return aclConf.toAclString();
   }
+
+  public static boolean isHiveManagedFile(Path path) {
+    return AcidUtils.ORIGINAL_PATTERN.matcher(path.getName()).matches() ||
+        AcidUtils.ORIGINAL_PATTERN_COPY.matcher(path.getName()).matches();
+  }
 }


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index feacdd8..fa0ba63 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -132,12 +132,12 @@ public class AcidUtils {
   }

   private static final Logger LOG = LoggerFactory.getLogger(AcidUtils.class);
-  private static final Pattern ORIGINAL_PATTERN =
+  public static final Pattern ORIGINAL_PATTERN =
       Pattern.compile("[0-9]+_[0-9]+");
   /**
    * @see org.apache.hadoop.hive.ql.exec.Utilities#COPY_KEYWORD
    */
-  private static final Pattern ORIGINAL_PATTERN_COPY =
+  public static final Pattern ORIGINAL_PATTERN_COPY =
       Pattern.compile("[0-9]+_[0-9]+" + COPY_KEYWORD + "[0-9]+");

   public static final PathFilter hiddenFileFilter = new PathFilter(){


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 230ca47..251deca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -23,6 +23,7 @@ import com.google.common.collect.Lists;

 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileSystem;
@@ -1673,23 +1674,35 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {

     // throw a HiveException for other than rcfile and orcfile.
     if (!((inputFormatClass.equals(RCFileInputFormat.class) ||
         (inputFormatClass.equals(OrcInputFormat.class))))) {
-      throw new SemanticException(
-          "Only RCFile and ORCFile Formats are supported right now.");
+      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_FILE_FORMAT.getMsg());
     }
     mergeDesc.setInputFormatClass(inputFormatClass);

     // throw a HiveException if the table/partition is bucketized
     if (bucketCols != null && bucketCols.size() > 0) {
-      throw new SemanticException(
-          "Merge can not perform on bucketized partition/table.");
+      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_BUCKETED.getMsg());
     }

     // throw a HiveException if the table/partition is archived
     if (isArchived) {
-      throw new SemanticException(
-          "Merge can not perform on archived partitions.");
+      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED.getMsg());
+    }
+
+    // non-native and non-managed tables are not supported, as MoveTask requires filenames to be in a specific
+    // format; violating that convention can cause data loss
+    if (tblObj.isNonNative()) {
+      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE.getMsg());
+    }
+
+    if (tblObj.getTableType() != TableType.MANAGED_TABLE) {
+      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED.getMsg());
     }

+    // transactional tables are compacted and no longer need to be bucketed, so they are not safe for
+    // merge/concatenation
+    boolean isAcid = AcidUtils.isAcidTable(tblObj);
+    if (isAcid) {
+      throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL.getMsg());
+    }
     inputDir.add(oldTblPartLoc);

     mergeDesc.setInputDir(inputDir);


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/test/queries/clientnegative/merge_negative_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/merge_negative_4.q b/ql/src/test/queries/clientnegative/merge_negative_4.q
new file mode 100644
index 0000000..c2b9254
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/merge_negative_4.q
@@ -0,0 +1,6 @@
+
+
+
+create external table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE;
+insert overwrite table srcpart2 partition (ds='2011') select * from src;
+alter table srcpart2 partition (ds = '2011') concatenate;


http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/test/queries/clientnegative/merge_negative_5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/merge_negative_5.q b/ql/src/test/queries/clientnegative/merge_negative_5.q
new file mode 100644
index 0000000..8039676
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/merge_negative_5.q
@@ -0,0 +1,14 @@
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.vectorized.execution.enabled=false;
+set hive.explain.user=false;
+set hive.merge.cardinality.check=true;
+
+drop table if exists srcpart_acid;
+CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default');
+insert into srcpart_acid PARTITION (ds, hr) select * from srcpart;
+insert into srcpart_acid PARTITION (ds, hr) select * from srcpart;
+
+alter table srcpart_acid
partition(ds='2008-04-08',hr=='11') concatenate; http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/test/queries/clientpositive/orc_merge13.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge13.q b/ql/src/test/queries/clientpositive/orc_merge13.q new file mode 100644 index 0000000..8f013a0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_merge13.q @@ -0,0 +1,44 @@ +drop table aa; +create table aa (a string, b int) stored as orc; +insert into table aa values("b",2); +insert into table aa values("c",3); + +-- SORT_QUERY_RESULTS + +dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/aa/000000_0 ${hiveconf:hive.metastore.warehouse.dir}/aa/part-00000; +dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/aa/000000_0_copy_1 ${hiveconf:hive.metastore.warehouse.dir}/aa/part-00000_copy_1; + +select * from aa; + +alter table aa add columns(aa string, bb int); + +insert into table aa values("b",2,"b",2); +insert into table aa values("c",3,"c",3); + +dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/aa/000000_0 ${hiveconf:hive.metastore.warehouse.dir}/aa/part-00001; +dfs -mv ${hiveconf:hive.metastore.warehouse.dir}/aa/000000_0_copy_1 ${hiveconf:hive.metastore.warehouse.dir}/aa/part-00001_copy_1; + +select * from aa; +select count(*) from aa; +select sum(hash(*)) from aa; + +-- try concatenate multiple times (order of files chosen for concatenation is not guaranteed) +alter table aa concatenate; +select * from aa; +select count(*) from aa; +select sum(hash(*)) from aa; + +alter table aa concatenate; +select * from aa; +select count(*) from aa; +select sum(hash(*)) from aa; + +alter table aa concatenate; +select * from aa; +select count(*) from aa; +select sum(hash(*)) from aa; + +alter table aa concatenate; +select * from aa; +select count(*) from aa; +select sum(hash(*)) from aa; http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/test/results/clientnegative/merge_negative_3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/merge_negative_3.q.out b/ql/src/test/results/clientnegative/merge_negative_3.q.out index 906336d..02c2ad1 100644 --- a/ql/src/test/results/clientnegative/merge_negative_3.q.out +++ b/ql/src/test/results/clientnegative/merge_negative_3.q.out @@ -16,4 +16,4 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@srcpart2@ds=2011 POSTHOOK: Lineage: srcpart2 PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart2 PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Merge can not perform on bucketized partition/table. 
+FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Concatenate/Merge can not be performed on bucketed tables http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/test/results/clientnegative/merge_negative_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/merge_negative_4.q.out b/ql/src/test/results/clientnegative/merge_negative_4.q.out new file mode 100644 index 0000000..975422e --- /dev/null +++ b/ql/src/test/results/clientnegative/merge_negative_4.q.out @@ -0,0 +1,19 @@ +PREHOOK: query: create external table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart2 +POSTHOOK: query: create external table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart2 +PREHOOK: query: insert overwrite table srcpart2 partition (ds='2011') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcpart2@ds=2011 +POSTHOOK: query: insert overwrite table srcpart2 partition (ds='2011') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcpart2@ds=2011 +POSTHOOK: Lineage: srcpart2 PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart2 PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Concatenate/Merge can not be performed on bucketed tables http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/test/results/clientnegative/merge_negative_5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/merge_negative_5.q.out b/ql/src/test/results/clientnegative/merge_negative_5.q.out new file mode 100644 index 0000000..56a21a0 --- /dev/null +++ b/ql/src/test/results/clientnegative/merge_negative_5.q.out @@ -0,0 +1,67 @@ +PREHOOK: query: drop table if exists srcpart_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acid +PREHOOK: query: insert into srcpart_acid PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: insert into srcpart_acid PARTITION (ds, 
hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into srcpart_acid PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: insert into srcpart_acid PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=11).value SIMPLE 
[(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Concatenate/Merge can not be performed on transactional tables http://git-wip-us.apache.org/repos/asf/hive/blob/1acaf153/ql/src/test/results/clientpositive/orc_merge13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/orc_merge13.q.out b/ql/src/test/results/clientpositive/orc_merge13.q.out new file mode 100644 index 0000000..70ae731 --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_merge13.q.out @@ -0,0 +1,248 @@ +PREHOOK: query: drop table aa +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table aa +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table aa (a string, b int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@aa +POSTHOOK: query: create table aa (a string, b int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@aa +PREHOOK: query: insert into table aa values("b",2) +PREHOOK: type: QUERY +PREHOOK: Output: default@aa +POSTHOOK: query: insert into table aa values("b",2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@aa +POSTHOOK: Lineage: aa.a SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: aa.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into table aa values("c",3) +PREHOOK: type: QUERY +PREHOOK: Output: default@aa +POSTHOOK: query: insert into table aa values("c",3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@aa +POSTHOOK: Lineage: aa.a SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: aa.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select * from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select * from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +b 2 +c 3 +PREHOOK: query: alter table aa add columns(aa string, bb int) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@aa +PREHOOK: Output: default@aa +POSTHOOK: query: alter table aa add columns(aa string, bb int) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@aa +POSTHOOK: Output: default@aa +PREHOOK: query: insert into table aa values("b",2,"b",2) +PREHOOK: type: QUERY +PREHOOK: Output: default@aa +POSTHOOK: query: insert into table aa values("b",2,"b",2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@aa +POSTHOOK: Lineage: aa.a SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: aa.aa SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: aa.b EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ] +POSTHOOK: Lineage: aa.bb EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into table aa values("c",3,"c",3) +PREHOOK: type: QUERY +PREHOOK: Output: default@aa +POSTHOOK: query: insert into table aa values("c",3,"c",3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@aa +POSTHOOK: Lineage: aa.a SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: aa.aa SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: aa.b EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: aa.bb EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: select * from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select * from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +b 2 NULL NULL +b 2 b 2 +c 3 NULL NULL +c 3 c 3 +PREHOOK: query: select count(*) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +4 +PREHOOK: query: select sum(hash(*)) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +11753376 +PREHOOK: query: alter table aa concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@aa +PREHOOK: Output: default@aa +POSTHOOK: query: alter table aa concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@aa +POSTHOOK: Output: default@aa +PREHOOK: query: select * from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select * from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +b 2 NULL NULL +b 2 b 2 +c 3 NULL NULL +c 3 c 3 +PREHOOK: query: select count(*) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +4 +PREHOOK: query: select sum(hash(*)) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +11753376 +PREHOOK: query: alter table aa concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@aa +PREHOOK: Output: default@aa +POSTHOOK: query: alter table aa concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@aa +POSTHOOK: Output: default@aa +PREHOOK: query: select * from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select * from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +b 2 NULL NULL +b 2 b 2 +c 3 NULL NULL +c 3 c 3 +PREHOOK: query: select count(*) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### 
A masked pattern was here #### +POSTHOOK: query: select count(*) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +4 +PREHOOK: query: select sum(hash(*)) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +11753376 +PREHOOK: query: alter table aa concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@aa +PREHOOK: Output: default@aa +POSTHOOK: query: alter table aa concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@aa +POSTHOOK: Output: default@aa +PREHOOK: query: select * from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select * from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +b 2 NULL NULL +b 2 b 2 +c 3 NULL NULL +c 3 c 3 +PREHOOK: query: select count(*) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +4 +PREHOOK: query: select sum(hash(*)) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +11753376 +PREHOOK: query: alter table aa concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@aa +PREHOOK: Output: default@aa +POSTHOOK: query: alter table aa concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@aa +POSTHOOK: Output: default@aa +PREHOOK: query: select * from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select * from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +b 2 NULL NULL +b 2 b 2 +c 3 NULL NULL +c 3 c 3 +PREHOOK: query: select count(*) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +4 +PREHOOK: query: select sum(hash(*)) from aa +PREHOOK: type: QUERY +PREHOOK: Input: default@aa +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from aa +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa +#### A masked pattern was here #### +11753376
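

Editor's note: the following is a minimal, self-contained sketch (not part of the commit) of the filename handling that the AbstractFileMergeOperator and Utilities changes above introduce. The class and method names are illustrative only; the two regexes are copied from the AcidUtils fields made public in this change, and COPY_KEYWORD is assumed to be the "_copy_" suffix seen in the orc_merge13.q file names.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

// Illustrative stand-in for the checks in Utilities.isHiveManagedFile() and the
// rename-target loop in AbstractFileMergeOperator.closeOp(); not Hive code.
public class ConcatenateFileNameCheckDemo {

  private static final String COPY_KEYWORD = "_copy_"; // assumption, see lead-in
  private static final Pattern ORIGINAL_PATTERN =
      Pattern.compile("[0-9]+_[0-9]+");
  private static final Pattern ORIGINAL_PATTERN_COPY =
      Pattern.compile("[0-9]+_[0-9]+" + COPY_KEYWORD + "[0-9]+");

  // True only for Hive-style names such as 000000_0 or 000000_0_copy_1.
  static boolean isHiveManagedFileName(String fileName) {
    return ORIGINAL_PATTERN.matcher(fileName).matches()
        || ORIGINAL_PATTERN_COPY.matcher(fileName).matches();
  }

  // Pick a rename target: the task id first, then taskId_copy_N until the name is free.
  // The "existing" set stands in for the fs.exists() probes against the destination directory.
  static String renameTarget(String taskId, Set<String> existing) {
    String candidate = taskId;
    for (int counter = 1; existing.contains(candidate); counter++) {
      candidate = taskId + COPY_KEYWORD + counter;
    }
    return candidate;
  }

  public static void main(String[] args) {
    // Filenames taken from the commit: Hive-generated names pass, LOAD DATA style names do not.
    for (String name : new String[] {"000000_0", "000000_0_copy_1", "part-m-00000_1417075294718"}) {
      System.out.println(name + " -> managed=" + isHiveManagedFileName(name));
    }
    // If 000000_0 and 000000_0_copy_1 already exist, the next candidate is 000000_0_copy_2.
    Set<String> existing = new HashSet<>(Arrays.asList("000000_0", "000000_0_copy_1"));
    System.out.println("rename target: " + renameTarget("000000_0", existing));
  }
}

Files that do not match either pattern are the ones the merge would previously have left for MoveTask to misinterpret as speculative-execution duplicates; renaming them into the staging directory before the move avoids that data loss.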
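Editor's note: a second standalone sketch (again not the actual DDLSemanticAnalyzer code) summarizing the order of the concatenate eligibility checks after this commit. The TableInfo type, SemanticException class, and message strings below are simplified stand-ins for Hive's Table, ErrorMsg, and SemanticException; the messages are copied from the ErrorMsg entries added above.

// Illustrative summary of analyzeAlterTablePartMergeFiles() validation order; not Hive code.
public class ConcatenateEligibilityDemo {

  static class SemanticException extends Exception {
    SemanticException(String msg) { super(msg); }
  }

  enum TableType { MANAGED_TABLE, EXTERNAL_TABLE }

  // Simplified view of the table/partition properties the analyzer consults.
  static class TableInfo {
    boolean rcOrOrcFormat;
    boolean bucketed;
    boolean archivedPartition;
    boolean nonNative;
    TableType tableType = TableType.MANAGED_TABLE;
    boolean transactional;
  }

  static void checkConcatenateSupported(TableInfo t) throws SemanticException {
    if (!t.rcOrOrcFormat) {
      throw new SemanticException("Concatenate/Merge only supported for RCFile and ORCFile formats");
    }
    if (t.bucketed) {
      throw new SemanticException("Concatenate/Merge can not be performed on bucketed tables");
    }
    if (t.archivedPartition) {
      throw new SemanticException("Concatenate/Merge can not be performed on archived partitions");
    }
    if (t.nonNative) {
      throw new SemanticException("Concatenate/Merge can not be performed on non-native tables");
    }
    if (t.tableType != TableType.MANAGED_TABLE) {
      throw new SemanticException("Concatenate/Merge can only be performed on managed tables");
    }
    if (t.transactional) {
      throw new SemanticException("Concatenate/Merge can not be performed on transactional tables");
    }
  }

  public static void main(String[] args) {
    // Mirrors merge_negative_5.q: a managed ORC table that is transactional fails the last check.
    TableInfo acidTable = new TableInfo();
    acidTable.rcOrOrcFormat = true;
    acidTable.transactional = true;
    try {
      checkConcatenateSupported(acidTable);
    } catch (SemanticException e) {
      System.out.println("FAILED: " + e.getMessage());
    }
  }
}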