This is an automated email from the ASF dual-hosted git repository.
dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 5bce6de3463 HIVE-27536: Merge task must be invoked after optimisation
for external CTAS queries (Sourabh Badhya, reviewed by Attila Turoczy, Denys
Kuzmenko, Zhihua Deng)
5bce6de3463 is described below
commit 5bce6de3463b2381a18d554901b4ed634dbd7b76
Author: Sourabh Badhya <[email protected]>
AuthorDate: Mon Aug 28 10:28:12 2023 +0530
HIVE-27536: Merge task must be invoked after optimisation for external CTAS
queries (Sourabh Badhya, reviewed by Attila Turoczy, Denys Kuzmenko, Zhihua
Deng)
Closes #4520
---
.../org/apache/hadoop/hive/ql/exec/Utilities.java | 12 +-
.../hadoop/hive/ql/io/merge/MergeFileWork.java | 11 -
.../ql/plan/ConditionalResolverMergeFiles.java | 197 +++++--
.../org/apache/hadoop/hive/ql/plan/MapWork.java | 20 +
.../hadoop/hive/ql/exec/TestFileSinkOperator.java | 3 +-
ql/src/test/queries/clientpositive/merge_ctas.q | 101 ++++
.../results/clientpositive/llap/merge_ctas.q.out | 650 +++++++++++++++++++++
7 files changed, 927 insertions(+), 67 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 42d91d536b6..470d052d898 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -1039,7 +1039,8 @@ public final class Utilities {
return src;
}
- private static final String tmpPrefix = "_tmp.";
+ private static final String hadoopTmpPrefix = "_tmp.";
+ private static final String tmpPrefix = "-tmp.";
private static final String taskTmpPrefix = "_task_tmp.";
public static Path toTaskTempPath(Path orig) {
@@ -1070,7 +1071,7 @@ public final class Utilities {
String name = file.getPath().getName();
// in addition to detecting hive temporary files, we also check hadoop
// temporary folders that used to show up in older releases
- return (name.startsWith("_task") || name.startsWith(tmpPrefix));
+ return (name.startsWith("_task") || name.startsWith(tmpPrefix) ||
name.startsWith(hadoopTmpPrefix));
}
/**
@@ -1393,7 +1394,7 @@ public final class Utilities {
}
- private static boolean shouldAvoidRename(FileSinkDesc conf, Configuration
hConf) {
+ public static boolean shouldAvoidRename(FileSinkDesc conf, Configuration
hConf) {
// we are avoiding rename/move only if following conditions are met
// * execution engine is tez
// * if it is select query
@@ -3524,6 +3525,9 @@ public final class Utilities {
Set<Path> pathsProcessed = new HashSet<Path>();
List<Path> pathsToAdd = new LinkedList<Path>();
DriverState driverState = DriverState.getDriverState();
+ if (work.isUseInputPathsDirectly() && work.getInputPaths() != null) {
+ return work.getInputPaths();
+ }
// AliasToWork contains all the aliases
Collection<String> aliasToWork = work.getAliasToWork().keySet();
if (!skipDummy) {
@@ -4555,7 +4559,7 @@ public final class Utilities {
if (isDelete) {
deltaDir = AcidUtils.deleteDeltaSubdir(writeId, writeId, stmtId);
}
- Path manifestPath = new Path(manifestRoot, "_tmp." + deltaDir);
+ Path manifestPath = new Path(manifestRoot, Utilities.toTempPath(deltaDir));
if (isInsertOverwrite) {
// When doing a multi-statement insert overwrite query with dynamic
partitioning, the
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java
index 594289eda4f..53a4f3a8431 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java
@@ -46,7 +46,6 @@ import java.util.List;
public class MergeFileWork extends MapWork {
private static final Logger LOG =
LoggerFactory.getLogger(MergeFileWork.class);
- private List<Path> inputPaths;
private Path outputDir;
private boolean hasDynamicPartitions;
private boolean isListBucketingAlterTableConcatenate;
@@ -84,14 +83,6 @@ public class MergeFileWork extends MapWork {
this.isListBucketingAlterTableConcatenate = false;
}
- public List<Path> getInputPaths() {
- return inputPaths;
- }
-
- public void setInputPaths(List<Path> inputPaths) {
- this.inputPaths = inputPaths;
- }
-
public Path getOutputDir() {
return outputDir;
}
@@ -137,8 +128,6 @@ public class MergeFileWork extends MapWork {
aliases, partDesc);
// set internal input format for all partition descriptors
partDesc.setInputFileFormatClass(internalInputFormat);
- // Add the DP path to the list of input paths
- inputPaths.add(path);
}
/**
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
index da20eceba30..0e6816ae405 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
@@ -20,18 +20,27 @@ package org.apache.hadoop.hive.ql.plan;
import java.io.IOException;
import java.io.Serializable;
+import java.nio.charset.Charset;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
+import java.util.LongSummaryStatistics;
import java.util.Map;
+import java.util.stream.Collectors;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.utils.FileUtils;
+import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.slf4j.Logger;
@@ -151,6 +160,11 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
}
int lbLevel = (ctx.getLbCtx() == null) ? 0 :
ctx.getLbCtx().calculateListBucketingLevel();
+ boolean manifestFilePresent = false;
+ FileSystem manifestFs = dirPath.getFileSystem(conf);
+ if (manifestFs.exists(new Path(dirPath,
Utilities.BLOB_MANIFEST_FILE))) {
+ manifestFilePresent = true;
+ }
/**
* In order to make code easier to read, we write the following in the
way:
@@ -168,15 +182,25 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
int dpLbLevel = numDPCols + lbLevel;
generateActualTasks(conf, resTsks, trgtSize, avgConditionSize,
mvTask, mrTask,
- mrAndMvTask, dirPath, inpFs, ctx, work, dpLbLevel);
+ mrAndMvTask, dirPath, inpFs, ctx, work, dpLbLevel,
manifestFilePresent);
} else { // no dynamic partitions
if(lbLevel == 0) {
// static partition without list bucketing
- long totalSz = getMergeSize(inpFs, dirPath, avgConditionSize);
- Utilities.FILE_OP_LOGGER.debug("merge resolve simple case -
totalSz " + totalSz + " from " + dirPath);
+ List<FileStatus> manifestFilePaths = new ArrayList<>();
+ long totalSize;
+ if (manifestFilePresent) {
+ manifestFilePaths = getManifestFilePaths(conf, dirPath);
+ totalSize = getMergeSize(manifestFilePaths, avgConditionSize);
+ } else {
+ totalSize = getMergeSize(inpFs, dirPath, avgConditionSize);
+ Utilities.FILE_OP_LOGGER.debug("merge resolve simple case -
totalSize " + totalSize + " from " + dirPath);
+ }
- if (totalSz >= 0) { // add the merge job
- setupMapRedWork(conf, work, trgtSize, totalSz);
+ if (totalSize >= 0) { // add the merge job
+ if (manifestFilePresent) {
+ setupWorkWhenUsingManifestFile(work, manifestFilePaths,
dirPath, true);
+ }
+ setupMapRedWork(conf, work, trgtSize, totalSize);
resTsks.add(mrTask);
} else { // don't need to merge, add the move job
resTsks.add(mvTask);
@@ -184,7 +208,7 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
} else {
// static partition and list bucketing
generateActualTasks(conf, resTsks, trgtSize, avgConditionSize,
mvTask, mrTask,
- mrAndMvTask, dirPath, inpFs, ctx, work, lbLevel);
+ mrAndMvTask, dirPath, inpFs, ctx, work, lbLevel,
manifestFilePresent);
}
}
} else {
@@ -229,11 +253,22 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
private void generateActualTasks(HiveConf conf, List<Task<?>> resTsks,
long trgtSize, long avgConditionSize, Task<?> mvTask,
Task<?> mrTask, Task<?> mrAndMvTask, Path dirPath,
- FileSystem inpFs, ConditionalResolverMergeFilesCtx ctx, MapWork work,
int dpLbLevel)
+ FileSystem inpFs, ConditionalResolverMergeFilesCtx ctx, MapWork work,
int dpLbLevel,
+ boolean manifestFilePresent)
throws IOException {
DynamicPartitionCtx dpCtx = ctx.getDPCtx();
- // get list of dynamic partitions
- List<FileStatus> statusList = HiveStatsUtils.getFileStatusRecurse(dirPath,
dpLbLevel, inpFs);
+ List<FileStatus> statusList;
+ Map<FileStatus, List<FileStatus>> manifestDirToFile = new HashMap<>();
+ if (manifestFilePresent) {
+ // Get the list of files from manifest file.
+ List<FileStatus> fileStatuses = getManifestFilePaths(conf, dirPath);
+ // Setup the work to include all the files present in the manifest.
+ setupWorkWhenUsingManifestFile(work, fileStatuses, dirPath, false);
+ manifestDirToFile = getManifestDirs(inpFs, fileStatuses);
+ statusList = new ArrayList<>(manifestDirToFile.keySet());
+ } else {
+ statusList = HiveStatsUtils.getFileStatusRecurse(dirPath, dpLbLevel,
inpFs);
+ }
FileStatus[] status = statusList.toArray(new
FileStatus[statusList.size()]);
// cleanup pathToPartitionInfo
@@ -253,15 +288,21 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
work.removePathToAlias(path); // the root path is not useful anymore
// populate pathToPartitionInfo and pathToAliases w/ DP paths
- long totalSz = 0;
+ long totalSize = 0;
boolean doMerge = false;
// list of paths that don't need to merge but need to move to the dest
location
- List<Path> toMove = new ArrayList<Path>();
+ List<Path> toMove = new ArrayList<>();
+ List<Path> toMerge = new ArrayList<>();
for (int i = 0; i < status.length; ++i) {
- long len = getMergeSize(inpFs, status[i].getPath(), avgConditionSize);
+ long len;
+ if (manifestFilePresent) {
+ len = getMergeSize(manifestDirToFile.get(status[i]), avgConditionSize);
+ } else {
+ len = getMergeSize(inpFs, status[i].getPath(), avgConditionSize);
+ }
if (len >= 0) {
doMerge = true;
- totalSz += len;
+ totalSize += len;
PartitionDesc pDesc = (dpCtx != null) ? generateDPFullPartSpec(dpCtx,
status, tblDesc, i)
: partDesc;
if (pDesc == null) {
@@ -271,6 +312,13 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
Utilities.FILE_OP_LOGGER.debug("merge resolver will merge " +
status[i].getPath());
work.resolveDynamicPartitionStoredAsSubDirsMerge(conf,
status[i].getPath(), tblDesc,
aliases, pDesc);
+ // Do not add input file since it's already added when the manifest
file is present.
+ if (manifestFilePresent) {
+ toMerge.addAll(manifestDirToFile.get(status[i])
+ .stream().map(FileStatus::getPath).collect(Collectors.toList()));
+ } else {
+ toMerge.add(status[i].getPath());
+ }
} else {
Utilities.FILE_OP_LOGGER.debug("merge resolver will move " +
status[i].getPath());
@@ -278,8 +326,13 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
}
}
if (doMerge) {
+ // Set paths appropriately.
+ if (work.getInputPaths() != null && !work.getInputPaths().isEmpty()) {
+ toMerge.addAll(work.getInputPaths());
+ }
+ work.setInputPaths(toMerge);
// add the merge MR job
- setupMapRedWork(conf, work, trgtSize, totalSz);
+ setupMapRedWork(conf, work, trgtSize, totalSize);
// add the move task for those partitions that do not need merging
if (toMove.size() > 0) {
@@ -359,11 +412,11 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
mWork.setIsMergeFromResolver(true);
}
- private static class AverageSize {
+ private static class FileSummary {
private final long totalSize;
- private final int numFiles;
+ private final long numFiles;
- public AverageSize(long totalSize, int numFiles) {
+ public FileSummary(long totalSize, long numFiles) {
this.totalSize = totalSize;
this.numFiles = numFiles;
}
@@ -372,64 +425,106 @@ public class ConditionalResolverMergeFiles implements
ConditionalResolver,
return totalSize;
}
- public int getNumFiles() {
+ public long getNumFiles() {
return numFiles;
}
}
- private AverageSize getAverageSize(FileSystem inpFs, Path dirPath) {
- AverageSize error = new AverageSize(-1, -1);
- try {
- FileStatus[] fStats = inpFs.listStatus(dirPath);
-
- long totalSz = 0;
- int numFiles = 0;
- for (FileStatus fStat : fStats) {
- Utilities.FILE_OP_LOGGER.debug("Resolver looking at " +
fStat.getPath());
- if (fStat.isDir()) {
- AverageSize avgSzDir = getAverageSize(inpFs, fStat.getPath());
- if (avgSzDir.getTotalSize() < 0) {
- return error;
- }
- totalSz += avgSzDir.getTotalSize();
- numFiles += avgSzDir.getNumFiles();
- }
- else {
- totalSz += fStat.getLen();
- numFiles++;
- }
- }
+ private FileSummary getFileSummary(List<FileStatus> fileStatusList) {
+ LongSummaryStatistics stats =
fileStatusList.stream().filter(FileStatus::isFile)
+ .mapToLong(FileStatus::getLen).summaryStatistics();
+ return new FileSummary(stats.getSum(), stats.getCount());
+ }
- return new AverageSize(totalSz, numFiles);
- } catch (IOException e) {
- return error;
+ private List<FileStatus> getManifestFilePaths(HiveConf conf, Path dirPath)
throws IOException {
+ FileSystem manifestFs = dirPath.getFileSystem(conf);
+ List<String> filesKept;
+ List<FileStatus> pathsKept = new ArrayList<>();
+ try (FSDataInputStream inStream = manifestFs.open(new Path(dirPath,
Utilities.BLOB_MANIFEST_FILE))) {
+ String paths = IOUtils.toString(inStream, Charset.defaultCharset());
+ filesKept = Lists.newArrayList(paths.split(System.lineSeparator()));
}
+ // The first string contains the directory information. Not useful.
+ filesKept.remove(0);
+
+ for (String file : filesKept) {
+ pathsKept.add(manifestFs.getFileStatus(new Path(file)));
+ }
+ return pathsKept;
+ }
+
+ private long getMergeSize(FileSystem inpFs, Path dirPath, long avgSize) {
+ List<FileStatus> result = FileUtils.getFileStatusRecurse(dirPath, inpFs);
+ return getMergeSize(result, avgSize);
}
/**
* Whether to merge files inside directory given the threshold of the
average file size.
*
- * @param inpFs input file system.
- * @param dirPath input file directory.
+ * @param fileStatuses a list of FileStatus instances.
* @param avgSize threshold of average file size.
* @return -1 if there is no need to merge (either because there is only 1 file
or the
* average size is larger than avgSize). Otherwise the total
size of files.
* If return value is 0 that means there are multiple files each of which is
an empty file.
* This could be true when the table is bucketized and all buckets are empty.
*/
- private long getMergeSize(FileSystem inpFs, Path dirPath, long avgSize) {
- AverageSize averageSize = getAverageSize(inpFs, dirPath);
- if (averageSize.getTotalSize() < 0) {
+ private long getMergeSize(List<FileStatus> fileStatuses, long avgSize) {
+ FileSummary fileSummary = getFileSummary(fileStatuses);
+ if (fileSummary.getTotalSize() <= 0) {
return -1;
}
- if (averageSize.getNumFiles() <= 1) {
+ if (fileSummary.getNumFiles() <= 1) {
return -1;
}
- if (averageSize.getTotalSize()/averageSize.getNumFiles() < avgSize) {
- return averageSize.getTotalSize();
+ if (fileSummary.getTotalSize() / fileSummary.getNumFiles() < avgSize) {
+ return fileSummary.getTotalSize();
}
return -1;
}
+
+ private void setupWorkWhenUsingManifestFile(MapWork mapWork,
List<FileStatus> fileStatuses, Path dirPath,
+ boolean isTblLevel) {
+ Map<String, Operator<? extends OperatorDesc>> aliasToWork =
mapWork.getAliasToWork();
+ Map<Path, PartitionDesc> pathToPartitionInfo =
mapWork.getPathToPartitionInfo();
+ Operator<? extends OperatorDesc> op = aliasToWork.get(dirPath.toString());
+ PartitionDesc partitionDesc = pathToPartitionInfo.get(dirPath);
+ Path tmpDirPath = Utilities.toTempPath(dirPath);
+ if (op != null) {
+ aliasToWork.remove(dirPath.toString());
+ aliasToWork.put(tmpDirPath.toString(), op);
+ mapWork.setAliasToWork(aliasToWork);
+ }
+ if (partitionDesc != null) {
+ pathToPartitionInfo.remove(dirPath);
+ pathToPartitionInfo.put(tmpDirPath, partitionDesc);
+ mapWork.setPathToPartitionInfo(pathToPartitionInfo);
+ }
+ mapWork.removePathToAlias(dirPath);
+ mapWork.addPathToAlias(tmpDirPath, tmpDirPath.toString());
+ if (isTblLevel) {
+ List<Path> inputPaths = fileStatuses.stream()
+ .filter(FileStatus::isFile)
+ .map(FileStatus::getPath).collect(Collectors.toList());
+ mapWork.setInputPaths(inputPaths);
+ }
+ mapWork.setUseInputPathsDirectly(true);
+ }
+
+ private Map<FileStatus, List<FileStatus>> getManifestDirs(FileSystem inpFs,
List<FileStatus> fileStatuses)
+ throws IOException {
+ Map<FileStatus, List<FileStatus>> manifestDirsToPaths = new HashMap<>();
+ for (FileStatus fileStatus : fileStatuses) {
+ if (!fileStatus.isDirectory()) {
+ FileStatus parentDir =
inpFs.getFileStatus(fileStatus.getPath().getParent());
+ List<FileStatus> fileStatusList = Lists.newArrayList(fileStatus);
+ manifestDirsToPaths.merge(parentDir, fileStatusList, (oldValue,
newValue) -> {
+ oldValue.addAll(newValue);
+ return oldValue;
+ });
+ }
+ }
+ return manifestDirsToPaths;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
index 17e105310c9..076ef0a99b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
@@ -180,6 +180,10 @@ public class MapWork extends BaseWork {
private ProbeDecodeContext probeDecodeContext = null;
+ protected List<Path> inputPaths;
+
+ private boolean useInputPathsDirectly;
+
public MapWork() {}
public MapWork(String name) {
@@ -934,4 +938,20 @@ public class MapWork extends BaseWork {
}
return new MapExplainVectorization(this);
}
+
+ public List<Path> getInputPaths() {
+ return inputPaths;
+ }
+
+ public void setInputPaths(List<Path> inputPaths) {
+ this.inputPaths = inputPaths;
+ }
+
+ public void setUseInputPathsDirectly(boolean useInputPathsDirectly) {
+ this.useInputPathsDirectly = useInputPathsDirectly;
+ }
+
+ public boolean isUseInputPathsDirectly() {
+ return useInputPathsDirectly;
+ }
}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
index 9b2fd95442e..eae7f69fbca 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
@@ -91,6 +91,7 @@ import java.util.stream.Collectors;
*/
public class TestFileSinkOperator {
private static String PARTCOL_NAME = "partval";
+ private static final String tmpPrefix = "-tmp.";
static final private Logger LOG =
LoggerFactory.getLogger(TestFileSinkOperator.class.getName());
private static File tmpdir;
@@ -462,7 +463,7 @@ public class TestFileSinkOperator {
private Path[] findFilesInBasePath() throws IOException {
Path parent = basePath.getParent();
String last = basePath.getName();
- Path tmpPath = new Path(parent, "_tmp." + last);
+ Path tmpPath = new Path(parent, tmpPrefix + last);
FileSystem fs = basePath.getFileSystem(jc);
List<Path> paths = new ArrayList<Path>();
recurseOnPath(tmpPath, fs, paths);
diff --git a/ql/src/test/queries/clientpositive/merge_ctas.q
b/ql/src/test/queries/clientpositive/merge_ctas.q
new file mode 100644
index 00000000000..3ea07459908
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/merge_ctas.q
@@ -0,0 +1,101 @@
+--! qt:dataset:src
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.merge.mapredfiles=true;
+set hive.merge.mapfiles=true;
+set hive.merge.tezfiles=true;
+set hive.blobstore.supported.schemes=hdfs,file;
+
+-- SORT_QUERY_RESULTS
+
+create table part_source(key string, value string) partitioned by (ds string);
+create table source(key string);
+create table one_part_source(key string, value string) partitioned by (ds
string);
+
+-- The partitioned table must have 2 files per partition (necessary for merge
task)
+insert overwrite table part_source partition(ds='102') select * from src;
+insert into table part_source partition(ds='102') select * from src;
+insert overwrite table part_source partition(ds='103') select * from src;
+insert into table part_source partition(ds='103') select * from src;
+
+-- The unpartitioned table must have 2 files.
+insert overwrite table source select key from src;
+insert into table source select key from src;
+
+-- The partitioned table must have 1 file on one partition and 2 files on
another partition (check merge and move task execution)
+insert overwrite table one_part_source partition(ds='102') select * from src;
+insert overwrite table one_part_source partition(ds='103') select * from src;
+insert into table one_part_source partition(ds='103') select * from src;
+
+select count(*) from source;
+select count(*) from part_source;
+select count(*) from one_part_source;
+
+-- Create CTAS tables both for unpartitioned and partitioned cases for ORC
formats.
+create external table ctas_table stored as orc as select * from source;
+create external table ctas_part_table partitioned by (ds) stored as orc as
select * from part_source;
+create external table ctas_one_part_table partitioned by (ds) stored as orc as
select * from one_part_source;
+
+select count(*) from ctas_table;
+select count(*) from ctas_part_table;
+select count(*) from ctas_one_part_table;
+
+-- This must be 1 indicating there is 1 file after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_table;
+-- This must be 2 indicating there is 1 file per partition after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_part_table;
+-- This must be 2 indicating there is 1 file per partition after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_one_part_table;
+
+-- Create CTAS tables both for unpartitioned and partitioned cases for non-ORC
formats.
+create external table ctas_table_non_orc as select * from source;
+create external table ctas_part_table_non_orc partitioned by (ds) as select *
from part_source;
+create external table ctas_one_part_table_non_orc partitioned by (ds) stored
as orc as select * from one_part_source;
+
+select count(*) from ctas_table_non_orc;
+select count(*) from ctas_part_table_non_orc;
+select count(*) from ctas_one_part_table_non_orc;
+
+-- This must be 1 indicating there is 1 file after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_table_non_orc;
+-- This must be 2 indicating there is 1 file per partition after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_part_table_non_orc;
+-- This must be 2 indicating there is 1 file per partition after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_one_part_table_non_orc;
+
+-- Create CTAS tables with union both for unpartitioned and partitioned cases
for ORC formats.
+create external table ctas_table_orc_union stored as orc as ((select * from
part_source where ds = '102') union (select * from part_source where ds =
'103'));
+create external table ctas_table_part_orc_union partitioned by (ds) stored as
orc as ((select * from part_source where ds = '102') union (select * from
part_source where ds = '103'));
+
+select count(*) from ((select * from part_source where ds = '102') union
(select * from part_source where ds = '103')) count_table;
+
+select count(*) from ctas_table_orc_union;
+select count(*) from ctas_table_part_orc_union;
+
+-- This must be 1 indicating there is 1 file after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_table_orc_union;
+-- This must be 2 indicating there is 1 file per partition after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_table_part_orc_union;
+
+-- Create CTAS tables with union both for unpartitioned and partitioned cases
for non-ORC formats.
+create external table ctas_table_non_orc_union as ((select * from part_source
where ds = '102') union (select * from part_source where ds = '103'));
+create external table ctas_table_part_non_orc_union partitioned by (ds) as
((select * from part_source where ds = '102') union (select * from part_source
where ds = '103'));
+
+select count(*) from ctas_table_non_orc_union;
+select count(*) from ctas_table_part_non_orc_union;
+
+-- This must be 1 indicating there is 1 file after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_table_non_orc_union;
+-- This must be 2 indicating there is 1 file per partition after merge.
+select count(distinct(INPUT__FILE__NAME)) from ctas_table_part_non_orc_union;
+
+-- Create CTAS tables with union-all clause for unpartitioned table for both
ORC and non-ORC format (Note: This doesn't create the manifest file as of this
commit).
+create external table ctas_table_orc_union_all stored as orc as ((select *
from part_source where ds = '102') union all (select * from part_source where
ds = '103'));
+create external table ctas_table_non_orc_union_all as ((select * from
part_source where ds = '102') union all (select * from part_source where ds =
'103'));
+
+select count(*) from ctas_table_orc_union_all;
+select count(*) from ctas_table_non_orc_union_all;
+
+-- This must be 1 indicating there is 1 file after merge in both cases.
+select count(distinct(INPUT__FILE__NAME)) from ctas_table_orc_union_all;
+select count(distinct(INPUT__FILE__NAME)) from ctas_table_non_orc_union_all;
diff --git a/ql/src/test/results/clientpositive/llap/merge_ctas.q.out
b/ql/src/test/results/clientpositive/llap/merge_ctas.q.out
new file mode 100644
index 00000000000..7bc88bccec7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/merge_ctas.q.out
@@ -0,0 +1,650 @@
+PREHOOK: query: create table part_source(key string, value string) partitioned
by (ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_source
+POSTHOOK: query: create table part_source(key string, value string)
partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_source
+PREHOOK: query: create table source(key string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source
+POSTHOOK: query: create table source(key string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source
+PREHOOK: query: create table one_part_source(key string, value string)
partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@one_part_source
+POSTHOOK: query: create table one_part_source(key string, value string)
partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@one_part_source
+PREHOOK: query: insert overwrite table part_source partition(ds='102') select
* from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@part_source@ds=102
+POSTHOOK: query: insert overwrite table part_source partition(ds='102') select
* from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@part_source@ds=102
+POSTHOOK: Lineage: part_source PARTITION(ds=102).key SIMPLE
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: part_source PARTITION(ds=102).value SIMPLE
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into table part_source partition(ds='102') select *
from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@part_source@ds=102
+POSTHOOK: query: insert into table part_source partition(ds='102') select *
from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@part_source@ds=102
+POSTHOOK: Lineage: part_source PARTITION(ds=102).key SIMPLE
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: part_source PARTITION(ds=102).value SIMPLE
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table part_source partition(ds='103') select
* from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@part_source@ds=103
+POSTHOOK: query: insert overwrite table part_source partition(ds='103') select
* from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@part_source@ds=103
+POSTHOOK: Lineage: part_source PARTITION(ds=103).key SIMPLE
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: part_source PARTITION(ds=103).value SIMPLE
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into table part_source partition(ds='103') select *
from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@part_source@ds=103
+POSTHOOK: query: insert into table part_source partition(ds='103') select *
from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@part_source@ds=103
+POSTHOOK: Lineage: part_source PARTITION(ds=103).key SIMPLE
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: part_source PARTITION(ds=103).value SIMPLE
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table source select key from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@source
+POSTHOOK: query: insert overwrite table source select key from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@source
+POSTHOOK: Lineage: source.key SIMPLE [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+PREHOOK: query: insert into table source select key from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@source
+POSTHOOK: query: insert into table source select key from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@source
+POSTHOOK: Lineage: source.key SIMPLE [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table one_part_source partition(ds='102')
select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@one_part_source@ds=102
+POSTHOOK: query: insert overwrite table one_part_source partition(ds='102')
select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@one_part_source@ds=102
+POSTHOOK: Lineage: one_part_source PARTITION(ds=102).key SIMPLE
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: one_part_source PARTITION(ds=102).value SIMPLE
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table one_part_source partition(ds='103')
select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@one_part_source@ds=103
+POSTHOOK: query: insert overwrite table one_part_source partition(ds='103')
select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@one_part_source@ds=103
+POSTHOOK: Lineage: one_part_source PARTITION(ds=103).key SIMPLE
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: one_part_source PARTITION(ds=103).value SIMPLE
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into table one_part_source partition(ds='103') select *
from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@one_part_source@ds=103
+POSTHOOK: query: insert into table one_part_source partition(ds='103') select
* from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@one_part_source@ds=103
+POSTHOOK: Lineage: one_part_source PARTITION(ds=103).key SIMPLE
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: one_part_source PARTITION(ds=103).value SIMPLE
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select count(*) from source
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source
+#### A masked pattern was here ####
+1000
+PREHOOK: query: select count(*) from part_source
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_source
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part_source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_source
+#### A masked pattern was here ####
+2000
+PREHOOK: query: select count(*) from one_part_source
+PREHOOK: type: QUERY
+PREHOOK: Input: default@one_part_source
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from one_part_source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@one_part_source
+#### A masked pattern was here ####
+1500
+PREHOOK: query: create external table ctas_table stored as orc as select *
from source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_table
+POSTHOOK: query: create external table ctas_table stored as orc as select *
from source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_table
+POSTHOOK: Lineage: ctas_table.key SIMPLE [(source)source.FieldSchema(name:key,
type:string, comment:null), ]
+PREHOOK: query: create external table ctas_part_table partitioned by (ds)
stored as orc as select * from part_source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_part_table
+PREHOOK: Output: default@ctas_part_table
+POSTHOOK: query: create external table ctas_part_table partitioned by (ds)
stored as orc as select * from part_source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_part_table
+POSTHOOK: Output: default@ctas_part_table
+POSTHOOK: Output: default@ctas_part_table@ds=102
+POSTHOOK: Output: default@ctas_part_table@ds=103
+POSTHOOK: Lineage: ctas_part_table.key SIMPLE
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table.value SIMPLE
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table PARTITION(ds=102).key SIMPLE
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table PARTITION(ds=102).value SIMPLE
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table PARTITION(ds=103).key SIMPLE
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table PARTITION(ds=103).value SIMPLE
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: create external table ctas_one_part_table partitioned by (ds)
stored as orc as select * from one_part_source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@one_part_source
+PREHOOK: Input: default@one_part_source@ds=102
+PREHOOK: Input: default@one_part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_one_part_table
+PREHOOK: Output: default@ctas_one_part_table
+POSTHOOK: query: create external table ctas_one_part_table partitioned by (ds)
stored as orc as select * from one_part_source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@one_part_source
+POSTHOOK: Input: default@one_part_source@ds=102
+POSTHOOK: Input: default@one_part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_one_part_table
+POSTHOOK: Output: default@ctas_one_part_table
+POSTHOOK: Output: default@ctas_one_part_table@ds=102
+POSTHOOK: Output: default@ctas_one_part_table@ds=103
+POSTHOOK: Lineage: ctas_one_part_table.key SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:key, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table.value SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:value, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table PARTITION(ds=102).key SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:key, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table PARTITION(ds=102).value SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:value, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table PARTITION(ds=103).key SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:key, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table PARTITION(ds=103).value SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:value, type:string,
comment:null), ]
+PREHOOK: query: select count(*) from ctas_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table
+#### A masked pattern was here ####
+1000
+PREHOOK: query: select count(*) from ctas_part_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_part_table
+PREHOOK: Input: default@ctas_part_table@ds=102
+PREHOOK: Input: default@ctas_part_table@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_part_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_part_table
+POSTHOOK: Input: default@ctas_part_table@ds=102
+POSTHOOK: Input: default@ctas_part_table@ds=103
+#### A masked pattern was here ####
+2000
+PREHOOK: query: select count(*) from ctas_one_part_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_one_part_table
+PREHOOK: Input: default@ctas_one_part_table@ds=102
+PREHOOK: Input: default@ctas_one_part_table@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_one_part_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_one_part_table
+POSTHOOK: Input: default@ctas_one_part_table@ds=102
+POSTHOOK: Input: default@ctas_one_part_table@ds=103
+#### A masked pattern was here ####
+1500
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from ctas_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from ctas_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from ctas_part_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_part_table
+PREHOOK: Input: default@ctas_part_table@ds=102
+PREHOOK: Input: default@ctas_part_table@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from ctas_part_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_part_table
+POSTHOOK: Input: default@ctas_part_table@ds=102
+POSTHOOK: Input: default@ctas_part_table@ds=103
+#### A masked pattern was here ####
+2
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_one_part_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_one_part_table
+PREHOOK: Input: default@ctas_one_part_table@ds=102
+PREHOOK: Input: default@ctas_one_part_table@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_one_part_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_one_part_table
+POSTHOOK: Input: default@ctas_one_part_table@ds=102
+POSTHOOK: Input: default@ctas_one_part_table@ds=103
+#### A masked pattern was here ####
+2
+PREHOOK: query: create external table ctas_table_non_orc as select * from
source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_table_non_orc
+POSTHOOK: query: create external table ctas_table_non_orc as select * from
source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_table_non_orc
+POSTHOOK: Lineage: ctas_table_non_orc.key SIMPLE
[(source)source.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: create external table ctas_part_table_non_orc partitioned by
(ds) as select * from part_source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_part_table_non_orc
+PREHOOK: Output: default@ctas_part_table_non_orc
+POSTHOOK: query: create external table ctas_part_table_non_orc partitioned by
(ds) as select * from part_source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_part_table_non_orc
+POSTHOOK: Output: default@ctas_part_table_non_orc
+POSTHOOK: Output: default@ctas_part_table_non_orc@ds=102
+POSTHOOK: Output: default@ctas_part_table_non_orc@ds=103
+POSTHOOK: Lineage: ctas_part_table_non_orc.key SIMPLE
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table_non_orc.value SIMPLE
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table_non_orc PARTITION(ds=102).key SIMPLE
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table_non_orc PARTITION(ds=102).value SIMPLE
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table_non_orc PARTITION(ds=103).key SIMPLE
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_part_table_non_orc PARTITION(ds=103).value SIMPLE
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: create external table ctas_one_part_table_non_orc partitioned
by (ds) stored as orc as select * from one_part_source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@one_part_source
+PREHOOK: Input: default@one_part_source@ds=102
+PREHOOK: Input: default@one_part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_one_part_table_non_orc
+PREHOOK: Output: default@ctas_one_part_table_non_orc
+POSTHOOK: query: create external table ctas_one_part_table_non_orc partitioned
by (ds) stored as orc as select * from one_part_source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@one_part_source
+POSTHOOK: Input: default@one_part_source@ds=102
+POSTHOOK: Input: default@one_part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_one_part_table_non_orc
+POSTHOOK: Output: default@ctas_one_part_table_non_orc
+POSTHOOK: Output: default@ctas_one_part_table_non_orc@ds=102
+POSTHOOK: Output: default@ctas_one_part_table_non_orc@ds=103
+POSTHOOK: Lineage: ctas_one_part_table_non_orc.key SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:key, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table_non_orc.value SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:value, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table_non_orc PARTITION(ds=102).key SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:key, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table_non_orc PARTITION(ds=102).value SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:value, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table_non_orc PARTITION(ds=103).key SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:key, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_one_part_table_non_orc PARTITION(ds=103).value SIMPLE
[(one_part_source)one_part_source.FieldSchema(name:value, type:string,
comment:null), ]
+PREHOOK: query: select count(*) from ctas_table_non_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_non_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_table_non_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_non_orc
+#### A masked pattern was here ####
+1000
+PREHOOK: query: select count(*) from ctas_part_table_non_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_part_table_non_orc
+PREHOOK: Input: default@ctas_part_table_non_orc@ds=102
+PREHOOK: Input: default@ctas_part_table_non_orc@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_part_table_non_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_part_table_non_orc
+POSTHOOK: Input: default@ctas_part_table_non_orc@ds=102
+POSTHOOK: Input: default@ctas_part_table_non_orc@ds=103
+#### A masked pattern was here ####
+2000
+PREHOOK: query: select count(*) from ctas_one_part_table_non_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_one_part_table_non_orc
+PREHOOK: Input: default@ctas_one_part_table_non_orc@ds=102
+PREHOOK: Input: default@ctas_one_part_table_non_orc@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_one_part_table_non_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_one_part_table_non_orc
+POSTHOOK: Input: default@ctas_one_part_table_non_orc@ds=102
+POSTHOOK: Input: default@ctas_one_part_table_non_orc@ds=103
+#### A masked pattern was here ####
+1500
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_non_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_non_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_non_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_non_orc
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_part_table_non_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_part_table_non_orc
+PREHOOK: Input: default@ctas_part_table_non_orc@ds=102
+PREHOOK: Input: default@ctas_part_table_non_orc@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_part_table_non_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_part_table_non_orc
+POSTHOOK: Input: default@ctas_part_table_non_orc@ds=102
+POSTHOOK: Input: default@ctas_part_table_non_orc@ds=103
+#### A masked pattern was here ####
+2
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_one_part_table_non_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_one_part_table_non_orc
+PREHOOK: Input: default@ctas_one_part_table_non_orc@ds=102
+PREHOOK: Input: default@ctas_one_part_table_non_orc@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_one_part_table_non_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_one_part_table_non_orc
+POSTHOOK: Input: default@ctas_one_part_table_non_orc@ds=102
+POSTHOOK: Input: default@ctas_one_part_table_non_orc@ds=103
+#### A masked pattern was here ####
+2
+PREHOOK: query: create external table ctas_table_orc_union stored as orc as
((select * from part_source where ds = '102') union (select * from part_source
where ds = '103'))
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_table_orc_union
+POSTHOOK: query: create external table ctas_table_orc_union stored as orc as
((select * from part_source where ds = '102') union (select * from part_source
where ds = '103'))
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_table_orc_union
+POSTHOOK: Lineage: ctas_table_orc_union.ds EXPRESSION []
+POSTHOOK: Lineage: ctas_table_orc_union.key EXPRESSION
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_orc_union.value EXPRESSION
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: create external table ctas_table_part_orc_union partitioned by
(ds) stored as orc as ((select * from part_source where ds = '102') union
(select * from part_source where ds = '103'))
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_table_part_orc_union
+PREHOOK: Output: default@ctas_table_part_orc_union
+POSTHOOK: query: create external table ctas_table_part_orc_union partitioned
by (ds) stored as orc as ((select * from part_source where ds = '102') union
(select * from part_source where ds = '103'))
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_table_part_orc_union
+POSTHOOK: Output: default@ctas_table_part_orc_union
+POSTHOOK: Output: default@ctas_table_part_orc_union@ds=102
+POSTHOOK: Output: default@ctas_table_part_orc_union@ds=103
+POSTHOOK: Lineage: ctas_table_part_orc_union.key EXPRESSION
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_orc_union.value EXPRESSION
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_orc_union PARTITION(ds=102).key EXPRESSION
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_orc_union PARTITION(ds=102).value
EXPRESSION [(part_source)part_source.FieldSchema(name:value, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_orc_union PARTITION(ds=103).key EXPRESSION
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_orc_union PARTITION(ds=103).value
EXPRESSION [(part_source)part_source.FieldSchema(name:value, type:string,
comment:null), ]
+PREHOOK: query: select count(*) from ((select * from part_source where ds =
'102') union (select * from part_source where ds = '103')) count_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ((select * from part_source where ds =
'102') union (select * from part_source where ds = '103')) count_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+#### A masked pattern was here ####
+618
+PREHOOK: query: select count(*) from ctas_table_orc_union
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_orc_union
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_table_orc_union
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_orc_union
+#### A masked pattern was here ####
+618
+PREHOOK: query: select count(*) from ctas_table_part_orc_union
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_part_orc_union
+PREHOOK: Input: default@ctas_table_part_orc_union@ds=102
+PREHOOK: Input: default@ctas_table_part_orc_union@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_table_part_orc_union
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_part_orc_union
+POSTHOOK: Input: default@ctas_table_part_orc_union@ds=102
+POSTHOOK: Input: default@ctas_table_part_orc_union@ds=103
+#### A masked pattern was here ####
+618
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_orc_union
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_orc_union
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_orc_union
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_orc_union
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_part_orc_union
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_part_orc_union
+PREHOOK: Input: default@ctas_table_part_orc_union@ds=102
+PREHOOK: Input: default@ctas_table_part_orc_union@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_part_orc_union
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_part_orc_union
+POSTHOOK: Input: default@ctas_table_part_orc_union@ds=102
+POSTHOOK: Input: default@ctas_table_part_orc_union@ds=103
+#### A masked pattern was here ####
+2
+PREHOOK: query: create external table ctas_table_non_orc_union as ((select *
from part_source where ds = '102') union (select * from part_source where ds =
'103'))
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_table_non_orc_union
+POSTHOOK: query: create external table ctas_table_non_orc_union as ((select *
from part_source where ds = '102') union (select * from part_source where ds =
'103'))
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_table_non_orc_union
+POSTHOOK: Lineage: ctas_table_non_orc_union.ds EXPRESSION []
+POSTHOOK: Lineage: ctas_table_non_orc_union.key EXPRESSION
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_non_orc_union.value EXPRESSION
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: create external table ctas_table_part_non_orc_union
partitioned by (ds) as ((select * from part_source where ds = '102') union
(select * from part_source where ds = '103'))
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_table_part_non_orc_union
+PREHOOK: Output: default@ctas_table_part_non_orc_union
+POSTHOOK: query: create external table ctas_table_part_non_orc_union
partitioned by (ds) as ((select * from part_source where ds = '102') union
(select * from part_source where ds = '103'))
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_table_part_non_orc_union
+POSTHOOK: Output: default@ctas_table_part_non_orc_union
+POSTHOOK: Output: default@ctas_table_part_non_orc_union@ds=102
+POSTHOOK: Output: default@ctas_table_part_non_orc_union@ds=103
+POSTHOOK: Lineage: ctas_table_part_non_orc_union.key EXPRESSION
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_non_orc_union.value EXPRESSION
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_non_orc_union PARTITION(ds=102).key
EXPRESSION [(part_source)part_source.FieldSchema(name:key, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_non_orc_union PARTITION(ds=102).value
EXPRESSION [(part_source)part_source.FieldSchema(name:value, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_non_orc_union PARTITION(ds=103).key
EXPRESSION [(part_source)part_source.FieldSchema(name:key, type:string,
comment:null), ]
+POSTHOOK: Lineage: ctas_table_part_non_orc_union PARTITION(ds=103).value
EXPRESSION [(part_source)part_source.FieldSchema(name:value, type:string,
comment:null), ]
+PREHOOK: query: select count(*) from ctas_table_non_orc_union
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_non_orc_union
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_table_non_orc_union
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_non_orc_union
+#### A masked pattern was here ####
+618
+PREHOOK: query: select count(*) from ctas_table_part_non_orc_union
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_part_non_orc_union
+PREHOOK: Input: default@ctas_table_part_non_orc_union@ds=102
+PREHOOK: Input: default@ctas_table_part_non_orc_union@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_table_part_non_orc_union
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_part_non_orc_union
+POSTHOOK: Input: default@ctas_table_part_non_orc_union@ds=102
+POSTHOOK: Input: default@ctas_table_part_non_orc_union@ds=103
+#### A masked pattern was here ####
+618
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_non_orc_union
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_non_orc_union
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_non_orc_union
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_non_orc_union
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_part_non_orc_union
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_part_non_orc_union
+PREHOOK: Input: default@ctas_table_part_non_orc_union@ds=102
+PREHOOK: Input: default@ctas_table_part_non_orc_union@ds=103
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_part_non_orc_union
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_part_non_orc_union
+POSTHOOK: Input: default@ctas_table_part_non_orc_union@ds=102
+POSTHOOK: Input: default@ctas_table_part_non_orc_union@ds=103
+#### A masked pattern was here ####
+2
+PREHOOK: query: create external table ctas_table_orc_union_all stored as orc
as ((select * from part_source where ds = '102') union all (select * from
part_source where ds = '103'))
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_table_orc_union_all
+POSTHOOK: query: create external table ctas_table_orc_union_all stored as orc
as ((select * from part_source where ds = '102') union all (select * from
part_source where ds = '103'))
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_table_orc_union_all
+POSTHOOK: Lineage: ctas_table_orc_union_all.ds EXPRESSION []
+POSTHOOK: Lineage: ctas_table_orc_union_all.key EXPRESSION
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_orc_union_all.value EXPRESSION
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: create external table ctas_table_non_orc_union_all as ((select
* from part_source where ds = '102') union all (select * from part_source where
ds = '103'))
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part_source
+PREHOOK: Input: default@part_source@ds=102
+PREHOOK: Input: default@part_source@ds=103
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_table_non_orc_union_all
+POSTHOOK: query: create external table ctas_table_non_orc_union_all as
((select * from part_source where ds = '102') union all (select * from
part_source where ds = '103'))
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part_source
+POSTHOOK: Input: default@part_source@ds=102
+POSTHOOK: Input: default@part_source@ds=103
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_table_non_orc_union_all
+POSTHOOK: Lineage: ctas_table_non_orc_union_all.ds EXPRESSION []
+POSTHOOK: Lineage: ctas_table_non_orc_union_all.key EXPRESSION
[(part_source)part_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_table_non_orc_union_all.value EXPRESSION
[(part_source)part_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from ctas_table_orc_union_all
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_orc_union_all
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_table_orc_union_all
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_orc_union_all
+#### A masked pattern was here ####
+2000
+PREHOOK: query: select count(*) from ctas_table_non_orc_union_all
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_non_orc_union_all
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_table_non_orc_union_all
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_non_orc_union_all
+#### A masked pattern was here ####
+2000
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_orc_union_all
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_orc_union_all
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_orc_union_all
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_orc_union_all
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_non_orc_union_all
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_table_non_orc_union_all
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(INPUT__FILE__NAME)) from
ctas_table_non_orc_union_all
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_table_non_orc_union_all
+#### A masked pattern was here ####
+1