Author: prasanthj
Date: Tue Feb 3 18:13:45 2015
New Revision: 1656871
URL: http://svn.apache.org/r1656871
Log:
HIVE-9529: "alter table .. concatenate" under Tez mode should create TezTask
(Prasanth Jayachandran reviewed by Gunther Hagleitner)
Added:
hive/trunk/ql/src/test/results/clientpositive/tez/orc_merge8.q.out
Modified:
hive/trunk/itests/src/test/resources/testconfiguration.properties
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL:
http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1656871&r1=1656870&r2=1656871&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Tue Feb 3 18:13:45 2015
@@ -134,6 +134,7 @@ minitez.query.files.shared=alter_merge_2
orc_merge5.q,\
orc_merge6.q,\
orc_merge7.q,\
+ orc_merge8.q,\
orc_merge_incompat1.q,\
orc_merge_incompat2.q,\
orc_vectorization_ppd.q,\
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1656871&r1=1656870&r2=1656871&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Tue Feb 3 18:13:45 2015
@@ -18,6 +18,35 @@
package org.apache.hadoop.hive.ql.exec;
+import static org.apache.commons.lang.StringUtils.join;
+import static org.apache.hadoop.util.StringUtils.stringifyException;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Serializable;
+import java.io.Writer;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
@@ -60,6 +89,7 @@ import org.apache.hadoop.hive.ql.DriverC
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo;
+import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
@@ -140,6 +170,7 @@ import org.apache.hadoop.hive.ql.plan.Sh
import org.apache.hadoop.hive.ql.plan.ShowTblPropertiesDesc;
import org.apache.hadoop.hive.ql.plan.ShowTxnsDesc;
import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc;
+import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.TruncateTableDesc;
import org.apache.hadoop.hive.ql.plan.UnlockDatabaseDesc;
import org.apache.hadoop.hive.ql.plan.UnlockTableDesc;
@@ -170,8 +201,8 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.shims.HadoopShims;
-import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus;
+import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ReflectionUtils;
@@ -179,35 +210,6 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.hive.common.util.AnnotationUtils;
import org.stringtemplate.v4.ST;
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Serializable;
-import java.io.Writer;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-import static org.apache.commons.lang.StringUtils.join;
-import static org.apache.hadoop.util.StringUtils.stringifyException;
-
/**
* DDLTask implementation.
*
@@ -567,6 +569,12 @@ public class DDLTask extends Task<DDLWor
// merge work only needs input and output.
MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(),
mergeFilesDesc.getOutputDir(),
mergeFilesDesc.getInputFormatClass().getName());
+ LinkedHashMap<String, ArrayList<String>> pathToAliases =
+ new LinkedHashMap<String, ArrayList<String>>();
+ ArrayList<String> inputDirstr = new ArrayList<String>(1);
+ inputDirstr.add(mergeFilesDesc.getInputDir().toString());
+ pathToAliases.put(mergeFilesDesc.getInputDir().get(0).toString(), inputDirstr);
+ mergeWork.setPathToAliases(pathToAliases);
mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
mergeWork.resolveConcatenateMerge(db.getConf());
mergeWork.setMapperCannotSpanPartns(true);
@@ -592,12 +600,21 @@ public class DDLTask extends Task<DDLWor
aliasToWork.put(mergeFilesDesc.getInputDir().toString(), mergeOp);
mergeWork.setAliasToWork(aliasToWork);
DriverContext driverCxt = new DriverContext();
- MergeFileTask taskExec = new MergeFileTask();
- taskExec.initialize(db.getConf(), null, driverCxt);
- taskExec.setWork(mergeWork);
- taskExec.setQueryPlan(this.getQueryPlan());
- int ret = taskExec.execute(driverCxt);
+ Task task = null;
+ if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
+ TezWork tezWork = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+ mergeWork.setName("File Merge");
+ tezWork.add(mergeWork);
+ task = new TezTask();
+ task.setWork(tezWork);
+ } else {
+ task = new MergeFileTask();
+ task.setWork(mergeWork);
+ }
+ // initialize the task and execute
+ task.initialize(db.getConf(), getQueryPlan(), driverCxt);
+ int ret = task.execute(driverCxt);
return ret;
}
Added: hive/trunk/ql/src/test/results/clientpositive/tez/orc_merge8.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/orc_merge8.q.out?rev=1656871&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/orc_merge8.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/orc_merge8.q.out Tue Feb 3 18:13:45 2015
@@ -0,0 +1,130 @@
+PREHOOK: query: create table if not exists alltypes (
+ bo boolean,
+ ti tinyint,
+ si smallint,
+ i int,
+ bi bigint,
+ f float,
+ d double,
+ de decimal(10,3),
+ ts timestamp,
+ da date,
+ s string,
+ c char(5),
+ vc varchar(5),
+ m map<string, string>,
+ l array<int>,
+ st struct<c1:int, c2:string>
+) row format delimited fields terminated by '|'
+collection items terminated by ','
+map keys terminated by ':' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypes
+POSTHOOK: query: create table if not exists alltypes (
+ bo boolean,
+ ti tinyint,
+ si smallint,
+ i int,
+ bi bigint,
+ f float,
+ d double,
+ de decimal(10,3),
+ ts timestamp,
+ da date,
+ s string,
+ c char(5),
+ vc varchar(5),
+ m map<string, string>,
+ l array<int>,
+ st struct<c1:int, c2:string>
+) row format delimited fields terminated by '|'
+collection items terminated by ','
+map keys terminated by ':' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypes
+PREHOOK: query: create table alltypes_orc like alltypes
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: create table alltypes_orc like alltypes
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: alter table alltypes_orc set fileformat orc
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc set fileformat orc
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: load data local inpath '../../data/files/alltypes2.txt' overwrite into table alltypes
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@alltypes
+POSTHOOK: query: load data local inpath '../../data/files/alltypes2.txt' overwrite into table alltypes
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@alltypes
+PREHOOK: query: insert overwrite table alltypes_orc select * from alltypes
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: insert overwrite table alltypes_orc select * from alltypes
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes
+POSTHOOK: Output: default@alltypes_orc
+POSTHOOK: Lineage: alltypes_orc.bi SIMPLE [(alltypes)alltypes.FieldSchema(name:bi, type:bigint, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.bo SIMPLE [(alltypes)alltypes.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.c SIMPLE [(alltypes)alltypes.FieldSchema(name:c, type:char(5), comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.d SIMPLE [(alltypes)alltypes.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.da SIMPLE [(alltypes)alltypes.FieldSchema(name:da, type:date, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.de SIMPLE [(alltypes)alltypes.FieldSchema(name:de, type:decimal(10,3), comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.f SIMPLE [(alltypes)alltypes.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.i SIMPLE [(alltypes)alltypes.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.l SIMPLE [(alltypes)alltypes.FieldSchema(name:l, type:array<int>, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.m SIMPLE [(alltypes)alltypes.FieldSchema(name:m, type:map<string,string>, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.s SIMPLE [(alltypes)alltypes.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.si SIMPLE [(alltypes)alltypes.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.st SIMPLE [(alltypes)alltypes.FieldSchema(name:st, type:struct<c1:int,c2:string>, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.ti SIMPLE [(alltypes)alltypes.FieldSchema(name:ti, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.ts SIMPLE [(alltypes)alltypes.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.vc SIMPLE [(alltypes)alltypes.FieldSchema(name:vc, type:varchar(5), comment:null), ]
+PREHOOK: query: insert into table alltypes_orc select * from alltypes
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: insert into table alltypes_orc select * from alltypes
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes
+POSTHOOK: Output: default@alltypes_orc
+POSTHOOK: Lineage: alltypes_orc.bi SIMPLE [(alltypes)alltypes.FieldSchema(name:bi, type:bigint, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.bo SIMPLE [(alltypes)alltypes.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.c SIMPLE [(alltypes)alltypes.FieldSchema(name:c, type:char(5), comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.d SIMPLE [(alltypes)alltypes.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.da SIMPLE [(alltypes)alltypes.FieldSchema(name:da, type:date, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.de SIMPLE [(alltypes)alltypes.FieldSchema(name:de, type:decimal(10,3), comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.f SIMPLE [(alltypes)alltypes.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.i SIMPLE [(alltypes)alltypes.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.l SIMPLE [(alltypes)alltypes.FieldSchema(name:l, type:array<int>, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.m SIMPLE [(alltypes)alltypes.FieldSchema(name:m, type:map<string,string>, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.s SIMPLE [(alltypes)alltypes.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.si SIMPLE [(alltypes)alltypes.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.st SIMPLE [(alltypes)alltypes.FieldSchema(name:st, type:struct<c1:int,c2:string>, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.ti SIMPLE [(alltypes)alltypes.FieldSchema(name:ti, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.ts SIMPLE [(alltypes)alltypes.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.vc SIMPLE [(alltypes)alltypes.FieldSchema(name:vc, type:varchar(5), comment:null), ]
+Found 2 items
+#### A masked pattern was here ####
+PREHOOK: query: alter table alltypes_orc concatenate
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc concatenate
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+Found 1 items
+#### A masked pattern was here ####