Hello!
I have a very simple workflow: a Hive action that creates a table in a database.
Here is my property file (I replaced the real values for security reasons):
###
# ------------------------------------------------------------------------------
# Environment
# ------------------------------------------------------------------------------
nameNode=hdfs://<NAMENODE>:8020
jobTracker=<RESOURCE MANAGER>:8050
kerberosRealm=<KERBEROS REALM>
queueName=<myqueue>
hiveMetastoreHost=<HIVEMETASTORE_HOST>
# ------------------------------------------------------------------------------
# Application
# ------------------------------------------------------------------------------
appRoot=${nameNode}/tmp/test
oozie.wf.application.path=${appRoot}/hive.xml
# ------------------------------------------------------------------------------
# Oozie
# ------------------------------------------------------------------------------
oozie.use.system.libpath=true
oozie.wf.rerun.failnodes=true
# ------------------------------------------------------------------------------
# Kerberos
# ------------------------------------------------------------------------------
hcatMetastoreUri=thrift://${hiveMetastoreHost}:9083
hcatMetastorePrincipal=hive/_HOST@${kerberosRealm}
hiveConfPath=${appRoot}/hive-site.xml
# ------------------------------------------------------------------------------
# Tez
# ------------------------------------------------------------------------------
tezLibPath=${nameNode}/hdp/apps/2.3.2.0-2950/tez/
###
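For reference, I submit the job roughly like this (a sketch, assuming the property file above is saved as job.properties; <OOZIE_SRV> and <OOZIE_PORT> are placeholders, as elsewhere in this mail):
###
# submit and start the workflow against the Oozie server
oozie job -oozie=http://<OOZIE_SRV>:<OOZIE_PORT>/oozie -config job.properties -run
###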
Here is the XML file for the workflow:
###
<workflow-app xmlns="uri:oozie:workflow:0.5" name="TEST_HIVE_ACTION">
    <credentials>
        <credential name="hive_credentials" type="hcat">
            <property>
                <name>hcat.metastore.uri</name>
                <value>${hcatMetastoreUri}</value>
            </property>
            <property>
                <name>hcat.metastore.principal</name>
                <value>${hcatMetastorePrincipal}</value>
            </property>
        </credential>
    </credentials>
    <start to="init-hive" />
    <action name="init-hive" cred="hive_credentials">
        <hive xmlns="uri:oozie:hive-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <job-xml>${hiveConfPath}</job-xml>
            <configuration>
                <property>
                    <name>oozie.hive.defaults</name>
                    <value>${hiveConfPath}</value>
                </property>
                <property>
                    <name>hive.execution.engine</name>
                    <value>tez</value>
                </property>
                <property>
                    <name>mapreduce.job.queuename</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>tez.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>tez.lib.uris</name>
                    <value>${tezLibPath}</value>
                </property>
            </configuration>
            <script>hive.hql</script>
            <file>hive.hql#hive.hql</file>
        </hive>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>Script failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
###
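The files are deployed to the application path before submission, roughly like this (a sketch, assuming hive.xml, hive.hql and hive-site.xml sit in the current local directory; /tmp/test matches appRoot from the property file):
###
# copy the workflow definition, script and Hive config to the app path
hdfs dfs -mkdir -p /tmp/test
hdfs dfs -put -f hive.xml hive.hql hive-site.xml /tmp/test/
###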
Here is the HQL file (very simple, it is just for the test ;-)):
###
use mabdd;
create table if not exists test_tez_via_oozie (test string);
###
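Once the workflow succeeds, the table should be visible, for example with (a sketch, assuming the hive CLI is available and a valid Kerberos ticket is held):
###
# check that the table was created in the mabdd database
hive -e "use mabdd; show tables like 'test_tez_via_oozie';"
###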
Here is the hive-site.xml:
###
<configuration>
<property>
<name>ambari.hive.db.schema.name</name>
<value>hive</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>false</value>
</property>
<property>
<name>datanucleus.cache.level2.type</name>
<value>none</value>
</property>
<property>
<name>fs.file.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.join</name>
<value>false</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask.size</name>
<value>999999668</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.to.mapjoin</name>
<value>false</value>
</property>
<property>
<name>hive.cbo.enable</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.header</name>
<value>false</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.class</name>
<value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
<value><srv1>:2181,<srv2>:2181,<srv3>:2181</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.znode</name>
<value>/hive/cluster/delegation</value>
</property>
<property>
<name>hive.compactor.abortedtxn.threshold</name>
<value>1000</value>
</property>
<property>
<name>hive.compactor.check.interval</name>
<value>300L</value>
</property>
<property>
<name>hive.compactor.delta.num.threshold</name>
<value>10</value>
</property>
<property>
<name>hive.compactor.delta.pct.threshold</name>
<value>0.1f</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>false</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>0</value>
</property>
<property>
<name>hive.compactor.worker.timeout</name>
<value>86400L</value>
</property>
<property>
<name>hive.compute.query.using.stats</name>
<value>true</value>
</property>
<property>
<name>hive.conf.restricted.list</name>
<value>hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role</value>
</property>
<property>
<name>hive.convert.join.bucket.mapjoin.tez</name>
<value>false</value>
</property>
<property>
<name>hive.default.fileformat</name>
<value>TextFile</value>
</property>
<property>
<name>hive.default.fileformat.managed</name>
<value>TextFile</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>false</value>
</property>
<property>
<name>hive.enforce.sorting</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.sortmergebucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.exec.compress.intermediate</name>
<value>false</value>
</property>
<property>
<name>hive.exec.compress.output</name>
<value>false</value>
</property>
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>strict</value>
</property>
<property>
<name>hive.exec.failure.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.max.created.files</name>
<value>100000</value>
</property>
<property>
<name>hive.exec.max.dynamic.partitions</name>
<value>5000</value>
</property>
<property>
<name>hive.exec.max.dynamic.partitions.pernode</name>
<value>2000</value>
</property>
<property>
<name>hive.exec.orc.compression.strategy</name>
<value>SPEED</value>
</property>
<property>
<name>hive.exec.orc.default.compress</name>
<value>ZLIB</value>
</property>
<property>
<name>hive.exec.orc.default.stripe.size</name>
<value>67108864</value>
</property>
<property>
<name>hive.exec.orc.encoding.strategy</name>
<value>SPEED</value>
</property>
<property>
<name>hive.exec.parallel</name>
<value>false</value>
</property>
<property>
<name>hive.exec.parallel.thread.number</name>
<value>8</value>
</property>
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>67108864</value>
</property>
<property>
<name>hive.exec.reducers.max</name>
<value>1009</value>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/tmp/hive</value>
</property>
<property>
<name>hive.exec.submit.local.task.via.child</name>
<value>true</value>
</property>
<property>
<name>hive.exec.submitviachild</name>
<value>false</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>mr</value>
</property>
<property>
<name>hive.fetch.task.aggr</name>
<value>false</value>
</property>
<property>
<name>hive.fetch.task.conversion</name>
<value>more</value>
</property>
<property>
<name>hive.fetch.task.conversion.threshold</name>
<value>1073741824</value>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>true</value>
</property>
<property>
<name>hive.limit.pushdown.memory.usage</name>
<value>0.04</value>
</property>
<property>
<name>hive.map.aggr</name>
<value>true</value>
</property>
<property>
<name>hive.map.aggr.hash.force.flush.memory.threshold</name>
<value>0.9</value>
</property>
<property>
<name>hive.map.aggr.hash.min.reduction</name>
<value>0.5</value>
</property>
<property>
<name>hive.map.aggr.hash.percentmemory</name>
<value>0.5</value>
</property>
<property>
<name>hive.mapjoin.bucket.cache.size</name>
<value>10000</value>
</property>
<property>
<name>hive.mapjoin.optimized.hashtable</name>
<value>true</value>
</property>
<property>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<value>false</value>
</property>
<property>
<name>hive.merge.mapfiles</name>
<value>true</value>
</property>
<property>
<name>hive.merge.mapredfiles</name>
<value>false</value>
</property>
<property>
<name>hive.merge.orcfile.stripe.level</name>
<value>true</value>
</property>
<property>
<name>hive.merge.rcfile.block.level</name>
<value>true</value>
</property>
<property>
<name>hive.merge.size.per.task</name>
<value>256000000</value>
</property>
<property>
<name>hive.merge.smallfiles.avgsize</name>
<value>16000000</value>
</property>
<property>
<name>hive.merge.tezfiles</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.authorization.storage.checks</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.cache.pinobjtypes</name>
<value>Table,Database,Type,FieldSchema,Order</value>
</property>
<property>
<name>hive.metastore.client.connect.retry.delay</name>
<value>5s</value>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>1800s</value>
</property>
<property>
<name>hive.metastore.connect.retries</name>
<value>24</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.failure.retries</name>
<value>24</value>
</property>
<property>
<name>hive.metastore.kerberos.keytab.file</name>
<value>/etc/security/keytabs/hive.service.keytab</value>
</property>
<property>
<name>hive.metastore.kerberos.principal</name>
<value>hive/_HOST@<KRB_REALM></value>
</property>
<property>
<name>hive.metastore.pre.event.listeners</name>
<value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
</property>
<property>
<name>hive.metastore.sasl.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.server.max.threads</name>
<value>100000</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://<db_srv>:9083</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/apps/hive/warehouse</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin.sortedmerge</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.constant.propagation</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.index.filter</name>
<value>false</value>
</property>
<property>
<name>hive.optimize.metadataonly</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.null.scan</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication.min.reducer</name>
<value>1</value>
</property>
<property>
<name>hive.optimize.sort.dynamic.partition</name>
<value>false</value>
</property>
<property>
<name>hive.orc.compute.splits.num.threads</name>
<value>10</value>
</property>
<property>
<name>hive.orc.splits.include.file.footer</name>
<value>false</value>
</property>
<property>
<name>hive.prewarm.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.prewarm.numcontainers</name>
<value>3</value>
</property>
<property>
<name>hive.security.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
</property>
<property>
<name>hive.security.authorization.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
</property>
<property>
<name>hive.security.metastore.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
</property>
<property>
<name>hive.security.metastore.authorization.auth.reads</name>
<value>true</value>
</property>
<property>
<name>hive.security.metastore.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
</property>
<property>
<name>hive.server2.allow.user.substitution</name>
<value>true</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>KERBEROS</value>
</property>
<property>
<name>hive.server2.authentication.kerberos.keytab</name>
<value>/etc/security/keytabs/hive.service.keytab</value>
</property>
<property>
<name>hive.server2.authentication.kerberos.principal</name>
<value>hive/_HOST@<KRB_REALM></value>
</property>
<property>
<name>hive.server2.authentication.spnego.keytab</name>
<value>/etc/security/keytabs/spnego.service.keytab</value>
</property>
<property>
<name>hive.server2.authentication.spnego.principal</name>
<value>HTTP/_HOST@<KRB_REALM></value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
</property>
<property>
<name>hive.server2.keystore.password</name>
<value>hive_server2</value>
</property>
<property>
<name>hive.server2.keystore.path</name>
<value>/etc/hive/keystores/hive2.jks</value>
</property>
<property>
<name>hive.server2.logging.operation.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>${system:java.io.tmpdir}/${system:user.name}/operation_logs</value>
</property>
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>true</value>
</property>
<property>
<name>hive.server2.table.type.mapping</name>
<value>CLASSIC</value>
</property>
<property>
<name>hive.server2.tez.default.queues</name>
<value>default</value>
</property>
<property>
<name>hive.server2.tez.initialize.default.sessions</name>
<value>false</value>
</property>
<property>
<name>hive.server2.tez.sessions.per.default.queue</name>
<value>1</value>
</property>
<property>
<name>hive.server2.thrift.http.path</name>
<value>cliservice</value>
</property>
<property>
<name>hive.server2.thrift.http.port</name>
<value>10001</value>
</property>
<property>
<name>hive.server2.thrift.max.worker.threads</name>
<value>500</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.sasl.qop</name>
<value>auth</value>
</property>
<property>
<name>hive.server2.transport.mode</name>
<value>binary</value>
</property>
<property>
<name>hive.server2.use.SSL</name>
<value>false</value>
</property>
<property>
<name>hive.server2.zookeeper.namespace</name>
<value>hiveserver2</value>
</property>
<property>
<name>hive.smbjoin.cache.rows</name>
<value>10000</value>
</property>
<property>
<name>hive.stats.autogather</name>
<value>true</value>
</property>
<property>
<name>hive.stats.dbclass</name>
<value>fs</value>
</property>
<property>
<name>hive.stats.fetch.column.stats</name>
<value>true</value>
</property>
<property>
<name>hive.stats.fetch.partition.stats</name>
<value>true</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>false</value>
</property>
<property>
<name>hive.tez.auto.reducer.parallelism</name>
<value>true</value>
</property>
<property>
<name>hive.tez.container.size</name>
<value>2048</value>
</property>
<property>
<name>hive.tez.cpu.vcores</name>
<value>-1</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning</name>
<value>true</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning.max.data.size</name>
<value>104857600</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning.max.event.size</name>
<value>1048576</value>
</property>
<property>
<name>hive.tez.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
</property>
<property>
<name>hive.tez.java.opts</name>
<value>-server -Xmx1639m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps</value>
</property>
<property>
<name>hive.tez.log.level</name>
<value>INFO</value>
</property>
<property>
<name>hive.tez.max.partition.factor</name>
<value>2.0</value>
</property>
<property>
<name>hive.tez.min.partition.factor</name>
<value>0.25</value>
</property>
<property>
<name>hive.tez.smb.number.waves</name>
<value>0.5</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
</property>
<property>
<name>hive.txn.max.open.batch</name>
<value>1000</value>
</property>
<property>
<name>hive.txn.timeout</name>
<value>300</value>
</property>
<property>
<name>hive.user.install.directory</name>
<value>/tmp/</value>
</property>
<property>
<name>hive.vectorized.execution.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.vectorized.execution.reduce.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.vectorized.groupby.checkinterval</name>
<value>1024</value>
</property>
<property>
<name>hive.vectorized.groupby.flush.percent</name>
<value>0.1</value>
</property>
<property>
<name>hive.vectorized.groupby.maxentries</name>
<value>100000</value>
</property>
<property>
<name>hive.warehouse.subdir.inherit.perms</name>
<value>true</value>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
</property>
<property>
<name>hive.zookeeper.namespace</name>
<value>hive_zookeeper_namespace</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value><srv1>:2181,<srv2>:2181,<srv3>:2181</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.postgresql.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:postgresql://<db_srv>:<port>/hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
</configuration>
###
When I run the workflow, I get the following error message:
###
# oozie job -oozie=http://<OOZIE_SRV>:<OOZIE_PORT>/oozie -info 0004487-160314163828133-oozie-oozi-W -verbose
Job ID : 0004487-160314163828133-oozie-oozi-W
------------------------------------------------------------------------------------------------------------------------------------
Workflow Name : TEST_HIVE_ACTION
App Path : hdfs://<NAMENODE_SRV>:8020/tmp/test/hive.xml
Status : KILLED
Run : 0
User : <myuser>
Group : -
Created : 2016-03-29 13:37:46 GMT
Started : 2016-03-29 13:37:46 GMT
Last Modified : 2016-03-29 13:38:11 GMT
Ended : 2016-03-29 13:38:11 GMT
CoordAction ID: -
Actions
------------------------------------------------------------------------------------------------------------------------------------
ID Console URL Error Code Error Message External ID
External Status Name RetriesTracker URI Type Started Status Ended
------------------------------------------------------------------------------------------------------------------------------------
0004487-160314163828133-oozie-oozi-W@:start: - - -
- OK :start: 0 -:START: 2016-03-29 13:37:46 GMT OK
2016-03-29 13:37:46 GMT
------------------------------------------------------------------------------------------------------------------------------------
0004487-160314163828133-oozie-oozi-W@init-hive
http://<NAMENODE_SRV>:8088/proxy/application_1457363931356_28038/
JA018 org.apache.tez.dag.api.SessionNotRunning: TezSession has already
shutdown. Application application_1457363931356_28039 failed 2 times due to
AM Container for appattempt_1457363931356_28039_000002 exited with
exitCode: 1
For more detailed output, check application tracking
page:http://<NAMENODE_SRV>:8088/cluster/app/application_1457363931356_28039Then,
click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_e19_1457363931356 job_1457363931356_28038
FAILED/KILLED init-hive 0<NAMENODE_SRV>:8050 hive 2016-03-29
13:37:46 GMT ERROR 2016-03-29 13:38:11 GMT
------------------------------------------------------------------------------------------------------------------------------------
0004487-160314163828133-oozie-oozi-W@fail - E0729 Script
failed, error message[org.apache.tez.dag.api.SessionNotRunning: TezSession
has already shutdown. Application application_1457363931356_28039 failed 2
times due to AM Container for appattempt_1457363931356_28039_000002 exited
with exitCode: 1
For more detailed output, check application tracking
page:http://<NAMENODE_SRV>:<RM_PORT>/cluster/app/application_1457363931356_28039Then,
click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id - OK fail 0 - :KILL: 2016-03-29
13:38:11 GMT OK 2016-03-29 13:38:11 GMT
------------------------------------------------------------------------------------------------------------------------------------
###
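To dig further, the container logs of the failed application can be pulled from YARN (a sketch, assuming log aggregation is enabled on the cluster):
###
# fetch the aggregated logs of the failed Tez application master
yarn logs -applicationId application_1457363931356_28039
###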
And when I check the application attempt, here is what I found:
###
Container: container_e19_1457363931356_28039_01_000001 on <DATANODE>_45454
==================================================================================================
LogType:stderr
Log Upload Time:Tue Mar 29 15:38:10 +0200 2016
LogLength:77
Log Contents:
Error: Could not find or load main class org.apache.tez.dag.app.DAGAppMaster
End of LogType:stderr
LogType:stdout
Log Upload Time:Tue Mar 29 15:38:10 +0200 2016
LogLength:949
Log Contents:
Heap
PSYoungGen total 602112K, used 10322K [0x00000007d5500000,
0x00000007ff500000, 0x0000000800000000)
eden space 516096K, 2% used
[0x00000007d5500000,0x00000007d5f14898,0x00000007f4d00000)
lgrp 0 space 258048K, 4% used
[0x00000007d5500000,0x00000007d5f14898,0x00000007e5100000)
lgrp 1 space 258048K, 0% used
[0x00000007e5100000,0x00000007e5100000,0x00000007f4d00000)
from space 86016K, 0% used
[0x00000007fa100000,0x00000007fa100000,0x00000007ff500000)
to space 86016K, 0% used
[0x00000007f4d00000,0x00000007f4d00000,0x00000007fa100000)
ParOldGen total 1375232K, used 0K [0x0000000780000000,
0x00000007d3f00000, 0x00000007d5500000)
object space 1375232K, 0% used
[0x0000000780000000,0x0000000780000000,0x00000007d3f00000)
PSPermGen total 21504K, used 2874K [0x0000000775a00000,
0x0000000776f00000, 0x0000000780000000)
object space 21504K, 13% used
[0x0000000775a00000,0x0000000775cce858,0x0000000776f00000)
End of LogType:stdout
###
So this is the error which blocks my workflow:
###
Could not find or load main class org.apache.tez.dag.app.DAGAppMaster
###
I don't understand why I get this error message, because I set
tez.lib.uris in my workflow.xml.
In the path for the Tez libs, I have a tar.gz file containing all the necessary
jars, such as the tez-dag jar that contains this class (see the check sketched below).
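This is how I would double-check what actually sits at the tez.lib.uris path (a sketch; the directory matches tezLibPath from my property file, and "tez.tar.gz" is only a hypothetical name for the archive that lives there):
###
# list what the tez.lib.uris path actually contains
hdfs dfs -ls /hdp/apps/2.3.2.0-2950/tez/
# check that the tez-dag jar (which holds DAGAppMaster) is inside the archive;
# "tez.tar.gz" is a hypothetical file name, adjust to the real one
hdfs dfs -cat /hdp/apps/2.3.2.0-2950/tez/tez.tar.gz | tar -tzf - | grep tez-dag
###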
So I am sending this mail to find out how to use Tez as the execution engine in
an Oozie workflow. Could someone help me, please?
Best regards.
Morgrim.