dev on bcc

Zhenxiao,

Cool you got it set up.

The query runs a full order by before the limit - are you sure it's not
just still running? Hive on Tez prints "total tasks/completed tasks", so no
update may mean none of the reduce tasks have finished yet.

If not, it'd be great to see the yarn logs (yarn logs -applicationId) and
get more info about the table you're using (size, file format, etc). If the
logs are really big you might want to consider opening/attaching them to a
jira (issues.apache.org) (or send them directly to me).

There are a bunch of settings that might be of interest to you (in general
not just for this query) - I've attached a text doc with some details.

Thanks,
Gunther.




On Fri, Dec 13, 2013 at 1:12 PM, Zhenxiao Luo <z...@netflix.com> wrote:

> Hi,
>
> Excuse me. May I ask a question about running Hive on Tez?
>
> I've installed Hive on Tez, and running a simple query from hiveCli,
>
> hive> set hive.optimize.tez=true;
> hive> select * from table order by title_id limit 5;
>
> However, each time I can see from the TezJobMonitor that all the map
> tasks are done, but the reducer never gets started, and the job keeps
> running forever.
>
> I tried a number of times, and each time the same failure (the job
> hangs while running) happens again and again.
> Has anyone successfully run queries using Hive on Tez? Are there
> any tips or suggestions?
>
> Here is my job log:
>
> 13/12/13 20:57:31 INFO client.TezSession: Submitting dag to
> TezSession, sessionName=HIVE-365b35bc-2461-4e2f-83f9-8da1fa356a86,
> applicationId=application_1386896881353_0027
> 13/12/13 20:57:33 INFO client.TezSession: Submitted dag to TezSession,
> sessionName=HIVE-365b35bc-2461-4e2f-83f9-8da1fa356a86,
> applicationId=application_1386896881353_0027,
> dagId=dag_1386896881353_0027_1
> 13/12/13 20:57:33 INFO client.RMProxy: Connecting to ResourceManager
> at /10.183.195.180:9022
> 13/12/13 20:57:33 INFO log.PerfLogger: </PERFLOG method=TezSubmitDag
> start=1386968251250 end=1386968253338 duration=2088
> from=org.apache.hadoop.hive.ql.exec.tez.TezTask>
>
>
> 13/12/13 20:57:33 INFO tez.TezJobMonitor:
>
> 13/12/13 20:57:33 INFO log.PerfLogger: <PERFLOG method=TezRunDag
> from=org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor>
> 13/12/13 20:57:33 INFO log.PerfLogger: <PERFLOG
> method=TezSubmitToRunningDag
> from=org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor>
> 13/12/13 20:57:33 INFO log.PerfLogger: </PERFLOG
> method=TezSubmitToRunningDag start=1386968253341 end=1386968253402
> duration=61 from=org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor>
> Status: Running (application id: application_1386896881353_0027)
>
> 13/12/13 20:57:33 INFO tez.TezJobMonitor: Status: Running (application
> id: application_1386896881353_0027)
>
> 13/12/13 20:57:33 INFO log.PerfLogger: <PERFLOG
> method=TezRunVertex.Reducer 2
> from=org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor>
> 13/12/13 20:57:33 INFO log.PerfLogger: <PERFLOG
> method=TezRunVertex.Map 1
> from=org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor>
> Map 1: -/- Reducer 2: -/-
> 13/12/13 20:57:33 INFO tez.TezJobMonitor: Map 1: -/- Reducer 2: -/-
> Map 1: -/- Reducer 2: 0/1
> 13/12/13 20:57:33 INFO tez.TezJobMonitor: Map 1: -/- Reducer 2: 0/1
> Map 1: 0/16 Reducer 2: 0/1
> 13/12/13 20:57:34 INFO tez.TezJobMonitor: Map 1: 0/16 Reducer 2: 0/1
> Map 1: 0/16 Reducer 2: 0/1
> 13/12/13 20:57:37 INFO tez.TezJobMonitor: Map 1: 0/16 Reducer 2: 0/1
> Map 1: 0/16 Reducer 2: 0/1
> 13/12/13 20:57:40 INFO tez.TezJobMonitor: Map 1: 0/16 Reducer 2: 0/1
> Map 1: 0/16 Reducer 2: 0/1
> 13/12/13 20:57:43 INFO tez.TezJobMonitor: Map 1: 0/16 Reducer 2: 0/1
> Map 1: 0/16 Reducer 2: 0/1
> 13/12/13 20:57:46 INFO tez.TezJobMonitor: Map 1: 0/16 Reducer 2: 0/1
> Map 1: 0/16 Reducer 2: 0/1
> 13/12/13 20:57:49 INFO tez.TezJobMonitor: Map 1: 0/16 Reducer 2: 0/1
> Map 1: 0/16 Reducer 2: 0/1
> 13/12/13 20:57:52 INFO tez.TezJobMonitor: Map 1: 0/16 Reducer 2: 0/1
> Map 1: 0/16 Reducer 2: 0/1
> 13/12/13 20:57:55 INFO tez.TezJobMonitor: Map 1: 0/16 Reducer 2: 0/1
> Map 1: 1/16 Reducer 2: 0/1
> 13/12/13 20:57:56 INFO tez.TezJobMonitor: Map 1: 1/16 Reducer 2: 0/1
> Map 1: 2/16 Reducer 2: 0/1
> 13/12/13 20:57:58 INFO tez.TezJobMonitor: Map 1: 2/16 Reducer 2: 0/1
> Map 1: 3/16 Reducer 2: 0/1
> 13/12/13 20:57:58 INFO tez.TezJobMonitor: Map 1: 3/16 Reducer 2: 0/1
> Map 1: 5/16 Reducer 2: 0/1
> 13/12/13 20:57:59 INFO tez.TezJobMonitor: Map 1: 5/16 Reducer 2: 0/1
> Map 1: 8/16 Reducer 2: 0/1
> 13/12/13 20:57:59 INFO tez.TezJobMonitor: Map 1: 8/16 Reducer 2: 0/1
> Map 1: 12/16 Reducer 2: 0/1
> 13/12/13 20:57:59 INFO tez.TezJobMonitor: Map 1: 12/16 Reducer 2: 0/1
> Map 1: 15/16 Reducer 2: 0/1
> 13/12/13 20:58:00 INFO tez.TezJobMonitor: Map 1: 15/16 Reducer 2: 0/1
> 13/12/13 20:58:00 INFO log.PerfLogger: </PERFLOG
> method=TezRunVertex.Map 1 start=1386968253402 end=1386968280223
> duration=26821 from=org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor>
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:00 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:03 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:06 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:09 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:12 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:15 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:18 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:21 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:24 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:27 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:30 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:33 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:36 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:39 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:42 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:45 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:48 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:51 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:54 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:58:57 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:00 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:03 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:06 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:09 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:12 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:15 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:18 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:21 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:24 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:27 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:30 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:33 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:36 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:39 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:42 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:45 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:48 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:52 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:55 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 20:59:58 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 21:00:01 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 21:00:04 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 21:00:07 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 21:00:10 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
> Map 1: 16/16 Reducer 2: 0/1
> 13/12/13 21:00:13 INFO tez.TezJobMonitor: Map 1: 16/16 Reducer 2: 0/1
>
> Thanks,
> Zhenxiao
>

-- 
CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or entity to 
which it is addressed and may contain information that is confidential, 
privileged and exempt from disclosure under applicable law. If the reader 
of this message is not the intended recipient, you are hereby notified that 
any printing, copying, dissemination, distribution, disclosure or 
forwarding of this communication is strictly prohibited. If you have 
received this communication in error, please contact the sender immediately 
and delete it from your system. Thank You.
Notes:

- If you're not already, you should use the ORC file format for your tables if 
you want to see the full thing in action.
- I start hive with "hive -hiveconf hive.optimize.tez=true"; not strictly 
necessary, but it will start the AM/containers right away instead of on the 
first query.

Hive settings:

// needed because SMB isn't supported on tez yet
set hive.optimize.bucketmapjoin=false;
set hive.optimize.bucketmapjoin.sortedmerge=false;
set hive.auto.convert.sortmerge.join=false;
set hive.auto.convert.sortmerge.join.noconditionaltask=false;
set hive.auto.convert.join.noconditionaltask=true;

// depends on your available mem/cluster, but map/reduce mb should be set to 
the same for container reuse
set hive.auto.convert.join.noconditionaltask.size=64000000;
set mapred.map.child.java.opts=-server -Xmx3584m 
-Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC;
set mapred.reduce.child.java.opts=-server -Xmx3584m 
-Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC;
set mapreduce.map.memory.mb=4096;
set mapreduce.reduce.memory.mb=4096;

// generic opts
set hive.optimize.reducededuplication.min.reducer=1;
set hive.optimize.mapjoin.mapreduce=true;

// autogather might require you to up the max number of counters, if you run 
into issues
set hive.stats.autogather=true;
set hive.stats.dbclass=counter;

set mapreduce.map.output.compress=true;
set 
mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.DefaultCodec;
set tez.runtime.intermediate-output.should-compress=true;
set 
tez.runtime.intermediate-output.compress.codec=org.apache.hadoop.io.compress.DefaultCodec;
set tez.runtime.intermediate-input.is-compressed=true;
set 
tez.runtime.intermediate-input.compress.codec=org.apache.hadoop.io.compress.DefaultCodec;

// tez groups in the AM
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;

set hive.orc.splits.include.file.footer=true;

set hive.root.logger=ERROR,console;
set hive.optimize.tez=true;
set hive.vectorized.execution.enabled=true;
set hive.exec.local.cache=true;
set hive.compute.query.using.stats=true;

for tez:

<property>
    <name>tez.am.resource.memory.mb</name>
    <value>8192</value>
</property>
<property>
    <name>tez.am.java.opts</name>
    <value>-server -Xmx7168m -Djava.net.preferIPv4Stack=true</value>
</property>
<property>
    <name>tez.am.grouping.min-size</name>
    <value>16777216</value>
  </property>
  <!-- Client Submission timeout value when submitting DAGs to a session -->
  <property>
    <name>tez.session.client.timeout.secs</name>
    <value>-1</value>
  </property>
  <!-- prewarm stuff -->
  <property>
    <name>tez.session.pre-warm.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>tez.session.pre-warm.num.containers</name>
    <value>10</value>
  </property>
<property>
    <name>tez.am.grouping.split-waves</name>
    <value>0.9</value>
  </property>

  <property>
    <name>tez.am.container.reuse.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>tez.am.container.reuse.rack-fallback.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>tez.am.container.reuse.non-local-fallback.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>tez.am.container.session.delay-allocation-millis</name>
    <value>-1</value>
  </property>
  <property>
    <name>tez.am.container.reuse.locality.delay-allocation-millis</name>
    <value>250</value>
  </property>

Reply via email to