[ 
https://issues.apache.org/jira/browse/HIVE-22294?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17415338#comment-17415338
 ] 

Nemon Lou commented on HIVE-22294:
----------------------------------

The following SQL reproduces this issue with TPC-DS scale factor 2 on Hive 2.3.0:
{code:sql}
use hive_tpcds_text;
set hive.optimize.skewjoin=true;
set hive.auto.convert.join.noconditionaltask.size=1000;
set hive.mapjoin.smalltable.filesize=2500;
select  i_item_id, 
        avg(ss_quantity) agg1,
        avg(ss_list_price) agg2,
        avg(ss_coupon_amt) agg3,
        avg(ss_sales_price) agg4 
 from store_sales, customer_demographics, date_dim, item, promotion
 where ss_sold_date_sk = d_date_sk and
       ss_item_sk = i_item_sk and
       ss_cdemo_sk = cd_demo_sk and
       ss_promo_sk = p_promo_sk and
       cd_gender = 'F' and 
       cd_marital_status = 'W' and
       cd_education_status = 'College' and
       (p_channel_email = 'N' or p_channel_event = 'N') and
       d_year = 2001 
 group by i_item_id
 order by i_item_id
 limit 100;
{code}

Error log:
{noformat}
2021-09-15 10:15:36,602 | ERROR | 43f5fc4c-2294-443e-897e-9c73261d4ccb 
HiveServer2-Handler-Pool: Thread-100 | FAILED: ClassCastException 
org.apache.hadoop.hive.ql.plan.ConditionalWork cannot be cast to 
org.apache.hadoop.hive.ql.plan.MapredWork
java.lang.ClassCastException: org.apache.hadoop.hive.ql.plan.ConditionalWork 
cannot be cast to org.apache.hadoop.hive.ql.plan.MapredWork
        at 
org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver$LocalMapJoinTaskDispatcher.processCurrentTask(MapJoinResolver.java:102)
        at 
org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver$LocalMapJoinTaskDispatcher.dispatch(MapJoinResolver.java:239)
        at 
org.apache.hadoop.hive.ql.lib.TaskGraphWalker.dispatch(TaskGraphWalker.java:111)
        at 
org.apache.hadoop.hive.ql.lib.TaskGraphWalker.walk(TaskGraphWalker.java:180)
        at 
org.apache.hadoop.hive.ql.lib.TaskGraphWalker.startWalking(TaskGraphWalker.java:125)
        at 
org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.resolve(MapJoinResolver.java:81)
        at 
org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer.optimize(PhysicalOptimizer.java:114)
        at 
org.apache.hadoop.hive.ql.parse.MapReduceCompiler.optimizeTaskPlan(MapReduceCompiler.java:271)
        at 
org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:292)
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:11289)
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:286)
        at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:258)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:513)
        at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1318)
        at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1296)
        at 
org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:206)
        at 
org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:321)
        at 
org.apache.hive.service.cli.operation.Operation.run(Operation.java:320)
        at 
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:530)
        at 
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:517)
        at sun.reflect.GeneratedMethodAccessor77.invoke(Unknown Source)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
        at 
org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
        at 
org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1840)
        at 
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
        at com.sun.proxy.$Proxy38.executeStatementAsync(Unknown Source)
        at 
org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:310)
        at 
org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:761)
        at 
org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1437)
        at 
org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1422)
        at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
        at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
        at 
org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
        at 
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
 | 
org.apache.hadoop.hive.ql.session.SessionState$LogHelper.printError(SessionState.java:1140)

{noformat}
 

> ConditionalWork cannot be cast to MapredWork  When both skew.join and 
> auto.convert is on.  
> -------------------------------------------------------------------------------------------
>
>                 Key: HIVE-22294
>                 URL: https://issues.apache.org/jira/browse/HIVE-22294
>             Project: Hive
>          Issue Type: Bug
>          Components: Physical Optimizer
>    Affects Versions: 2.3.0, 2.3.4, 3.1.1
>            Reporter: Qiang.Kang
>            Assignee: Rui Li
>            Priority: Critical
>
> Our Hive version is 1.2.1, with some patches merged (including the patches 
> mentioned in https://issues.apache.org/jira/browse/HIVE-14557 and 
> https://issues.apache.org/jira/browse/HIVE-16155).
>  
> My sql query string is like this:
> {code:java}
> // code placeholder
> set hive.auto.convert.join = true;
> set hive.optimize.skewjoin=true;
>  
> SELECT a.*
> FROM
> a
> JOIN b
> ON a.id=b.id AND a.uid = b.uid 
> LEFT JOIN c
> ON b.id=c.id AND b.uid=c.uid;
>  
> {code}
>  
> And we encountered the following error: 
> FAILED: ClassCastException org.apache.hadoop.hive.ql.plan.ConditionalWork 
> cannot be cast to org.apache.hadoop.hive.ql.plan.MapredWork
>  
> The main reason is that there is a conditional task (*MapJoin*) in the task 
> list of another conditional task (*SkewJoin*). Here is the code snippet 
> where the exception is thrown:
> `org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver:`
>  
> {code:java}
> // code placeholder
> public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
>  throws SemanticException {
>  Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
>  // not map reduce task or not conditional task, just skip
>  if (currTask.isMapRedTask()) {
>  if (currTask instanceof ConditionalTask) {
>  // get the list of task
>  List<Task<? extends Serializable>> taskList = ((ConditionalTask) 
> currTask).getListTasks();
>  for (Task<? extends Serializable> tsk : taskList) {
>  if (tsk.isMapRedTask())
> {   //  ATTENTION: tsk May be ConditionalTask !!! 
> this.processCurrentTask(tsk, ((ConditionalTask) currTask)); }
> }
>  } else
> { this.processCurrentTask(currTask, null); }
> }
>  return null;
>  }
> private void processCurrentTask(Task<? extends Serializable> currTask,
>  ConditionalTask conditionalTask) throws SemanticException {
>  // get current mapred work and its local work
>  MapredWork mapredWork = (MapredWork) currTask.getWork(); // WRONG!!!!!!
>  MapredLocalWork localwork = mapredWork.getMapWork().getMapRedLocalWork();
>  
> {code}
>  
> Here is some detailed information about the query plans:
>  * 
>  --  set hive.auto.convert.join = true; set hive.optimize.skewjoin=false;*
> {code:java}
> // code placeholder
> Stage-1 is a root stage [a join b]
>  Stage-12 [map join]depends on stages: Stage-1 , consists of Stage-13, Stage-2
>  Stage-13 has a backup stage: Stage-2
>  Stage-11 depends on stages: Stage-13
>  Stage-8 depends on stages: Stage-2, Stage-11 , consists of Stage-5, Stage-4, 
> Stage-6
>  Stage-5
>  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
>  Stage-14 depends on stages: Stage-0
>  Stage-3 depends on stages: Stage-14
>  Stage-4
>  Stage-6
>  Stage-7 depends on stages: Stage-6
>  Stage-2
>  
> {code}
>  * 
>  --  set hive.auto.convert.join = false; set hive.optimize.skewjoin=true;*
> {code:java}
> // code placeholder
> STAGE DEPENDENCIES:
>  Stage-1 is a root stage
>  Stage-12 depends on stages: Stage-1 , consists of Stage-13, Stage-2
>  Stage-13 [skew Join map local task]
>  Stage-11 depends on stages: Stage-13
>  Stage-2 depends on stages: Stage-11
>  Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
>  Stage-5
>  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
>  Stage-14 depends on stages: Stage-0
>  Stage-3 depends on stages: Stage-14
>  Stage-4
>  Stage-6
>  Stage-7 depends on stages: Stage-6
> {code}
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to