[ https://issues.apache.org/jira/browse/HIVE-22294?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17415338#comment-17415338 ]
Nemon Lou commented on HIVE-22294: ---------------------------------- The following SQL reproduces this issue with TPC-DS at scale factor 2 on Hive 2.3.0: {code:sql} use hive_tpcds_text; set hive.optimize.skewjoin=true; set hive.auto.convert.join.noconditionaltask.size=1000; set hive.mapjoin.smalltable.filesize=2500; select i_item_id, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, item, promotion where ss_sold_date_sk = d_date_sk and ss_item_sk = i_item_sk and ss_cdemo_sk = cd_demo_sk and ss_promo_sk = p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'College' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 2001 group by i_item_id order by i_item_id limit 100; {code} Error log: {noformat} 2021-09-15 10:15:36,602 | ERROR | 43f5fc4c-2294-443e-897e-9c73261d4ccb HiveServer2-Handler-Pool: Thread-100 | FAILED: ClassCastException org.apache.hadoop.hive.ql.plan.ConditionalWork cannot be cast to org.apache.hadoop.hive.ql.plan.MapredWork java.lang.ClassCastException: org.apache.hadoop.hive.ql.plan.ConditionalWork cannot be cast to org.apache.hadoop.hive.ql.plan.MapredWork at org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver$LocalMapJoinTaskDispatcher.processCurrentTask(MapJoinResolver.java:102) at org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver$LocalMapJoinTaskDispatcher.dispatch(MapJoinResolver.java:239) at org.apache.hadoop.hive.ql.lib.TaskGraphWalker.dispatch(TaskGraphWalker.java:111) at org.apache.hadoop.hive.ql.lib.TaskGraphWalker.walk(TaskGraphWalker.java:180) at org.apache.hadoop.hive.ql.lib.TaskGraphWalker.startWalking(TaskGraphWalker.java:125) at org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.resolve(MapJoinResolver.java:81) at org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer.optimize(PhysicalOptimizer.java:114) at 
org.apache.hadoop.hive.ql.parse.MapReduceCompiler.optimizeTaskPlan(MapReduceCompiler.java:271) at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:292) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:11289) at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:286) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:258) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:513) at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1318) at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1296) at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:206) at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:321) at org.apache.hive.service.cli.operation.Operation.run(Operation.java:320) at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:530) at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:517) at sun.reflect.GeneratedMethodAccessor77.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78) at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36) at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1840) at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59) at com.sun.proxy.$Proxy38.executeStatementAsync(Unknown Source) at 
org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:310) at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:761) at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1437) at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1422) at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56) at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) | org.apache.hadoop.hive.ql.session.SessionState$LogHelper.printError(SessionState.java:1140) {noformat} > ConditionalWork cannot be cast to MapredWork When both skew.join and > auto.convert is on. > ------------------------------------------------------------------------------------------- > > Key: HIVE-22294 > URL: https://issues.apache.org/jira/browse/HIVE-22294 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer > Affects Versions: 2.3.0, 2.3.4, 3.1.1 > Reporter: Qiang.Kang > Assignee: Rui Li > Priority: Critical > > Our hive version is 1.2.1 which has merged some patches (including patches > mentioned in https://issues.apache.org/jira/browse/HIVE-14557, > https://issues.apache.org/jira/browse/HIVE-16155 ) . 
> > My SQL query is as follows: > {code:java} > // code placeholder > set hive.auto.convert.join = true; > set hive.optimize.skewjoin=true; > > SELECT a.* > FROM > a > JOIN b > ON a.id=b.id AND a.uid = b.uid > LEFT JOIN c > ON b.id=c.id AND b.uid=c.uid; > > {code} > > And we encountered the following error: > FAILED: ClassCastException org.apache.hadoop.hive.ql.plan.ConditionalWork > cannot be cast to org.apache.hadoop.hive.ql.plan.MapredWork > > The main reason is that there is a conditional task (*MapJoin*) in the task > list of another conditional task (*SkewJoin*), so the work of that nested > task is a ConditionalWork rather than a MapredWork. Here is the code snippet > that throws this exception: > `org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver:` > > {code:java} > // code placeholder > public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) > throws SemanticException { > Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd; > // not map reduce task or not conditional task, just skip > if (currTask.isMapRedTask()) { > if (currTask instanceof ConditionalTask) { > // get the list of task > List<Task<? extends Serializable>> taskList = ((ConditionalTask) > currTask).getListTasks(); > for (Task<? extends Serializable> tsk : taskList) { > if (tsk.isMapRedTask()) > { // ATTENTION: tsk May be ConditionalTask !!! > this.processCurrentTask(tsk, ((ConditionalTask) currTask)); } > } > } else > { this.processCurrentTask(currTask, null); } > } > return null; > } > private void processCurrentTask(Task<? extends Serializable> currTask, > ConditionalTask conditionalTask) throws SemanticException { > // get current mapred work and its local work > MapredWork mapredWork = (MapredWork) currTask.getWork(); // WRONG!!!!!! 
> MapredLocalWork localwork = mapredWork.getMapWork().getMapRedLocalWork(); > > {code} > > Here is some detailed information about the query plans: > * > -- set hive.auto.convert.join = true; set hive.optimize.skewjoin=false;* > {code:java} > // code placeholder > Stage-1 is a root stage [a join b] > Stage-12 [map join]depends on stages: Stage-1 , consists of Stage-13, Stage-2 > Stage-13 has a backup stage: Stage-2 > Stage-11 depends on stages: Stage-13 > Stage-8 depends on stages: Stage-2, Stage-11 , consists of Stage-5, Stage-4, > Stage-6 > Stage-5 > Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 > Stage-14 depends on stages: Stage-0 > Stage-3 depends on stages: Stage-14 > Stage-4 > Stage-6 > Stage-7 depends on stages: Stage-6 > Stage-2 > > {code} > * > -- set hive.auto.convert.join = false; set hive.optimize.skewjoin=true;* > {code:java} > // code placeholder > STAGE DEPENDENCIES: > Stage-1 is a root stage > Stage-12 depends on stages: Stage-1 , consists of Stage-13, Stage-2 > Stage-13 [skew Join map local task] > Stage-11 depends on stages: Stage-13 > Stage-2 depends on stages: Stage-11 > Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 > Stage-5 > Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 > Stage-14 depends on stages: Stage-0 > Stage-3 depends on stages: Stage-14 > Stage-4 > Stage-6 > Stage-7 depends on stages: Stage-6 > {code} > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > -- This message was sent by Atlassian Jira (v8.3.4#803005)