[ https://issues.apache.org/jira/browse/HIVE-2821?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13276606#comment-13276606 ]
Zhang Xinyu commented on HIVE-2821: ----------------------------------- hive> explain select count(1) from ( > select key,value from (select /*+ MAPJOIN(b) */ a.key, a.value from src a join src1 b on a.key=b.key) t1 > union all > select key,value from (select /*+ MAPJOIN(bb) */ aa.key, aa.value from src aa join src1 bb on aa.key=bb.key) t2 > ) t3; FAILED: Hive Internal Error: java.lang.NullPointerException(null) java.lang.NullPointerException at org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(PartitionPruner.java:170) at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:553) at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:514) at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.initPlan(GenMapRedUtils.java:125) at org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1.process(GenMRRedSink1.java:76) at org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3.process(GenMRRedSink3.java:64) at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89) ... Environment: hive-05b8af0 (0.8.1 release); Mac 10.7.3; java 1.6.0_31-b04-415-11M3635. I debugged in Eclipse to see how the mapjoin plan is generated, and found that the current logic depends on the order in which the TableScanOperators are walked. For a case like: TS_1 -> \ TS_2 -> Mapjoin_3 -> \ ....................TS_4 -> Mapjoin_5 TS_1 or TS_2 must be walked first, or Hive will throw an NPE; for a case like: TS_1 -> \ TS_2 -> Mapjoin_3 -> Union_4 -> \ .....................................TS_5 -> Mapjoin_6 TS_5 must be walked first, or Hive will throw an NPE. What I did was replace "private HashMap<String, Operator<? extends Serializable>> topOps;" (in SemanticAnalyzer.java) with "private LinkedHashMap<String, Operator<? extends Serializable>> topOps;" and hack a Transform in Optimizer.java to adjust the order in which the TableScanOperators are walked. I tested it, and it works well. But that is not enough for a case like Mapjoin union all Mapjoin (a map-only union) followed by a reducer.
I have another hack, in ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java: 123c123 < if (!seenOps.contains(currTopOp)) { --- > if (!seenOps.contains(currTopOp) && currTopOp!=null) { It is too much of a hack, and has not even been tested by running a job (though the plan it produces from explain is OK). I'm waiting for the official patch too. > union with two mapjoin will throw NPE > --------------------------------------- > > Key: HIVE-2821 > URL: https://issues.apache.org/jira/browse/HIVE-2821 > Project: Hive > Issue Type: Bug > Affects Versions: 0.7.0 > Environment: Linux zongren-VirtualBox 3.0.0-14-generic #23-Ubuntu SMP > Mon Nov 21 20:34:47 UTC 2011 i686 i686 i386 GNU/Linux > java version "1.6.0_25" > hadoop-0.20.2-cdh3u0 > hive-0.7.0-cdh3u0 > Reporter: caofangkun > Priority: Critical > Labels: optimizer, ql, union > > create table src (key string, value string); > create table src1 (key string, value string); > select count(*) from ( > select /*+mapjoin(b)*/ a.* > from src a > join > src1 b > on a.key=b.key > where a.key=48 > union all > select /*+mapjoin(bb)*/ aa.* > from src aa > join > src1 bb > on aa.key=bb.key > where aa.key=100 > ) t; > FAILED: Hive Internal Error: java.lang.NullPointerException(null) > java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(PartitionPruner.java:156) > at > org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:553) > at > org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:514) > at > org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.initPlan(GenMapRedUtils.java:125) > at > org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1.process(GenMRRedSink1.java:76) > at > org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3.process(GenMRRedSink3.java:64) > at > org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:88) > at >
org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:55) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:102) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genMapRedTasks(SemanticAnalyzer.java:6946) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:7247) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:240) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:431) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:337) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:904) > at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:279) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:228) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:417) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:350) > at org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:451) > at org.apache.hadoop.hive.cli.CliDriver.processFile(CliDriver.java:461) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:675) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:585) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at org.apache.hadoop.util.RunJar.main(RunJar.java:186) -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira