[ 
https://issues.apache.org/jira/browse/HIVE-29457?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ASF GitHub Bot updated HIVE-29457:
----------------------------------
    Labels: pull-request-available  (was: )

> HiveSortExchangePullUpConstantsRule doesn't remove constant column from 
> distribution keys
> -----------------------------------------------------------------------------------------
>
>                 Key: HIVE-29457
>                 URL: https://issues.apache.org/jira/browse/HIVE-29457
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive
>    Affects Versions: 4.3.0
>            Reporter: Soumyakanti Das
>            Assignee: Soumyakanti Das
>            Priority: Major
>              Labels: pull-request-available
>
> Since HiveSortExchangePullUpConstantsRule doesn't remove constant columns from 
> distribution keys, it creates a HiveRelDistribution with null keys, e.g.,
> {code:java}
> rel#51:HiveSortExchange.HIVE.[0].hash[0, 
> null](input=HiveProject#54,distribution=hash[0, null],collation=[0]) {code}
> and we eventually run into an NPE.
> To reproduce this, run:
> {code:java}
> CREATE TABLE test (col1 string, col2 string);
> EXPLAIN CBO
> SELECT col1 FROM test
> WHERE col2 = 'a'
> DISTRIBUTE BY col1, col2     
> SORT BY col1, col2; {code}
> Error:
> {code:java}
>  java.lang.NullPointerException: Cannot invoke "java.lang.Integer.intValue()" 
> because the return value of "java.util.Iterator.next()" is null
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:890)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:287)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimChild(HiveRelFieldTrimmer.java:201)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trimFields(RelFieldTrimmer.java:437)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:287)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimChild(HiveRelFieldTrimmer.java:201)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trimFields(RelFieldTrimmer.java:437)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:287)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trim(RelFieldTrimmer.java:170)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trim(HiveRelFieldTrimmer.java:162)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trim(HiveRelFieldTrimmer.java:151)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFieldTrimmerRule.trim(HiveFieldTrimmerRule.java:66)
>  at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFieldTrimmerRule.onMatch(HiveFieldTrimmerRule.java:61)
>  at 
> org.apache.calcite.plan.AbstractRelOptPlanner.fireRule(AbstractRelOptPlanner.java:337)
>  at org.apache.calcite.plan.hep.HepPlanner.applyRule(HepPlanner.java:556)
>  at org.apache.calcite.plan.hep.HepPlanner.applyRules(HepPlanner.java:420)
>  at 
> org.apache.calcite.plan.hep.HepPlanner.executeRuleInstance(HepPlanner.java:243)
>  at 
> org.apache.calcite.plan.hep.HepInstruction$RuleInstance$State.execute(HepInstruction.java:178)
>  at 
> org.apache.calcite.plan.hep.HepPlanner.lambda$executeProgram$0(HepPlanner.java:211)
>  at 
> org.apache.hive.com.google.common.collect.ImmutableList.forEach(ImmutableList.java:397)
>  at org.apache.calcite.plan.hep.HepPlanner.executeProgram(HepPlanner.java:210)
>  at org.apache.calcite.plan.hep.HepProgram$State.execute(HepProgram.java:118)
>  at org.apache.calcite.plan.hep.HepPlanner.executeProgram(HepPlanner.java:205)
>  at org.apache.calcite.plan.hep.HepPlanner.findBestExp(HepPlanner.java:191)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2562)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2522)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2516)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.applyPreJoinOrderingTransforms(CalcitePlanner.java:1971)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1690)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1553)
>  at 
> org.apache.calcite.tools.Frameworks.lambda$withPlanner$0(Frameworks.java:140)
>  at 
> org.apache.calcite.prepare.CalcitePrepareImpl.perform(CalcitePrepareImpl.java:936)
>  at org.apache.calcite.tools.Frameworks.withPrepare(Frameworks.java:191)
>  at org.apache.calcite.tools.Frameworks.withPlanner(Frameworks.java:135)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.logicalPlan(CalcitePlanner.java:1331)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:588)
>  at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13222)
>  at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:481)
>  at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:358)
>  at 
> org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:187)
>  at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:358)
>  at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
>  at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:109)
>  at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:498)
>  at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:450)
>  at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:414)
>  at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:408)
>  at 
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
>  at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:234)
>  at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:259)
>  at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:203)
>  at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:129)
>  at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:430)
>  at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:358)
>  at 
> org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:760)
>  at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:730)
>  at 
> org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:115)
>  at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:139)
>  at 
> org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver(TestMiniLlapLocalCliDriver.java:62)
>  {code}
> This doesn't affect collations, as we remove constants in 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortPullUpConstantsRule.HiveSortPullUpConstantsRuleBase#applyToFieldCollations
> {code:java}
> protected List<RelFieldCollation> applyToFieldCollations(
>     RelCollation relCollation, Mappings.TargetMapping mapping) {
>   List<RelFieldCollation> fieldCollations = new ArrayList<>();
>   for (RelFieldCollation fc : relCollation.getFieldCollations()) {
>     final int target = mapping.getTargetOpt(fc.getFieldIndex());
>     if (target < 0) {
>       // It is a constant, we can ignore it
>       continue;
>     }
>     fieldCollations.add(fc.withFieldIndex(target));
>   }
>   return fieldCollations;
> } {code}
> This can be fixed in 
> org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution#apply by 
> skipping keys that have no target in the mapping, so that no nulls are put in 
> newKeys (the offending line is marked below).
> {code:java}
> public RelDistribution apply(TargetMapping mapping) {
>   if (keys.isEmpty()) {
>     return this;
>   }
>   List<Integer> newKeys = new ArrayList<>(keys.size());
>   if (Bug.CALCITE_4166_FIXED) {
>     throw new AssertionError("Remove logic in HiveRelDistribution when 
> [CALCITE-4166] "
>         + "has been fixed and use newKeys.add(mapping.getTargetOpt(key)); 
> instead.");
>   }
>   Map<Integer, Integer> tmp = new HashMap<>(mapping.getSourceCount());
>   for (IntPair aMapping : mapping) {
>     tmp.put(aMapping.source, aMapping.target);
>   }
>   for (Integer key : keys) {
>     newKeys.add(tmp.get(key));        <<<<<<<<---------------
>   }
>   return new HiveRelDistribution(type, newKeys);
> } {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to