Stamatis Zampetakis created HIVE-28582:
------------------------------------------

             Summary: OOM when compiling query with many GROUP BY columns aliased multiple times
                 Key: HIVE-28582
                 URL: https://issues.apache.org/jira/browse/HIVE-28582
             Project: Hive
          Issue Type: Bug
      Security Level: Public (Viewable by anyone)
          Components: CBO, HiveServer2
    Affects Versions: 4.0.1
            Reporter: Stamatis Zampetakis
            Assignee: Stamatis Zampetakis


{code:sql}
-- Schema for the reproducer: a single table with nine string columns, c1 through c9.
-- The query that follows groups by every one of these columns.
CREATE TABLE t (
    c1 string,
    c2 string,
    c3 string,
    c4 string,
    c5 string,
    c6 string,
    c7 string,
    c8 string,
    c9 string
);

-- Reproducer for the compile-time OutOfMemoryError.
-- The statement is an EXPLAIN CBO, so the problem shows up while the plan is
-- being compiled.
--
-- Shape of the query:
--   * The inner query (t1) groups by all nine columns of t and exposes each
--     column under three different aliases, giving 27 output columns a0..a26
--     (a0-a2 come from c1, a3-a5 from c2, ..., a24-a26 from c9).
--   * The outer query groups by a0 and a4 (aliases of c1 and c2) and selects
--     only a0.
-- Per the report, both ingredients are needed to trigger the problem: the same
-- column aliased several times, and that column appearing in the GROUP BY.
EXPLAIN CBO
SELECT a0
FROM 
(SELECT c1 as a0,
        c1 as a1,
        c1 as a2,
        c2 as a3,
        c2 as a4,
        c2 as a5,
        c3 as a6,
        c3 as a7,
        c3 as a8,
        c4 as a9,
        c4 as a10,
        c4 as a11,
        c5 as a12,
        c5 as a13,
        c5 as a14,
        c6 as a15,
        c6 as a16,
        c6 as a17,
        c7 as a18,
        c7 as a19,
        c7 as a20,
        c8 as a21,
        c8 as a22,
        c8 as a23,
        c9 as a24,
        c9 as a25,
        c9 as a26
FROM t GROUP BY c1,c2,c3,c4,c5,c6,c7,c8,c9) t1
GROUP BY a0, a4
{code}
The query above spends a lot of time in compilation and eventually leads to an 
OutOfMemoryError, causing HiveServer2 to crash.

Note that each column in the inner query appears in the GROUP BY clause and is 
aliased three times in the SELECT clause. The multiple aliases of the same 
column as well as the fact that the column appears in the GROUP BY are 
necessary conditions to trigger the problem.

The stacktraces show that the query spends the entire time in field trimming 
(HiveRelFieldTrimmer) while trying to obtain metadata information about the 
unique keys.
{noformat}
"fdc1b4d2-11d6-4b5c-a665-c321ee5f7e22 main" #1 prio=5 os_prio=0 
tid=0x00007f2e2800b800 nid=0x132ff runnable [0x00007f2e2e7fb000]
   java.lang.Thread.State: RUNNABLE
        at 
org.apache.calcite.util.ImmutableBitSet$Builder.addAll(ImmutableBitSet.java:1086)
        at 
org.apache.calcite.util.ImmutableBitSet.union(ImmutableBitSet.java:691)
        at 
org.apache.calcite.rel.metadata.RelMdUniqueKeys$$Lambda$1124/1526659719.apply(Unknown
 Source)
        at 
org.apache.hive.com.google.common.collect.Iterators$5.transform(Iterators.java:757)
        at 
org.apache.hive.com.google.common.collect.TransformedIterator.next(TransformedIterator.java:48)
        at 
org.apache.hive.com.google.common.collect.ImmutableCollection$Builder.addAll(ImmutableCollection.java:418)
        at 
org.apache.hive.com.google.common.collect.ImmutableCollection$ArrayBasedBuilder.addAll(ImmutableCollection.java:502)
        at 
org.apache.hive.com.google.common.collect.ImmutableSet$Builder.addAll(ImmutableSet.java:520)
        at 
org.apache.calcite.rel.metadata.RelMdUniqueKeys.getProjectUniqueKeys(RelMdUniqueKeys.java:162)
        at 
org.apache.calcite.rel.metadata.RelMdUniqueKeys.getUniqueKeys(RelMdUniqueKeys.java:93)
        at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys_$(Unknown Source)
        at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys(Unknown Source)
        at 
org.apache.calcite.rel.metadata.RelMetadataQuery.getUniqueKeys(RelMetadataQuery.java:464)
        at 
org.apache.calcite.rel.metadata.RelMdUniqueKeys.getProjectUniqueKeys(RelMdUniqueKeys.java:136)
        at 
org.apache.calcite.rel.metadata.RelMdUniqueKeys.getUniqueKeys(RelMdUniqueKeys.java:93)
        at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys_$(Unknown Source)
        at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys(Unknown Source)
        at 
org.apache.calcite.rel.metadata.RelMetadataQuery.getUniqueKeys(RelMetadataQuery.java:464)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.generateNewGroupset(HiveRelFieldTrimmer.java:500)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:641)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VisitDispatcher$$Lambda$1113/231262971.apply(Unknown
 Source)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:286)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimChild(HiveRelFieldTrimmer.java:203)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trimFields(RelFieldTrimmer.java:447)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:759)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VisitDispatcher$$Lambda$1112/565392473.apply(Unknown
 Source)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:286)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trim(RelFieldTrimmer.java:170)
        at 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trim(HiveRelFieldTrimmer.java:164)
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1674)
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1583)
        at 
org.apache.calcite.tools.Frameworks.lambda$withPlanner$0(Frameworks.java:131)
        at 
org.apache.calcite.tools.Frameworks$$Lambda$716/1464860003.apply(Unknown Source)
        at 
org.apache.calcite.prepare.CalcitePrepareImpl.perform(CalcitePrepareImpl.java:914)
        at org.apache.calcite.tools.Frameworks.withPrepare(Frameworks.java:180)
        at org.apache.calcite.tools.Frameworks.withPlanner(Frameworks.java:126)
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.logicalPlan(CalcitePlanner.java:1335)
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:583)
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13163)
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:476)
        at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:332)
        at 
org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:180)
        at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:332)
        at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
        at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:109)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:499)
{noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to