[
https://issues.apache.org/jira/browse/HIVE-28582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Stamatis Zampetakis updated HIVE-28582:
---------------------------------------
Attachment: hive_28582.q
> OOM when compiling query with many GROUP BY columns aliased multiple times
> --------------------------------------------------------------------------
>
> Key: HIVE-28582
> URL: https://issues.apache.org/jira/browse/HIVE-28582
> Project: Hive
> Issue Type: Bug
> Security Level: Public(Viewable by anyone)
> Components: CBO, HiveServer2
> Affects Versions: 4.0.1
> Reporter: Stamatis Zampetakis
> Assignee: Stamatis Zampetakis
> Priority: Major
> Attachments: hive_28582.q
>
>
> {code:sql}
> CREATE TABLE t (
> c1 string,
> c2 string,
> c3 string,
> c4 string,
> c5 string,
> c6 string,
> c7 string,
> c8 string,
> c9 string
> );
> EXPLAIN CBO
> SELECT a0
> FROM
> (SELECT c1 as a0,
> c1 as a1,
> c1 as a2,
> c2 as a3,
> c2 as a4,
> c2 as a5,
> c3 as a6,
> c3 as a7,
> c3 as a8,
> c4 as a9,
> c4 as a10,
> c4 as a11,
> c5 as a12,
> c5 as a13,
> c5 as a14,
> c6 as a15,
> c6 as a16,
> c6 as a17,
> c7 as a18,
> c7 as a19,
> c7 as a20,
> c8 as a21,
> c8 as a22,
> c8 as a23,
> c9 as a24,
> c9 as a25,
> c9 as a26
> FROM t GROUP BY c1,c2,c3,c4,c5,c6,c7,c8,c9) t1
> GROUP BY a0, a4
> {code}
> The query above spends a lot of time in compilation and eventually leads to
> an OutOfMemoryError, causing HiveServer2 to crash.
> Note that each column in the inner query appears in the GROUP BY clause and
> is aliased three times in the SELECT clause. The multiple aliases of the same
> column as well as the fact that the column appears in the GROUP BY are
> necessary conditions to trigger the problem.
> The stack traces show that the query spends the entire time in field trimming
> (HiveRelFieldTrimmer) while trying to obtain metadata about the unique keys.
> {noformat}
> "fdc1b4d2-11d6-4b5c-a665-c321ee5f7e22 main" #1 prio=5 os_prio=0 tid=0x00007f2e2800b800 nid=0x132ff runnable [0x00007f2e2e7fb000]
> java.lang.Thread.State: RUNNABLE
> at org.apache.calcite.util.ImmutableBitSet$Builder.addAll(ImmutableBitSet.java:1086)
> at org.apache.calcite.util.ImmutableBitSet.union(ImmutableBitSet.java:691)
> at org.apache.calcite.rel.metadata.RelMdUniqueKeys$$Lambda$1124/1526659719.apply(Unknown Source)
> at org.apache.hive.com.google.common.collect.Iterators$5.transform(Iterators.java:757)
> at org.apache.hive.com.google.common.collect.TransformedIterator.next(TransformedIterator.java:48)
> at org.apache.hive.com.google.common.collect.ImmutableCollection$Builder.addAll(ImmutableCollection.java:418)
> at org.apache.hive.com.google.common.collect.ImmutableCollection$ArrayBasedBuilder.addAll(ImmutableCollection.java:502)
> at org.apache.hive.com.google.common.collect.ImmutableSet$Builder.addAll(ImmutableSet.java:520)
> at org.apache.calcite.rel.metadata.RelMdUniqueKeys.getProjectUniqueKeys(RelMdUniqueKeys.java:162)
> at org.apache.calcite.rel.metadata.RelMdUniqueKeys.getUniqueKeys(RelMdUniqueKeys.java:93)
> at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys_$(Unknown Source)
> at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys(Unknown Source)
> at org.apache.calcite.rel.metadata.RelMetadataQuery.getUniqueKeys(RelMetadataQuery.java:464)
> at org.apache.calcite.rel.metadata.RelMdUniqueKeys.getProjectUniqueKeys(RelMdUniqueKeys.java:136)
> at org.apache.calcite.rel.metadata.RelMdUniqueKeys.getUniqueKeys(RelMdUniqueKeys.java:93)
> at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys_$(Unknown Source)
> at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys(Unknown Source)
> at org.apache.calcite.rel.metadata.RelMetadataQuery.getUniqueKeys(RelMetadataQuery.java:464)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.generateNewGroupset(HiveRelFieldTrimmer.java:500)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:641)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VisitDispatcher$$Lambda$1113/231262971.apply(Unknown Source)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:286)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimChild(HiveRelFieldTrimmer.java:203)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trimFields(RelFieldTrimmer.java:447)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:759)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VisitDispatcher$$Lambda$1112/565392473.apply(Unknown Source)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:286)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trim(RelFieldTrimmer.java:170)
> at org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trim(HiveRelFieldTrimmer.java:164)
> at org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1674)
> at org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1583)
> at org.apache.calcite.tools.Frameworks.lambda$withPlanner$0(Frameworks.java:131)
> at org.apache.calcite.tools.Frameworks$$Lambda$716/1464860003.apply(Unknown Source)
> at org.apache.calcite.prepare.CalcitePrepareImpl.perform(CalcitePrepareImpl.java:914)
> at org.apache.calcite.tools.Frameworks.withPrepare(Frameworks.java:180)
> at org.apache.calcite.tools.Frameworks.withPlanner(Frameworks.java:126)
> at org.apache.hadoop.hive.ql.parse.CalcitePlanner.logicalPlan(CalcitePlanner.java:1335)
> at org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:583)
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13163)
> at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:476)
> at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:332)
> at org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:180)
> at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:332)
> at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
> at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:109)
> at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:499)
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)