[
https://issues.apache.org/jira/browse/HIVE-26715?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
xorsum updated HIVE-26715:
--------------------------
Summary: CBO failed with JDBCJoinPushDown because of wrong
HiveJdbcConverter (was: CBO failed with JDBCJoinPushDown because wrong
HiveJdbcConverter)
> CBO failed with JDBCJoinPushDown because of wrong HiveJdbcConverter
> -------------------------------------------------------------------
>
> Key: HIVE-26715
> URL: https://issues.apache.org/jira/browse/HIVE-26715
> Project: Hive
> Issue Type: Bug
> Components: CBO
> Affects Versions: 3.0.0, 3.1.3
> Reporter: xorsum
> Priority: Major
>
> h4. reproduction
>
> {code:java}
> create database tpch100m;
> use tpch100m;
> CREATE EXTERNAL TABLE region (
> r_regionkey int,
> r_name string,
> r_comment string
> ) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES (
> "hive.sql.database.type" = "MYSQL",
> "hive.sql.jdbc.url" =
> "jdbc:mysql://127.0.0.1:3306/tpch100m?serverTimezone=UTC&useSSL=false",
> "hive.sql.jdbc.driver" = "com.mysql.jdbc.Driver",
> "hive.sql.dbcp.username" = "root",
> "hive.sql.dbcp.password" = "123456",
> "hive.sql.table" = "region",
> "hive.sql.dbcp.maxActive" = "1"
> );
> CREATE EXTERNAL TABLE nation (
> n_nationkey int,
> n_name string,
> n_regionkey int,
> n_comment string
> ) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES (
> "hive.sql.database.type" = "MYSQL",
> "hive.sql.jdbc.url" =
> "jdbc:mysql://127.0.0.1:3306/tpch100m?serverTimezone=UTC&useSSL=false",
> "hive.sql.jdbc.driver" = "com.mysql.jdbc.Driver",
> "hive.sql.dbcp.username" = "root",
> "hive.sql.dbcp.password" = "123456",
> "hive.sql.table" = "nation",
> "hive.sql.dbcp.maxActive" = "1"
> );
> select * from nation,region where n_regionkey = r_regionkey; {code}
> h4. log
> {code:java}
> 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main]
> translator.ASTBuilder: The HiveJdbcConverter generated sql message is:
> SELECT *
> FROM (SELECT *
> FROM `nation`
> WHERE `n_regionkey` IS NOT NULL) AS `t`
> INNER JOIN (SELECT *
> FROM `region`
> WHERE `r_regionkey` IS NOT NULL) AS `t0` ON `t`.`n_regionkey` =
> `t0`.`r_regionkey`
> 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main]
> parse.CalcitePlanner: Get metadata for source tables
> 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main]
> metastore.HiveMetaStore: 0: get_table : tbl=hive.tpch100m.nation
> 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main]
> HiveMetaStore.audit: ugi=hbk ip=unknown-ip-addr cmd=get_table :
> tbl=hive.tpch100m.nation
> 2022-11-09T16:00:35,194 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main]
> parse.CalcitePlanner: Get metadata for subqueries
> 2022-11-09T16:00:35,194 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main]
> parse.CalcitePlanner: Get metadata for destination tables
> 2022-11-09T16:00:35,202 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main]
> ql.Context: New scratch dir is
> hdfs://0.0.0.0:9000/tmp/hive/hbk/04ebb58a-de45-4f8c-9742-06cac555223b/hive_2022-11-09_16-00-34_987_5570790922374716113-1
> 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main]
> jdbc.JdbcSerDe: Caught exception while initializing the SqlSerDe
> org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table
> definition has 4 columns
> at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:79)
> ~[hive-jdbc-handler-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:54)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:540)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:90)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:77)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:289)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:271)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genTablePlan(SemanticAnalyzer.java:10973)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11329)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11288)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:517)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12164)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:214)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> ~[?:1.8.0_342]
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> ~[?:1.8.0_342]
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> ~[?:1.8.0_342]
> at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_342]
> at org.apache.hadoop.util.RunJar.run(RunJar.java:244)
> ~[hadoop-common-2.9.2.jar:?]
> at org.apache.hadoop.util.RunJar.main(RunJar.java:158)
> ~[hadoop-common-2.9.2.jar:?]
> 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main]
> hive.log: error in initSerDe: org.apache.hadoop.hive.serde2.SerDeException
> org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table
> definition has 4 columns
> org.apache.hadoop.hive.serde2.SerDeException:
> org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table
> definition has 4 columns
> at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:104)
> ~[hive-jdbc-handler-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:54)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:540)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:90)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:77)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:289)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:271)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genTablePlan(SemanticAnalyzer.java:10973)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11329)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11288)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:517)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12164)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:214)
> ~[hive-exec-3.1.3.jar:3.1.3]
> at
> org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683)
> ~[hive-cli-3.1.3.jar:3.1.3]
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> ~[?:1.8.0_342]
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> ~[?:1.8.0_342]
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> ~[?:1.8.0_342]
> at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_342]
> at org.apache.hadoop.util.RunJar.run(RunJar.java:244)
> ~[hadoop-common-2.9.2.jar:?]
> at org.apache.hadoop.util.RunJar.main(RunJar.java:158)
> ~[hadoop-common-2.9.2.jar:?]
> Caused by: org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns.
> Table definition has 4 columns
> at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:79)
> ~[hive-jdbc-handler-3.1.3.jar:3.1.3]
> ... 31 more
> 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main]
> parse.CalcitePlanner: CBO failed, skipping CBO.
> {code}
> h4. reason
> 1. The *JDBCJoinPushDownRule* transforms
> *HiveJoin(HiveJdbcConverter(nation),HiveJdbcConverter(region))* to
> {*}HiveJdbcConverte(JdbcJoin(nation,region)){*}.
> 2. *HiveJdbcConverter.getTableScan()* returns the first
> {*}JdbcHiveTableScan{*}(nation), but does not return {*}JdbcJoin{*}(nation
> join region). It is the reason of this bug.
> 3. When initializing the {*}JdbcSerde{*}, the columns of (nation) and (nation
> join region) mismatch, so CBO failed.
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)