[ https://issues.apache.org/jira/browse/FLINK-25012?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17448402#comment-17448402 ]
xiangqiao commented on FLINK-25012:
-----------------------------------

Thank you [~luoyuxia], I have added `hive.strict.checks.type.safety = false` to hive-site.xml, and the unit test now runs successfully. From the abstract syntax tree, the different column types (string and bigint) will be cast to double. How is the implicit type coercion done? Will it cause data correctness problems?
{code:java}
== Abstract Syntax Tree ==
LogicalSink(table=[test-catalog.db1.dest], fields=[key, val])
+- LogicalProject(key=[$0], val=[$1])
   +- LogicalProject(key=[$0], val=[$3])
      +- LogicalJoin(condition=[=(CAST($0):DOUBLE, CAST($2):DOUBLE)], joinType=[left])
         :- LogicalTableScan(table=[[test-catalog, db1, src2]])
         +- LogicalTableScan(table=[[test-catalog, db1, src1]])
{code}

> Cannot join hive tables with different column types
> ---------------------------------------------------
>
> Key: FLINK-25012
> URL: https://issues.apache.org/jira/browse/FLINK-25012
> Project: Flink
> Issue Type: Improvement
> Components: Connectors / Hive
> Affects Versions: 1.13.0, 1.14.0
> Reporter: xiangqiao
> Priority: Major
>
> When using Flink batch mode and joining Hive tables, we get the following exception (the same usage works without problems in Spark):
> {code:java}
> java.lang.RuntimeException: org.apache.hadoop.hive.ql.parse.SemanticException: Line 6:10 Wrong arguments 'key': Unsafe compares between different types are disabled for safety reasons. If you know what you are doing, please sethive.strict.checks.type.safety to false and that hive.mapred.mode is not set to 'strict' to proceed. Note that if you may get errors or incorrect results if you make a mistake while using some of the unsafe features.
>     at org.apache.flink.table.planner.delegation.hive.HiveParserCalcitePlanner.logicalPlan(HiveParserCalcitePlanner.java:305)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserCalcitePlanner.genLogicalPlan(HiveParserCalcitePlanner.java:273)
>     at org.apache.flink.table.planner.delegation.hive.HiveParser.analyzeSql(HiveParser.java:326)
>     at org.apache.flink.table.planner.delegation.hive.HiveParser.processCmd(HiveParser.java:274)
>     at org.apache.flink.table.planner.delegation.hive.HiveParser.parse(HiveParser.java:217)
>     at org.apache.flink.table.api.internal.TableEnvironmentImpl.executeSql(TableEnvironmentImpl.java:723)
>     at org.apache.flink.connectors.hive.TableEnvHiveConnectorITCase.testJoinWithDifferentColumnType(TableEnvHiveConnectorITCase.java:136)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:498)
>     at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
>     at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
>     at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
>     at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
>     at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
>     at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
>     at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
>     at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
>     at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
>     at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
>     at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
>     at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
>     at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
>     at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
>     at org.junit.rules.RunRules.evaluate(RunRules.java:20)
>     at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
>     at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
>     at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:69)
>     at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:33)
>     at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:220)
>     at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:53)
> Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Line 6:10 Wrong arguments 'key': Unsafe compares between different types are disabled for safety reasons. If you know what you are doing, please sethive.strict.checks.type.safety to false and that hive.mapred.mode is not set to 'strict' to proceed. Note that if you may get errors or incorrect results if you make a mistake while using some of the unsafe features.
>     at org.apache.flink.table.planner.delegation.hive.HiveParserTypeCheckProcFactory$DefaultExprProcessor.process(HiveParserTypeCheckProcFactory.java:1561)
>     at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
>     at org.apache.flink.table.planner.delegation.hive.copy.HiveParserDefaultGraphWalker.dispatchAndReturn(HiveParserDefaultGraphWalker.java:75)
>     at org.apache.flink.table.planner.delegation.hive.copy.HiveParserDefaultGraphWalker.dispatch(HiveParserDefaultGraphWalker.java:61)
>     at org.apache.flink.table.planner.delegation.hive.copy.HiveParserExpressionWalker.walk(HiveParserExpressionWalker.java:63)
>     at org.apache.flink.table.planner.delegation.hive.copy.HiveParserDefaultGraphWalker.startWalking(HiveParserDefaultGraphWalker.java:86)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserTypeCheckProcFactory.genExprNode(HiveParserTypeCheckProcFactory.java:289)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserUtils.genExprNode(HiveParserUtils.java:392)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserCalcitePlanner.genJoinRelNode(HiveParserCalcitePlanner.java:520)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserCalcitePlanner.genJoinLogicalPlan(HiveParserCalcitePlanner.java:775)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserCalcitePlanner.genLogicalPlan(HiveParserCalcitePlanner.java:2752)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserCalcitePlanner.genLogicalPlan(HiveParserCalcitePlanner.java:2670)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserCalcitePlanner.genLogicalPlan(HiveParserCalcitePlanner.java:2711)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserCalcitePlanner.logicalPlan(HiveParserCalcitePlanner.java:285)
>     ... 31 more
> Caused by: org.apache.hadoop.hive.ql.exec.UDFArgumentException: Unsafe compares between different types are disabled for safety reasons. If you know what you are doing, please sethive.strict.checks.type.safety to false and that hive.mapred.mode is not set to 'strict' to proceed.
> Note that if you may get errors or incorrect results if you make a mistake while using some of the unsafe features.
>     at org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc.newInstance(ExprNodeGenericFuncDesc.java:226)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserTypeCheckProcFactory$DefaultExprProcessor.getXpathOrFuncExprNodeDesc(HiveParserTypeCheckProcFactory.java:1228)
>     at org.apache.flink.table.planner.delegation.hive.HiveParserTypeCheckProcFactory$DefaultExprProcessor.process(HiveParserTypeCheckProcFactory.java:1546)
>     ... 44 more
> {code}
>
> This problem can be reproduced by adding a unit test *TableEnvHiveConnectorITCase#testJoinWithDifferentColumnType*:
> {code:java}
> @Test
> public void testJoinWithDifferentColumnType() throws Exception {
>     TableEnvironment tableEnv = getTableEnvWithHiveCatalog();
>     tableEnv.executeSql("create database db1");
>     try {
>         tableEnv.useDatabase("db1");
>         tableEnv.executeSql("create table src1(key bigint, val string)");
>         tableEnv.executeSql("create table src2(key string, val string)");
>         tableEnv.executeSql("create table dest(key string, val string)");
>         HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "src1")
>                 .addRow(new Object[] {"1", "val1"})
>                 .addRow(new Object[] {"2", "val2"})
>                 .addRow(new Object[] {"3", "val3"})
>                 .commit();
>         HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "src2")
>                 .addRow(new Object[] {"3", "val4"})
>                 .addRow(new Object[] {"4", "val4"})
>                 .commit();
>         tableEnv.executeSql(
>                         "INSERT OVERWRITE TABLE dest\n"
>                                 + "SELECT j.*\n"
>                                 + "FROM (SELECT t1.key, p1.val\n"
>                                 + "      FROM src2 t1\n"
>                                 + "      LEFT OUTER JOIN src1 p1\n"
>                                 + "      ON (t1.key = p1.key)\n"
>                                 + ") j")
>                 .await();
>         List<Row> results =
>                 CollectionUtil.iteratorToList(
>                         tableEnv.executeSql("select * from dest order by key").collect());
>         assertEquals("[+I[3, val3], +I[4, null]]", results.toString());
>     } finally {
>         tableEnv.useDatabase("default");
>         tableEnv.executeSql("drop database db1 cascade");
>     }
> }
> {code}


--
This message was sent by Atlassian Jira
(v8.20.1#820001)
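To illustrate the correctness concern raised in the comment above: once both join keys are coerced to DOUBLE, as in the `CAST($0):DOUBLE = CAST($2):DOUBLE` join condition shown in the plan, bigint keys beyond 2^53 can no longer be represented exactly, so distinct keys may compare as equal. The following is a minimal, self-contained Java sketch of that effect; it is illustrative only, the class name is made up, and it is not part of the reported test.
{code:java}
public class DoubleCoercionDemo {
    public static void main(String[] args) {
        // Two distinct bigint (long) join keys just above 2^53, the point where
        // a double can no longer represent every integer exactly.
        long key1 = 9007199254740993L; // 2^53 + 1
        long key2 = 9007199254740992L; // 2^53

        // Compared as bigint, the keys differ ...
        System.out.println(key1 == key2);                   // prints: false
        // ... but after casting both sides to double they collide,
        // which is what a DOUBLE-coerced join condition would see.
        System.out.println((double) key1 == (double) key2); // prints: true
    }
}
{code}
String keys can be affected in an analogous way: for example, the strings "1" and "1.0" both coerce to the double value 1.0, so rows whose keys differ as strings may still be treated as equal under this coercion.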