[
https://issues.apache.org/jira/browse/HIVE-26628?focusedWorklogId=824101&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-824101
]
ASF GitHub Bot logged work on HIVE-26628:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 08/Nov/22 05:10
Start Date: 08/Nov/22 05:10
Worklog Time Spent: 10m
Work Description: kasakrisz commented on code in PR #3724:
URL: https://github.com/apache/hive/pull/3724#discussion_r1016148487
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergQueryLifeTimeHook.java:
##########
@@ -58,6 +69,11 @@ public void afterExecution(QueryLifeTimeHookContext ctx,
boolean hasError) {
}
private void checkAndRollbackIcebergCTAS(QueryLifeTimeHookContext ctx) {
+ if (!(HiveOperation.CREATETABLE_AS_SELECT.getOperationName().equals(
Review Comment:
Yes, that is the case.
Unfortunately the table is created even without this patch, which is a side
effect of executing an explain statement.
The create is called from `HiveIcebergSerDe` init, and this happens at
compile time during FileSinkDesc generation.
https://github.com/apache/hive/blob/63b6134b97036d6d31924ad0ec323fca2016dace/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java#L129
Later the Iceberg `Table` object is also required when its properties are
added to the job configs while augmenting the FileSinkOperator plan:
https://github.com/apache/hive/blob/63b6134b97036d6d31924ad0ec323fca2016dace/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java#L1611
https://github.com/apache/hive/blob/63b6134b97036d6d31924ad0ec323fca2016dace/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java#L796
The goal of this patch is to clean up this table after executing the
statement.
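For context, here is a minimal sketch of the kind of query lifetime hook this check lives in, assuming the operation name is reachable via `HookContext#getQueryPlan()`; the class name and the `rollbackCtasTableIfNeeded` helper are illustrative placeholders, not the code from this patch:
{code}
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook;
import org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext;
import org.apache.hadoop.hive.ql.plan.HiveOperation;

public class CtasCleanupHookSketch implements QueryLifeTimeHook {

  @Override
  public void beforeCompile(QueryLifeTimeHookContext ctx) { }

  @Override
  public void afterCompile(QueryLifeTimeHookContext ctx, boolean hasError) { }

  @Override
  public void beforeExecution(QueryLifeTimeHookContext ctx) { }

  @Override
  public void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError) {
    // Look at the plan of the statement that just finished.
    HookContext hookContext = ctx.getHookContext();
    QueryPlan plan = hookContext != null ? hookContext.getQueryPlan() : null;
    if (plan == null) {
      return;
    }
    // Only CTAS statements are of interest here, matching the check quoted above.
    if (HiveOperation.CREATETABLE_AS_SELECT.getOperationName()
        .equals(plan.getOperationName())) {
      // Hypothetical helper: decide whether the table pre-created at compile time
      // must be removed (e.g. the query was only an EXPLAIN) and drop it through
      // the Iceberg catalog. Intentionally left unimplemented in this sketch.
      rollbackCtasTableIfNeeded(ctx);
    }
  }

  private void rollbackCtasTableIfNeeded(QueryLifeTimeHookContext ctx) {
    // Placeholder for the catalog-level cleanup.
  }
}
{code}
Such a hook is picked up by Hive via the hive.query.lifetime.hooks configuration; the actual cleanup in the patch is done by HiveIcebergQueryLifeTimeHook shown in the diff above.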
Issue Time Tracking
-------------------
Worklog Id: (was: 824101)
Time Spent: 2h 40m (was: 2.5h)
> Iceberg table is created when running explain ctas command
> ----------------------------------------------------------
>
> Key: HIVE-26628
> URL: https://issues.apache.org/jira/browse/HIVE-26628
> Project: Hive
> Issue Type: Bug
> Components: StorageHandler
> Reporter: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Time Spent: 2h 40m
> Remaining Estimate: 0h
>
> {code}
> create table source(a int, b string, c int);
> explain
> create table tbl_ice stored by iceberg stored as orc tblproperties
> ('format-version'='2') as
> select a, b, c from source;
> create table tbl_ice stored by iceberg stored as orc tblproperties
> ('format-version'='2') as
> select a, b, c from source;
> {code}
> {code}
> org.apache.hadoop.hive.ql.parse.SemanticException: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13963)
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genResolvedParseTree(SemanticAnalyzer.java:12528)
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12693)
> at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:460)
> at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:317)
> at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
> at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:106)
> at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:522)
> at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:474)
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:439)
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:433)
> at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
> at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:227)
> at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255)
> at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:200)
> at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:126)
> at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:421)
> at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:352)
> at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:727)
> at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:697)
> at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:114)
> at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157)
> at org.apache.hadoop.hive.cli.TestIcebergLlapLocalCliDriver.testCliDriver(TestIcebergLlapLocalCliDriver.java:60)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
> at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
> at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135)
> at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
> at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
> at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
> at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
> at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
> at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
> at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
> at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
> at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
> at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
> at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
> at org.junit.runners.Suite.runChild(Suite.java:128)
> at org.junit.runners.Suite.runChild(Suite.java:27)
> at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
> at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
> at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
> at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
> at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
> at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95)
> at org.junit.rules.RunRules.evaluate(RunRules.java:20)
> at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
> at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
> at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377)
> at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138)
> at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465)
> at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451)
> Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13960)
> ... 61 more
> {code}
> The EXPLAIN ... command creates the Iceberg table default.tbl_ice, hence the
> CTAS command executed after it fails with "Table already exists".