[
https://issues.apache.org/jira/browse/HIVE-24163?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17195741#comment-17195741
]
Rajkumar Singh commented on HIVE-24163:
---------------------------------------
This seems to be a regression of https://issues.apache.org/jira/browse/HIVE-21164
> Dynamic Partitioning Insert fail for MM table fail while Move Operation
> -----------------------------------------------------------------------
>
> Key: HIVE-24163
> URL: https://issues.apache.org/jira/browse/HIVE-24163
> Project: Hive
> Issue Type: Bug
> Components: Hive
> Reporter: Rajkumar Singh
> Priority: Major
> Fix For: 3.1.2
>
>
> -- create MM table
> {code:java}
> CREATE TABLE `part1`( |
> | `id` double, |
> | `n` double, |
> | `name` varchar(8), |
> | `sex` varchar(1)) |
> | PARTITIONED BY ( |
> | `weight` string, |
> | `age` string, |
> | `height` string) |
> | ROW FORMAT SERDE |
> | 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
> | WITH SERDEPROPERTIES ( |
> | 'field.delim'='\u0001', |
> | 'line.delim'='\n', |
> | 'serialization.format'='\u0001') |
> | STORED AS INPUTFORMAT |
> | 'org.apache.hadoop.mapred.TextInputFormat' |
> | OUTPUTFORMAT |
> | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
> | LOCATION |
> | 'hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1' |
> | TBLPROPERTIES ( |
> | 'bucketing_version'='2', |
> | 'transactional'='true', |
> | 'transactional_properties'='insert_only', |
> | 'transient_lastDdlTime'='1599053368')
> {code}
> -- create managed table
> {code:java}
> CREATE TABLE `class`( |
> | `name` varchar(8), |
> | `sex` varchar(1), |
> | `age` double, |
> | `height` double, |
> | `weight` double) |
> | ROW FORMAT SERDE |
> | 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' |
> | STORED AS INPUTFORMAT |
> | 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' |
> | OUTPUTFORMAT |
> | 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' |
> | LOCATION |
> | 'hdfs://hostname:8020/warehouse/tablespace/managed/hive/class' |
> | TBLPROPERTIES ( |
> | 'bucketing_version'='2', |
> | 'transactional'='true', |
> | 'transactional_properties'='default', |
> | 'transient_lastDdlTime'='1599053345')
> {code}
> -- Run Insert query
> {code:java}
> INSERT INTO TABLE `part1` PARTITION (`Weight`,`Age`,`Height`) SELECT 0, 0,
> `Name`,`Sex`,`Weight`,`Age`,`Height` FROM `class`;
> {code}
> It fails during the MoveTask execution:
> {code:java}
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: partition
> hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1/.hive-staging_hive_2020-09-02_13-29-58_765_4475282758764123921-1/-ext-10000/tmpstats-0_FS_3
> is not a directory!
> at
> org.apache.hadoop.hive.ql.metadata.Hive.getValidPartitionsInPath(Hive.java:2769)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at
> org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:2837)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at
> org.apache.hadoop.hive.ql.exec.MoveTask.handleDynParts(MoveTask.java:562)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:440)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:359)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:721)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:488)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:482)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
> ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> at
> org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:225)
> ~[hive-service-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
> {code}
> The reason is that the task writes the fsstat file while the FileSinkOperator
> is closing. HS2 then runs the MoveTask to move the data into the destination
> partition directory; while getting the partition location, Hive checks whether
> the destination is a directory or not, and fails.
> -- Hive sets the stat location in
> https://github.com/apache/hive/blob/d700ea54ec5da5364d92a9faaa58f89ea03181e0/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java#L8135
> which is relative to the hive-staging directory:
> https://github.com/apache/hive/blob/fecad5b0f72c535ed1c53f2cc62b0d6649b651ae/ql/src/java/org/apache/hadoop/hive/ql/Context.java#L617
--
This message was sent by Atlassian Jira
(v8.3.4#803005)