Rajkumar Singh created HIVE-24163:
-------------------------------------
Summary: Dynamic Partitioning Insert for MM table fails during
Move Operation
Key: HIVE-24163
URL: https://issues.apache.org/jira/browse/HIVE-24163
Project: Hive
Issue Type: Bug
Components: Hive
Reporter: Rajkumar Singh
Fix For: 3.1.2
-- create MM (insert-only transactional) table
{code:java}
CREATE TABLE `part1`( |
| `id` double, |
| `n` double, |
| `name` varchar(8), |
| `sex` varchar(1)) |
| PARTITIONED BY ( |
| `weight` string, |
| `age` string, |
| `height` string) |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| WITH SERDEPROPERTIES ( |
| 'field.delim'='\u0001', |
| 'line.delim'='\n', |
| 'serialization.format'='\u0001') |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1' |
| TBLPROPERTIES ( |
| 'bucketing_version'='2', |
| 'transactional'='true', |
| 'transactional_properties'='insert_only', |
| 'transient_lastDdlTime'='1599053368')
{code}
-- create managed table
{code:java}
CREATE TABLE `class`( |
| `name` varchar(8), |
| `sex` varchar(1), |
| `age` double, |
| `height` double, |
| `weight` double) |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' |
| LOCATION |
| 'hdfs://hostname:8020/warehouse/tablespace/managed/hive/class' |
| TBLPROPERTIES ( |
| 'bucketing_version'='2', |
| 'transactional'='true', |
| 'transactional_properties'='default', |
| 'transient_lastDdlTime'='1599053345')
{code}
-- Run Insert query
{code:java}
INSERT INTO TABLE `part1` PARTITION (`Weight`,`Age`,`Height`) SELECT 0, 0,
`Name`,`Sex`,`Weight`,`Age`,`Height` FROM `class`;
{code}
It fails during the MoveTask execution:
{code:java}
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: partition
hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1/.hive-staging_hive_2020-09-02_13-29-58_765_4475282758764123921-1/-ext-10000/tmpstats-0_FS_3
is not a directory!
at
org.apache.hadoop.hive.ql.metadata.Hive.getValidPartitionsInPath(Hive.java:2769)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at
org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:2837)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at
org.apache.hadoop.hive.ql.exec.MoveTask.handleDynParts(MoveTask.java:562)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:440)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:359)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:721)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:488)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:482)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:225)
~[hive-service-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
{code}
The reason is that the task writes the fs stats file (tmpstats-0_FS_3 in the
trace above) when the FileSinkOperator closes. HS2 then runs the MoveTask to
move the data into the destination partition directories; while collecting the
partition locations, Hive checks whether each path is a directory and fails
because the stats file is not one.
-- Hive sets the stats location at
https://github.com/apache/hive/blob/d700ea54ec5da5364d92a9faaa58f89ea03181e0/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java#L8135
which is relative to the hive-staging directory:
https://github.com/apache/hive/blob/fecad5b0f72c535ed1c53f2cc62b0d6649b651ae/ql/src/java/org/apache/hadoop/hive/ql/Context.java#L617
--
This message was sent by Atlassian Jira
(v8.3.4#803005)