[ 
https://issues.apache.org/jira/browse/HIVE-24163?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Rajkumar Singh updated HIVE-24163:
----------------------------------
    Description: 
-- DDLs and Query
{code:java}
create table `class` (name varchar(8), sex varchar(1), age double precision, 
height double precision, weight double precision);

insert into table class values ('RAJ','MALE',28,12,12);
CREATE TABLE `PART1` (`id` DOUBLE,`N` DOUBLE,`Name` VARCHAR(8),`Sex` 
VARCHAR(1)) PARTITIONED BY(Weight string, Age
string, Height string)  ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES 
TERMINATED BY '\012' STORED AS TEXTFILE;

INSERT INTO TABLE `part1` PARTITION (`Weight`,`Age`,`Height`)  SELECT 0, 0, 
`Name`,`Sex`,`Weight`,`Age`,`Height` FROM `class`;
{code}



It fails during MoveTask execution:

{code:java}
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: partition 
hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1/.hive-staging_hive_2020-09-02_13-29-58_765_4475282758764123921-1/-ext-10000/tmpstats-0_FS_3
 is not a directory!
        at 
org.apache.hadoop.hive.ql.metadata.Hive.getValidPartitionsInPath(Hive.java:2769)
 ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:2837) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hadoop.hive.ql.exec.MoveTask.handleDynParts(MoveTask.java:562) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:440) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:359) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:721) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:488) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:482) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:225)
 ~[hive-service-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]

{code}
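
From the path in the exception, the staging output directory at MoveTask time contains the per-task stats file alongside the dynamic partition directories (the partition directory name below is illustrative; only tmpstats-0_FS_3 comes from the error message):

{code}
.hive-staging_hive_2020-09-02_13-29-58_765_.../-ext-10000/
    weight=12.0/age=28.0/height=12.0/   <-- dynamic partition directory (expected)
    tmpstats-0_FS_3                     <-- stats file written at FileSinkOperator close (fails the check)
{code}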

The reason: the task writes its file system stats file (tmpstats-*) while the FileSinkOperator is being closed, and that file lands directly under the staging output directory. When HS2 runs the MoveTask to move the data into the destination partition directories, Hive lists the staging output directory to find the dynamic partition locations, checks whether each entry is a directory, and fails on the stats file.

-- Hive sets the stats publishing location here:

https://github.com/apache/hive/blob/d700ea54ec5da5364d92a9faaa58f89ea03181e0/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java#L8135

which is relative to the hive-staging directory:

https://github.com/apache/hive/blob/fecad5b0f72c535ed1c53f2cc62b0d6649b651ae/ql/src/java/org/apache/hadoop/hive/ql/Context.java#L617
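
For illustration, a minimal sketch (not the actual Hive code) of the kind of validation that Hive.getValidPartitionsInPath performs over the entries under -ext-10000; a stray plain file such as tmpstats-0_FS_3 fails this check, which matches the stack trace above:

{code:java}
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.HiveException;

// Simplified sketch: every entry found under the staging output directory is
// expected to be a dynamic partition directory; a plain file (e.g. the
// FileSinkOperator stats file) triggers the "is not a directory!" failure.
public class PartitionPathCheckSketch {
  static void validatePartitionPaths(FileSystem fs, Path extOutputDir) throws Exception {
    for (FileStatus status : fs.listStatus(extOutputDir)) {
      if (!status.isDirectory()) {
        throw new HiveException("partition " + status.getPath() + " is not a directory!");
      }
    }
  }
}
{code}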






  was:
-- create MM table 
{code:java}
CREATE TABLE `part1`(
  `id` double,
  `n` double,
  `name` varchar(8),
  `sex` varchar(1))
PARTITIONED BY (
  `weight` string,
  `age` string,
  `height` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
  'field.delim'='\u0001',
  'line.delim'='\n',
  'serialization.format'='\u0001')
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1'
TBLPROPERTIES (
  'bucketing_version'='2',
  'transactional'='true',
  'transactional_properties'='insert_only',
  'transient_lastDdlTime'='1599053368')
{code}

-- create managed table 

{code:java}
CREATE TABLE `class`(
  `name` varchar(8),
  `sex` varchar(1),
  `age` double,
  `height` double,
  `weight` double)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  'hdfs://hostname:8020/warehouse/tablespace/managed/hive/class'
TBLPROPERTIES (
  'bucketing_version'='2',
  'transactional'='true',
  'transactional_properties'='default',
  'transient_lastDdlTime'='1599053345')
{code}


-- Run Insert query

{code:java}
INSERT INTO TABLE `part1` PARTITION (`Weight`,`Age`,`Height`)  SELECT 0, 0, 
`Name`,`Sex`,`Weight`,`Age`,`Height` FROM `class`;
{code}

It fails during MoveTask execution:

{code:java}
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: partition 
hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1/.hive-staging_hive_2020-09-02_13-29-58_765_4475282758764123921-1/-ext-10000/tmpstats-0_FS_3
 is not a directory!
        at 
org.apache.hadoop.hive.ql.metadata.Hive.getValidPartitionsInPath(Hive.java:2769)
 ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:2837) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hadoop.hive.ql.exec.MoveTask.handleDynParts(MoveTask.java:562) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:440) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:359) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:721) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:488) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:482) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) 
~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at 
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:225)
 ~[hive-service-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]

{code}

The reason: the task writes its file system stats file (tmpstats-*) while the FileSinkOperator is being closed, and that file lands directly under the staging output directory. When HS2 runs the MoveTask to move the data into the destination partition directories, Hive lists the staging output directory to find the dynamic partition locations, checks whether each entry is a directory, and fails on the stats file.

-- Hive sets the stats publishing location here:

https://github.com/apache/hive/blob/d700ea54ec5da5364d92a9faaa58f89ea03181e0/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java#L8135

which is relative to the hive-staging directory:

https://github.com/apache/hive/blob/fecad5b0f72c535ed1c53f2cc62b0d6649b651ae/ql/src/java/org/apache/hadoop/hive/ql/Context.java#L617







> Dynamic Partitioning Insert for MM table fails during Move Operation
> ---------------------------------------------------------------------
>
>                 Key: HIVE-24163
>                 URL: https://issues.apache.org/jira/browse/HIVE-24163
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive
>            Reporter: Rajkumar Singh
>            Priority: Major
>             Fix For: 3.1.2
>
>



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
