[
https://issues.apache.org/jira/browse/HUDI-3643?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
董可伦 updated HUDI-3643:
----------------------
Description:
Spark SQL create non-partition hudi table:
```sql
create table test_hudi_table (
id int,
name string,
price double,
ts long,
dt string
) using hudi
options (
primaryKey = 'id',
preCombineField = 'ts',
type = 'cow'
)
location '/tmp/test_hudi_table';
```
hive tez count
```sql
select count(1) from test_hudi_table;
```
hen exception:
hudi 0.9.0
```java
ERROR : Status: Failed
ERROR : Vertex failed, vertexName=Map 1,
vertexId=vertex_1647336877182_0100_4_00, diagnostics=[Vertex
vertex_1647336877182_0100_4_00 [Map 1] killed/failed due
to:ROOT_INPUT_INIT_FAILURE, Vertex Input: test_hudi_table initializer failed,
vertex=vertex_1647336877182_0100_4_00 [Map 1], java.lang.NullPointerException
at
org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePath(HoodieInputFormatUtils.java:327)
at
org.apache.hudi.hadoop.InputPathHandler.parseInputPaths(InputPathHandler.java:107)
at
org.apache.hudi.hadoop.InputPathHandler.<init>(InputPathHandler.java:68)
at
org.apache.hudi.hadoop.HoodieParquetInputFormat.listStatus(HoodieParquetInputFormat.java:80)
at
org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:325)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:524)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:779)
at
org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:243)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:278)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:269)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:269)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:253)
at
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
at
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
at
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
```
hudi master also exception
```java
ERROR : Status: Failed
ERROR : Vertex failed, vertexName=Map 1,
vertexId=vertex_1647336877182_0106_1_00, diagnostics=[Vertex
vertex_1647336877182_0106_1_00 [Map 1] killed/failed due
to:ROOT_INPUT_INIT_FAILURE, Vertex Input: test_hudi_table initializer failed,
vertex=vertex_1647336877182_0106_1_00 [Map 1], java.lang.NullPointerException
at
org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePathUnchecked(HoodieInputFormatUtils.java:335)
at
org.apache.hudi.hadoop.InputPathHandler.parseInputPaths(InputPathHandler.java:110)
at
org.apache.hudi.hadoop.InputPathHandler.<init>(InputPathHandler.java:72)
at
org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatus(HoodieCopyOnWriteTableInputFormat.java:109)
at
org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:325)
at
org.apache.hudi.hadoop.HoodieParquetInputFormatBase.getSplits(HoodieParquetInputFormatBase.java:68)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:524)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:779)
at
org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:243)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:278)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:269)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:269)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:253)
at
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
at
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
at
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
```
was:
## Spark SQL create non-partition hudi table:
```sql
create table test_hudi_table (
id int,
name string,
price double,
ts long,
dt string
) using hudi
options (
primaryKey = 'id',
preCombineField = 'ts',
type = 'cow'
)
location '/tmp/test_hudi_table';
```
## hive tez count
```sql
select count(1) from test_hudi_table;
```
## then exception:
### hudi 0.9.0
```java
ERROR : Status: Failed
ERROR : Vertex failed, vertexName=Map 1,
vertexId=vertex_1647336877182_0100_4_00, diagnostics=[Vertex
vertex_1647336877182_0100_4_00 [Map 1] killed/failed due
to:ROOT_INPUT_INIT_FAILURE, Vertex Input: test_hudi_table initializer failed,
vertex=vertex_1647336877182_0100_4_00 [Map 1], java.lang.NullPointerException
at
org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePath(HoodieInputFormatUtils.java:327)
at
org.apache.hudi.hadoop.InputPathHandler.parseInputPaths(InputPathHandler.java:107)
at
org.apache.hudi.hadoop.InputPathHandler.<init>(InputPathHandler.java:68)
at
org.apache.hudi.hadoop.HoodieParquetInputFormat.listStatus(HoodieParquetInputFormat.java:80)
at
org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:325)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:524)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:779)
at
org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:243)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:278)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:269)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:269)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:253)
at
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
at
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
at
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
```
### hudi master also exception
```java
ERROR : Status: Failed
ERROR : Vertex failed, vertexName=Map 1,
vertexId=vertex_1647336877182_0106_1_00, diagnostics=[Vertex
vertex_1647336877182_0106_1_00 [Map 1] killed/failed due
to:ROOT_INPUT_INIT_FAILURE, Vertex Input: test_hudi_table initializer failed,
vertex=vertex_1647336877182_0106_1_00 [Map 1], java.lang.NullPointerException
at
org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePathUnchecked(HoodieInputFormatUtils.java:335)
at
org.apache.hudi.hadoop.InputPathHandler.parseInputPaths(InputPathHandler.java:110)
at
org.apache.hudi.hadoop.InputPathHandler.<init>(InputPathHandler.java:72)
at
org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatus(HoodieCopyOnWriteTableInputFormat.java:109)
at
org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:325)
at
org.apache.hudi.hadoop.HoodieParquetInputFormatBase.getSplits(HoodieParquetInputFormatBase.java:68)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:524)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:779)
at
org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:243)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:278)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:269)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:269)
at
org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:253)
at
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
at
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
at
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
```
> Hive count throws exception when the table is empty and the path depth is
> less than 3
> -------------------------------------------------------------------------------------
>
> Key: HUDI-3643
> URL: https://issues.apache.org/jira/browse/HUDI-3643
> Project: Apache Hudi
> Issue Type: Bug
> Components: hive
> Affects Versions: 0.9.0
> Reporter: 董可伦
> Assignee: 董可伦
> Priority: Major
> Fix For: 0.11.0
>
>
> Spark SQL create non-partition hudi table:
> ```sql
> create table test_hudi_table (
> id int,
> name string,
> price double,
> ts long,
> dt string
> ) using hudi
> options (
> primaryKey = 'id',
> preCombineField = 'ts',
> type = 'cow'
> )
> location '/tmp/test_hudi_table';
> ```
> hive tez count
> ```sql
> select count(1) from test_hudi_table;
> ```
> hen exception:
> hudi 0.9.0
> ```java
> ERROR : Status: Failed
> ERROR : Vertex failed, vertexName=Map 1,
> vertexId=vertex_1647336877182_0100_4_00, diagnostics=[Vertex
> vertex_1647336877182_0100_4_00 [Map 1] killed/failed due
> to:ROOT_INPUT_INIT_FAILURE, Vertex Input: test_hudi_table initializer failed,
> vertex=vertex_1647336877182_0100_4_00 [Map 1], java.lang.NullPointerException
> at
> org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePath(HoodieInputFormatUtils.java:327)
> at
> org.apache.hudi.hadoop.InputPathHandler.parseInputPaths(InputPathHandler.java:107)
> at
> org.apache.hudi.hadoop.InputPathHandler.<init>(InputPathHandler.java:68)
> at
> org.apache.hudi.hadoop.HoodieParquetInputFormat.listStatus(HoodieParquetInputFormat.java:80)
> at
> org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:325)
> at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:524)
> at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:779)
> at
> org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:243)
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:278)
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:269)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:269)
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:253)
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
> at
> com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> ```
> hudi master also exception
> ```java
> ERROR : Status: Failed
> ERROR : Vertex failed, vertexName=Map 1,
> vertexId=vertex_1647336877182_0106_1_00, diagnostics=[Vertex
> vertex_1647336877182_0106_1_00 [Map 1] killed/failed due
> to:ROOT_INPUT_INIT_FAILURE, Vertex Input: test_hudi_table initializer failed,
> vertex=vertex_1647336877182_0106_1_00 [Map 1], java.lang.NullPointerException
> at
> org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePathUnchecked(HoodieInputFormatUtils.java:335)
> at
> org.apache.hudi.hadoop.InputPathHandler.parseInputPaths(InputPathHandler.java:110)
> at
> org.apache.hudi.hadoop.InputPathHandler.<init>(InputPathHandler.java:72)
> at
> org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatus(HoodieCopyOnWriteTableInputFormat.java:109)
> at
> org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:325)
> at
> org.apache.hudi.hadoop.HoodieParquetInputFormatBase.getSplits(HoodieParquetInputFormatBase.java:68)
> at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:524)
> at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:779)
> at
> org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:243)
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:278)
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:269)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:269)
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:253)
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
> at
> com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> ```
--
This message was sent by Atlassian Jira
(v8.20.1#820001)