jonashartwig opened a new issue #2661:
URL: https://github.com/apache/hudi/issues/2661
Steps to reproduce the behavior:
1. start spark shell `spark-shell \
--packages org.apache.hudi:hudi-spark-bundle_2.11:0.7.0 \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'`
2. run spark code and create data:
```scala
import org.apache.spark.sql.Row
val rdd = spark.sparkContext.parallelize(List(Row(1, "a")))
import org.apache.spark.sql.types._
val df = spark.createDataFrame(rdd, StructType(List(StructField("int",
IntegerType), StructField("string", StringType))))
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
df.write.format("hudi").partitionBy("int").mode(Overwrite).option(TABLE_NAME,
"hudi").option(PRECOMBINE_FIELD_OPT_KEY, "int").option(RECORDKEY_FIELD_OPT_KEY,
"int").option(PARTITIONPATH_FIELD_OPT_KEY,
"int").save("/data/test/swe/base/hudi/hudi")
```
3. create hive table
```sql
CREATE EXTERNAL TABLE test_swe_base.t_hudi_hudi (`string` STRING)
PARTITIONED BY (`int` int)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION '/data/test/swe/base/hudi/hudi';
```
4. recover partitions:
```sql
msck repair table test_swe_base.t_hudi_hudi;
```
**Expected behavior**
The repair table command succeeds and the table can be queried for 1 row.
**Environment Description**
* Hudi version : 0.7.0
* Spark version : 2.4.5
* Hive version : 3
* Hadoop version : 3
* Storage (HDFS/S3/GCS..) : HDFS
* Running on Docker? (yes/no) : no
**Stacktrace**
2021-03-11 10:44:36,243 ERROR
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker:
[HiveServer2-Background-Pool: Thread-34494]:
org.apache.hadoop.hive.metastore.utils.MetastoreException: Invalid partition
name
hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
2021-03-11 10:44:36,244 WARN org.apache.hadoop.hive.metastore.Msck:
[HiveServer2-Background-Pool: Thread-34494]: Failed to run metacheck:
org.apache.hadoop.hive.metastore.utils.MetastoreException:
org.apache.hadoop.hive.metastore.utils.MetastoreException: Invalid partition
name
hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkPartitionDirs(HiveMetaStoreChecker.java:568)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkPartitionDirs(HiveMetaStoreChecker.java:447)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.findUnknownPartitions(HiveMetaStoreChecker.java:380)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:353)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:273)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkMetastore(HiveMetaStoreChecker.java:139)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.metastore.Msck.repair(Msck.java:121)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.ql.ddl.misc.msck.MsckOperation.execute(MsckOperation.java:74)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:80)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:740)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:495)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:489)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:225)
[hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:87)
[hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:322)
[hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at java.security.AccessController.doPrivileged(Native Method)
[?:1.8.0_232]
at javax.security.auth.Subject.doAs(Subject.java:422) [?:1.8.0_232]
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
[hadoop-common-3.1.1.7.1.4.0-203.jar:?]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:340)
[hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
[?:1.8.0_232]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[?:1.8.0_232]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
[?:1.8.0_232]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[?:1.8.0_232]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[?:1.8.0_232]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[?:1.8.0_232]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_232]
Caused by: org.apache.hadoop.hive.metastore.utils.MetastoreException:
Invalid partition name
hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.logOrThrowExceptionWithMsg(HiveMetaStoreChecker.java:519)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.processPathDepthInfo(HiveMetaStoreChecker.java:500)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:470)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:452)
~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
... 4 more
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]