jonashartwig opened a new issue #2661:
URL: https://github.com/apache/hudi/issues/2661


   Steps to reproduce the behavior:
   
   1. start spark shell `spark-shell \
     --packages org.apache.hudi:hudi-spark-bundle_2.11:0.7.0 \
     --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'`
   2. run spark code and create data:
   ```scala
   import org.apache.spark.sql.Row
   val rdd = spark.sparkContext.parallelize(List(Row(1, "a")))
   import org.apache.spark.sql.types._
   val df = spark.createDataFrame(rdd, StructType(List(StructField("int", 
IntegerType), StructField("string", StringType))))
   
   import org.apache.hudi.QuickstartUtils._
   import scala.collection.JavaConversions._
   import org.apache.spark.sql.SaveMode._
   import org.apache.hudi.DataSourceReadOptions._
   import org.apache.hudi.DataSourceWriteOptions._
   import org.apache.hudi.config.HoodieWriteConfig._
   
df.write.format("hudi").partitionBy("int").mode(Overwrite).option(TABLE_NAME, 
"hudi").option(PRECOMBINE_FIELD_OPT_KEY, "int").option(RECORDKEY_FIELD_OPT_KEY, 
"int").option(PARTITIONPATH_FIELD_OPT_KEY, 
"int").save("/data/test/swe/base/hudi/hudi")
   
   ```
   3. create hive table
   ```sql
   CREATE EXTERNAL TABLE test_swe_base.t_hudi_hudi (`string` STRING)
   PARTITIONED BY (`int` int)
   ROW FORMAT SERDE 
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
   STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat'
   OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
   LOCATION '/data/test/swe/base/hudi/hudi';
   ```
   4. recover partitions:
   ```sql
   msck repair table test_swe_base.t_hudi_hudi;
   ```
   
   **Expected behavior**
   The repair table command succeeds and the table can be queried for 1 row.
   
   **Environment Description**
   
   * Hudi version : 0.7.0
   
   * Spark version : 2.4.5
   
   * Hive version : 3
   
   * Hadoop version : 3
   
   * Storage (HDFS/S3/GCS..) : HDFS
   
   * Running on Docker? (yes/no) : no

   **Stacktrace**
   2021-03-11 10:44:36,243 ERROR 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker: 
[HiveServer2-Background-Pool: Thread-34494]: 
org.apache.hadoop.hive.metastore.utils.MetastoreException: Invalid partition 
name 
hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
   2021-03-11 10:44:36,244 WARN  org.apache.hadoop.hive.metastore.Msck: 
[HiveServer2-Background-Pool: Thread-34494]: Failed to run metacheck:
   org.apache.hadoop.hive.metastore.utils.MetastoreException: 
org.apache.hadoop.hive.metastore.utils.MetastoreException: Invalid partition 
name 
hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkPartitionDirs(HiveMetaStoreChecker.java:568)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkPartitionDirs(HiveMetaStoreChecker.java:447)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.findUnknownPartitions(HiveMetaStoreChecker.java:380)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:353)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:273)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkMetastore(HiveMetaStoreChecker.java:139)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.metastore.Msck.repair(Msck.java:121) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.ql.ddl.misc.msck.MsckOperation.execute(MsckOperation.java:74)
 [hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:80) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:740) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.Driver.run(Driver.java:495) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at org.apache.hadoop.hive.ql.Driver.run(Driver.java:489) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) 
[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:225)
 [hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
   ]
           at 
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:87)
 [hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:322)
 [hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at java.security.AccessController.doPrivileged(Native Method) 
[?:1.8.0_232]
           at javax.security.auth.Subject.doAs(Subject.java:422) [?:1.8.0_232]
           at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
 [hadoop-common-3.1.1.7.1.4.0-203.jar:?]
           at 
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:340)
 [hive-service-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
[?:1.8.0_232]
           at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
[?:1.8.0_232]
           at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
[?:1.8.0_232]
           at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
[?:1.8.0_232]
           at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
[?:1.8.0_232]
           at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
[?:1.8.0_232]
           at java.lang.Thread.run(Thread.java:748) [?:1.8.0_232]
   Caused by: org.apache.hadoop.hive.metastore.utils.MetastoreException: 
Invalid partition name 
hdfs://dl300cdppoc02.ddc.teliasonera.net:8020/data/test/swe/base/hudi/hudi/1
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.logOrThrowExceptionWithMsg(HiveMetaStoreChecker.java:519)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.processPathDepthInfo(HiveMetaStoreChecker.java:500)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:470)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           at 
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:452)
 ~[hive-exec-3.1.3000.7.1.4.0-203.jar:3.1.3000.7.1.4.0-203]
           ... 4 more
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to