fhan688 commented on code in PR #12410:
URL: https://github.com/apache/hudi/pull/12410#discussion_r1868927763
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowInvalidParquetProcedure.scala:
##########
@@ -52,11 +54,14 @@ class ShowInvalidParquetProcedure extends BaseProcedure
with ProcedureBuilder {
val srcPath = getArgValueOrDefault(args,
PARAMETERS(0)).get.asInstanceOf[String]
val limit = getArgValueOrDefault(args, PARAMETERS(1))
val needDelete = getArgValueOrDefault(args,
PARAMETERS(2)).get.asInstanceOf[Boolean]
+ val partitions = getArgValueOrDefault(args,
PARAMETERS(3)).map(_.toString).getOrElse("")
val storageConf =
HadoopFSUtils.getStorageConfWithCopy(jsc.hadoopConfiguration())
val storage = new HoodieHadoopStorage(srcPath, storageConf)
- val partitionPaths: java.util.List[String] = FSUtils.getAllPartitionPaths(
- new HoodieSparkEngineContext(jsc), storage, srcPath, false)
- val javaRdd: JavaRDD[String] = jsc.parallelize(partitionPaths,
partitionPaths.size())
+ val metadataConfig = HoodieMetadataConfig.newBuilder.enable(false).build
+ val metadata = HoodieTableMetadata.create(new
HoodieSparkEngineContext(jsc), storage, metadataConfig, srcPath)
+ val partitionPaths: java.util.List[String] =
metadata.getPartitionPathWithPathPrefixes(partitions.split(",").toList.asJava)
Review Comment:
`getPartitionPathWithPathPrefixes(partitions)` will return all
partition paths when `partitions` is empty.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]