[
https://issues.apache.org/jira/browse/HUDI-8613?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Y Ethan Guo reassigned HUDI-8613:
---------------------------------
Assignee: Y Ethan Guo
> Incremental query failed after MDT is enabled by default on the reader
> ----------------------------------------------------------------------
>
> Key: HUDI-8613
> URL: https://issues.apache.org/jira/browse/HUDI-8613
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Y Ethan Guo
> Assignee: Y Ethan Guo
> Priority: Blocker
> Fix For: 1.0.0
>
>
> {code:java}
> scala> actual.count
> 24/11/30 00:32:44 WARN HoodieFileIndex: Data skipping requires Metadata Table
> and at least one of the indices to be enabled! (isMetadataTableEnabled =
> false, isColumnStatsIndexEnabled = false, isRecordIndexApplicable = false,
> isExpressionIndexEnabled = false, isBucketIndexEnable = false,
> isPartitionStatsIndexEnabled = false), isBloomFiltersIndexEnabled = false)
> res3: Long = 4269100
>
> scala> val actual =
> spark.read.format("hudi").option("hoodie.datasource.query.type",
> "incremental").option("hoodie.datasource.read.begin.instanttime",
> "20241118011455593").option("hoodie.datasource.read.end.instanttime",
> "20241118014520931").option("hoodie.datasource.read.incr.fallback.fulltablescan.enable",
> "true").load(basePath).select("key", "partition", "ts", "textField",
> "decimalField", "longField", "round")
> actual: org.apache.spark.sql.DataFrame = [key: string, partition: string ...
> 5 more fields]
> scala> actual.count
> org.apache.hudi.exception.HoodieMetadataException: Failed to retrieve list of
> partition from metadata
> at
> org.apache.hudi.metadata.BaseTableMetadata.getAllPartitionPaths(BaseTableMetadata.java:126)
> at
> org.apache.hudi.metadata.HoodieBackedTableMetadata.getPartitionPathWithPathPrefixes(HoodieBackedTableMetadata.java:187)
> at
> org.apache.hudi.BaseHoodieTableFileIndex.listPartitionPaths(BaseHoodieTableFileIndex.java:323)
> at
> org.apache.hudi.BaseHoodieTableFileIndex.getAllQueryPartitionPaths(BaseHoodieTableFileIndex.java:219)
> at
> org.apache.hudi.SparkHoodieTableFileIndex.listMatchingPartitionPaths(SparkHoodieTableFileIndex.scala:228)
> at
> org.apache.hudi.HoodieFileIndex.prunePartitionsAndGetFileSlices(HoodieFileIndex.scala:336)
> at
> org.apache.hudi.HoodieFileIndex.filterFileSlices(HoodieFileIndex.scala:230)
> at
> org.apache.spark.sql.hudi.analysis.HoodiePruneFileSourcePartitions$$anonfun$apply$1.applyOrElse(HoodiePruneFileSourcePartitions.scala:55)
> at
> org.apache.spark.sql.hudi.analysis.HoodiePruneFileSourcePartitions$$anonfun$apply$1.applyOrElse(HoodiePruneFileSourcePartitions.scala:43)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
> at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:466)
> at
> org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1216)
> at
> org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1215)
> at
> org.apache.spark.sql.catalyst.plans.logical.Aggregate.mapChildren(basicLogicalOperators.scala:1151)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:466)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
> at
> org.apache.spark.sql.hudi.analysis.HoodiePruneFileSourcePartitions.apply(HoodiePruneFileSourcePartitions.scala:43)
> at
> org.apache.spark.sql.hudi.analysis.HoodiePruneFileSourcePartitions.apply(HoodiePruneFileSourcePartitions.scala:41)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
> at
> scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
> at
> scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
> at scala.collection.immutable.List.foldLeft(List.scala:91)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
> at scala.collection.immutable.List.foreach(List.scala:431)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
> at
> org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
> at
> org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:152)
> at
> org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)
> at
> org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)
> at
> org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)
> at
> org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
> at
> org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)
> at
> org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:148)
> at
> org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:144)
> at
> org.apache.spark.sql.execution.QueryExecution.assertOptimized(QueryExecution.scala:162)
> at
> org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:182)
> at
> org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:179)
> at
> org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:238)
> at
> org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:284)
> at
> org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:252)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:117)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4321)
> at org.apache.spark.sql.Dataset.count(Dataset.scala:3615)
> ... 47 elided
> Caused by: java.lang.IllegalStateException: Number of file slices for
> partition files should be > 0
> at
> org.apache.hudi.common.util.ValidationUtils.checkState(ValidationUtils.java:76)
> at
> org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordsByKeys(HoodieBackedTableMetadata.java:265)
> at
> org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKey(HoodieBackedTableMetadata.java:160)
> at
> org.apache.hudi.metadata.BaseTableMetadata.fetchAllPartitionPaths(BaseTableMetadata.java:354)
> at
> org.apache.hudi.metadata.BaseTableMetadata.getAllPartitionPaths(BaseTableMetadata.java:124)
> ... 109 more {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)