Davis Zhang created HUDI-9802:
---------------------------------
Summary: DML fails while bootstrapping the metadata table (MDT)
Key: HUDI-9802
URL: https://issues.apache.org/jira/browse/HUDI-9802
Project: Apache Hudi
Issue Type: Bug
Reporter: Davis Zhang
Fix For: 1.1.0
Repro steps:
* Create a brand-new version 9 table with the metadata table (MDT) turned off, and load data.
* Then turn MDT on and run a DML statement with a WHERE filter on any column.
25/09/09 17:22:45 WARN HoodieTableFileSystemView: Partition: record_index is not available in store
java.lang.IllegalStateException: Partition record_index should be part of inflight metadata partitions here []
at
org.apache.hudi.common.util.ValidationUtils.checkState(ValidationUtils.java:78)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.lambda$tagRecordsWithLocationForStreamingWrites$27(HoodieBackedTableMetadataWriter.java:1340)
at java.base/java.lang.Iterable.forEach(Iterable.java:75) at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.tagRecordsWithLocationForStreamingWrites(HoodieBackedTableMetadataWriter.java:1334)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.streamWriteToMetadataPartitions(HoodieBackedTableMetadataWriter.java:1240)
at
org.apache.hudi.client.StreamingMetadataWriteHandler.streamWriteToMetadataTable(StreamingMetadataWriteHandler.java:92)
at
org.apache.hudi.client.StreamingMetadataWriteHandler.streamWriteToMetadataTable(StreamingMetadataWriteHandler.java:57)
at
org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:111)
at
org.apache.hudi.HoodieSparkSqlWriterInternal.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:1000)
at
org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:545)
at
org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:195)
at
org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:213)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:132)
at
org.apache.spark.sql.hudi.command.DeleteHoodieTableCommand.run(DeleteHoodieTableCommand.scala:52)
at
org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
at
org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
at
org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
at
org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220) at
org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100) at
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) at
org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97) at
org.apache.spark.sql.SparkSession.$anonfun$sql$4(SparkSession.scala:691) at
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) at
org.apache.spark.sql.SparkSession.sql(SparkSession.scala:682) at
org.apache.spark.sql.SparkSession.sql(SparkSession.scala:713) at
org.apache.spark.sql.SparkSession.sql(SparkSession.scala:744) ... 47 elided
Reproducible with Hudi OSS at git hash a749d6b9a557ebead5227ac9421513ac57afde57.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)