[
https://issues.apache.org/jira/browse/HUDI-9069?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17929300#comment-17929300
]
Y Ethan Guo commented on HUDI-9069:
-----------------------------------
We see that when the metaserver is enabled, the write transaction starts and
fails, yet the timeline is empty; nevertheless, there are data files written as
part of the inflight commit, which does not seem right. This needs more investigation.
{code:java}
> ls -ltr /tmp/hudi-bundles/tests/trips1/*/*/*
/tmp/hudi-bundles/tests/trips1/americas/united_states/san_francisco:
total 856
-rw-r--r-- 1 ethan wheel 438175 Feb 21 19:30
c3d7cc10-bf5c-4b81-b6d0-0ce44d23d12e-0_1-14-21_20250221193043106.parquet
/tmp/hudi-bundles/tests/trips1/americas/brazil/sao_paulo:
total 856
-rw-r--r-- 1 ethan wheel 437819 Feb 21 19:30
397e93b7-ecc5-498d-a4cd-b492821d524a-0_0-14-20_20250221193043106.parquet
/tmp/hudi-bundles/tests/trips1/asia/india/chennai:
total 856
-rw-r--r-- 1 ethan wheel 437609 Feb 21 19:30
a12e23c5-6be5-4540-8049-96cb170441d5-0_2-14-22_20250221193043106.parquet
> ls -ltr /tmp/hudi-bundles/tests/trips1/.hoodie/timeline/
total 0
drwxr-xr-x 2 ethan wheel 64 Feb 21 19:30 history/ {code}
> meta server does not work
> -------------------------
>
> Key: HUDI-9069
> URL: https://issues.apache.org/jira/browse/HUDI-9069
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Davis Zhang
> Priority: Major
> Fix For: 1.1.0
>
>
> {code:java}
> diff --git a/packaging/bundle-validation/service/read.scala
> b/packaging/bundle-validation/service/read.scala
> index b9780ffa576..caa6ee2e689 100644
> --- a/packaging/bundle-validation/service/read.scala
> +++ b/packaging/bundle-validation/service/read.scala
> @@ -22,7 +22,5 @@ spark.read.format("hudi").
> option("hoodie.table.name", tableName).
> option("hoodie.database.name", "default").
> option("hoodie.metadata.enable", "false").
> - option("hoodie.metaserver.enabled", "true").
> - option("hoodie.metaserver.uris", "thrift://localhost:9090").
>
> load(basePath).coalesce(1).write.csv("/tmp/metaserver-bundle/sparkdatasource/trips/results")
> System.exit(0)
> diff --git a/packaging/bundle-validation/service/write.scala
> b/packaging/bundle-validation/service/write.scala
> index 86fd759d923..378e7381f18 100644
> --- a/packaging/bundle-validation/service/write.scala
> +++ b/packaging/bundle-validation/service/write.scala
> @@ -41,8 +41,6 @@ df.write.format("hudi").
> option("hoodie.datasource.meta.sync.enable", "false").
> option("hoodie.datasource.hive_sync.enable", "false").
> option("hoodie.metadata.enable", "false").
> - option("hoodie.metaserver.enabled", "true").
> - option("hoodie.metaserver.uris", "thrift://localhost:9090").
> mode(Overwrite).
> save(basePath)
>
> ~
> {code}
>
> The write.scala does not write successfully: the timeline on the FS is empty.
> When read.scala runs, it gives a completed instant whose completion time
> is null, which does not make sense.
>
> {code:java}
> scala> spark.read.format("hudi").
> | option("hoodie.table.name", tableName).
> | option("hoodie.database.name", "default").
> | option("hoodie.metadata.enable", "false").
> | option("hoodie.metaserver.enabled", "true").
> | option("hoodie.metaserver.uris", "thrift://localhost:9090").
> |
> load(basePath).coalesce(1).write.csv("/tmp/metaserver-bundle/sparkdatasource/trips/results")
> java.lang.IllegalArgumentException: Completion time should not be empty
> at
> org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:42)
> at
> org.apache.hudi.common.table.timeline.versioning.v2.InstantFileNameGeneratorV2.getCompleteFileName(InstantFileNameGeneratorV2.java:286)
> at
> org.apache.hudi.common.table.timeline.versioning.v2.InstantFileNameGeneratorV2.getFileName(InstantFileNameGeneratorV2.java:318)
> at
> org.apache.hudi.common.table.timeline.versioning.v2.ActiveTimelineV2.getContentStream(ActiveTimelineV2.java:271)
> at
> org.apache.hudi.common.table.timeline.BaseHoodieTimeline.getInstantContentStream(BaseHoodieTimeline.java:558)
> at
> org.apache.hudi.common.table.timeline.versioning.v2.ActiveTimelineV2.lambda$getCommitMetadataStream$5(ActiveTimelineV2.java:309)
> at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
> at java.util.stream.SortedOps$SizedRefSortingSink.end(SortedOps.java:361)
> at
> java.util.stream.AbstractPipeline.copyIntoWithCancel(AbstractPipeline.java:500)
> at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:486)
> at
> java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
> at java.util.stream.FindOps$FindOp.evaluateSequential(FindOps.java:152)
> at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
> at java.util.stream.ReferencePipeline.findFirst(ReferencePipeline.java:531)
> at
> org.apache.hudi.common.table.timeline.versioning.v2.ActiveTimelineV2.getLastCommitMetadataWithValidSchema(ActiveTimelineV2.java:287)
> at
> org.apache.hudi.common.table.TableSchemaResolver.getLatestCommitMetadataWithValidSchema(TableSchemaResolver.java:487)
> at
> org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromLatestCommitMetadata(TableSchemaResolver.java:229)
> at
> org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchemaInternal(TableSchemaResolver.java:191)
> at
> org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:145)
> at
> org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:134)
> at
> org.apache.hudi.HoodieBaseHadoopFsRelationFactory.$anonfun$x$2$10(HoodieHadoopFsRelationFactory.scala:159)
> at scala.util.Try$.apply(Try.scala:213)
> at
> org.apache.hudi.HoodieBaseHadoopFsRelationFactory.$anonfun$x$2$9(HoodieHadoopFsRelationFactory.scala:159)
> at scala.Option.getOrElse(Option.scala:189)
> at
> org.apache.hudi.HoodieBaseHadoopFsRelationFactory.x$2$lzycompute(HoodieHadoopFsRelationFactory.scala:159)
> at
> org.apache.hudi.HoodieBaseHadoopFsRelationFactory.x$2(HoodieHadoopFsRelationFactory.scala:136)
> at
> org.apache.hudi.HoodieBaseHadoopFsRelationFactory.tableAvroSchema$lzycompute(HoodieHadoopFsRelationFactory.scala:136)
> at
> org.apache.hudi.HoodieBaseHadoopFsRelationFactory.tableAvroSchema(HoodieHadoopFsRelationFactory.scala:136)
> at
> org.apache.hudi.HoodieBaseHadoopFsRelationFactory.tableStructSchema$lzycompute(HoodieHadoopFsRelationFactory.scala:176)
> at
> org.apache.hudi.HoodieBaseHadoopFsRelationFactory.tableStructSchema(HoodieHadoopFsRelationFactory.scala:175)
> at
> org.apache.hudi.HoodieMergeOnReadSnapshotHadoopFsRelationFactory.<init>(HoodieHadoopFsRelationFactory.scala:263)
> at
> org.apache.hudi.HoodieCopyOnWriteSnapshotHadoopFsRelationFactory.<init>(HoodieHadoopFsRelationFactory.scala:355)
> at org.apache.hudi.DefaultSource$.createRelation(DefaultSource.scala:332)
> at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:144)
> at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:82)
> at
> org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:346)
> at
> org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:229)
> at
> org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:211)
> at scala.Option.getOrElse(Option.scala:189)
> at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
> at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:186)
> ... 65 elided {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)