[
https://issues.apache.org/jira/browse/HUDI-8950?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated HUDI-8950:
---------------------------------
Labels: pull-request-available (was: )
> Cloudwatch metrics not usable on Hudi 1.0
> -----------------------------------------
>
> Key: HUDI-8950
> URL: https://issues.apache.org/jira/browse/HUDI-8950
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Shawn Chang
> Priority: Major
> Labels: pull-request-available
>
> This change, included in Hudi 1.0, requires `hoodie.base.path` to exist in the
> props: [https://github.com/apache/hudi/pull/11498]
>
> However, when running Hudi jobs with the DataFrame API, users are not required to
> configure `hoodie.base.path` explicitly, which makes the metrics feature unusable.
>
> Reproduction steps:
> {code:java}
> import org.apache.hudi.DataSourceWriteOptions
> import org.apache.spark.sql.SaveMode
> // use data frame api to create Hudi table with meta sync and metrics on
> val r = scala.util.Random
> val num = r.nextInt(99999)
> var tableName = "yxchang_hudi_cow_simple_14_" + num
> var tablePath = "s3://<bucket>/" + tableName + "/"
> val df1 =
> spark.read.format("parquet").load("s3://<bucket>/test-data/hudi/parquet-data/hudi.parquet")
> df1.write.format("hudi")
> .option("hoodie.metadata.enable", "false")
> .option("hoodie.table.name", tableName)
> .option("hoodie.database.name", "yxchang_nolf")
> .option("hoodie.datasource.write.operation", "insert") // use insert
> .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
> .option("hoodie.datasource.write.recordkey.field", "event_id")
> .option("hoodie.datasource.write.partitionpath.field", "event_type")
> .option("hoodie.datasource.write.precombine.field", "event_time")
> .option("hoodie.datasource.write.keygenerator.class",
> "org.apache.hudi.keygen.ComplexKeyGenerator")//
> .option("hoodie.datasource.hive_sync.enable", "true")
> .option("hoodie.metrics.on", "true") .option("hoodie.metrics.reporter.type",
> "CLOUDWATCH") .option("hoodie.datasource.meta.sync.enable", "true")
> .option("hoodie.datasource.hive_sync.mode", "hms")
> .option("hoodie.datasource.hive_sync.database", "yxchang_nolf")
> .option("hoodie.datasource.hive_sync.table", tableName)
> .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
> .option("hoodie.datasource.hive_sync.partition_extractor_class",
> "org.apache.hudi.hive.MultiPartKeysValueExtractor")
> .mode(SaveMode.Append) .save(tablePath)
> {code}
> Exception:
> {code:java}
> Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate
> class org.apache.hudi.hive.HiveSyncTool
> at
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:75)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at
> org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at
> org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> ... 106 more
> Caused by: java.lang.reflect.InvocationTargetException
> at jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native
> Method) ~[?:?]
> at
> jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
> ~[?:?]
> at
> jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> ~[?:?]
> at
> java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500)
> ~[?:?]
> at java.lang.reflect.Constructor.newInstance(Constructor.java:481) ~[?:?]
> at
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at
> org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at
> org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> ... 106 more
> Caused by: java.lang.NullPointerException: Cannot invoke
> "String.endsWith(String)" because "basePath" is null
> at org.apache.hudi.metrics.Metrics.getBasePath(Metrics.java:195)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at org.apache.hudi.metrics.Metrics.getInstance(Metrics.java:82)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at
> org.apache.hudi.sync.common.metrics.HoodieMetaSyncMetrics.<init>(HoodieMetaSyncMetrics.java:62)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at
> org.apache.hudi.sync.common.HoodieSyncTool.<init>(HoodieSyncTool.java:50)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at org.apache.hudi.hive.HiveSyncTool.<init>(HiveSyncTool.java:112)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native
> Method) ~[?:?]
> at
> jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
> ~[?:?]
> at
> jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> ~[?:?]
> at
> java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500)
> ~[?:?]
> at java.lang.reflect.Constructor.newInstance(Constructor.java:481) ~[?:?]
> at
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at
> org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> at
> org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
> ... 106 more {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)