Shawn Chang created HUDI-8950:
---------------------------------
Summary: Cloudwatch metrics not usable on Hudi 1.0
Key: HUDI-8950
URL: https://issues.apache.org/jira/browse/HUDI-8950
Project: Apache Hudi
Issue Type: Bug
Reporter: Shawn Chang
This change, included in Hudi 1.0, requires `hoodie.base.path` to exist in the
props: [https://github.com/apache/hudi/pull/11498]
But when running Hudi jobs with the DataFrame API, users are not required to
configure `hoodie.base.path` explicitly, which makes the metrics feature unusable:
Reproduction steps:
{code:java}
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
// Use the DataFrame API to create a Hudi table with meta sync and metrics on.
// `hoodie.base.path` is not set as an option here; the table path is only
// passed to save(), which is the scenario this report describes.
val r = scala.util.Random
val num = r.nextInt(99999)
var tableName = "yxchang_hudi_cow_simple_14_" + num
var tablePath = "s3://<bucket>/" + tableName + "/"

// Source data for the write.
val df1 = spark.read
  .format("parquet")
  .load("s3://<bucket>/test-data/hudi/parquet-data/hudi.parquet")

df1.write.format("hudi")
  .option("hoodie.metadata.enable", "false")
  .option("hoodie.table.name", tableName)
  .option("hoodie.database.name", "yxchang_nolf")
  .option("hoodie.datasource.write.operation", "insert") // use insert
  .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
  .option("hoodie.datasource.write.recordkey.field", "event_id")
  .option("hoodie.datasource.write.partitionpath.field", "event_type")
  .option("hoodie.datasource.write.precombine.field", "event_time")
  .option("hoodie.datasource.write.keygenerator.class", "org.apache.hudi.keygen.ComplexKeyGenerator")
  .option("hoodie.datasource.hive_sync.enable", "true")
  // Metrics reporting via CloudWatch.
  .option("hoodie.metrics.on", "true")
  .option("hoodie.metrics.reporter.type", "CLOUDWATCH")
  // Meta sync through the Hive metastore ("hms" mode).
  .option("hoodie.datasource.meta.sync.enable", "true")
  .option("hoodie.datasource.hive_sync.mode", "hms")
  .option("hoodie.datasource.hive_sync.database", "yxchang_nolf")
  .option("hoodie.datasource.hive_sync.table", tableName)
  .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
  .option("hoodie.datasource.hive_sync.partition_extractor_class", "org.apache.hudi.hive.MultiPartKeysValueExtractor")
  .mode(SaveMode.Append)
  .save(tablePath)
{code}
Exception:
{code:java}
Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate
class org.apache.hudi.hive.HiveSyncTool
at
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:75)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at
org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at
org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
... 106 more
Caused by: java.lang.reflect.InvocationTargetException
at jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native
Method) ~[?:?]
at
jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
~[?:?]
at
jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
~[?:?]
at
java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500) ~[?:?]
at java.lang.reflect.Constructor.newInstance(Constructor.java:481) ~[?:?]
at
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at
org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at
org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
... 106 more
Caused by: java.lang.NullPointerException: Cannot invoke
"String.endsWith(String)" because "basePath" is null
at org.apache.hudi.metrics.Metrics.getBasePath(Metrics.java:195)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at org.apache.hudi.metrics.Metrics.getInstance(Metrics.java:82)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at
org.apache.hudi.sync.common.metrics.HoodieMetaSyncMetrics.<init>(HoodieMetaSyncMetrics.java:62)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at
org.apache.hudi.sync.common.HoodieSyncTool.<init>(HoodieSyncTool.java:50)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at org.apache.hudi.hive.HiveSyncTool.<init>(HiveSyncTool.java:112)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native
Method) ~[?:?]
at
jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
~[?:?]
at
jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
~[?:?]
at
java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500) ~[?:?]
at java.lang.reflect.Constructor.newInstance(Constructor.java:481) ~[?:?]
at
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at
org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
at
org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
... 106 more
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)