[ 
https://issues.apache.org/jira/browse/HUDI-8950?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ASF GitHub Bot updated HUDI-8950:
---------------------------------
    Labels: pull-request-available  (was: )

> Cloudwatch metrics not usable on Hudi 1.0
> -----------------------------------------
>
>                 Key: HUDI-8950
>                 URL: https://issues.apache.org/jira/browse/HUDI-8950
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: Shawn Chang
>            Priority: Major
>              Labels: pull-request-available
>
> This change, included in Hudi 1.0, requires `hoodie.base.path` to exist in the 
> props: [https://github.com/apache/hudi/pull/11498]
>  
> But when running Hudi jobs with the DataFrame API, users are not required to 
> configure `hoodie.base.path` explicitly, which makes the metrics feature unusable:
>  
> Reproduction steps:
> {code:java}
> import org.apache.hudi.DataSourceWriteOptions
> import org.apache.spark.sql.SaveMode
> // use data frame api to create Hudi table with meta sync and metrics on
> val r = scala.util.Random
> val num = r.nextInt(99999)
> var tableName = "yxchang_hudi_cow_simple_14_" + num
> var tablePath = "s3://<bucket>/" + tableName + "/"
> val df1 = 
> spark.read.format("parquet").load("s3://<bucket>/test-data/hudi/parquet-data/hudi.parquet")
> df1.write.format("hudi")
>   .option("hoodie.metadata.enable", "false")
>   .option("hoodie.table.name", tableName)
>   .option("hoodie.database.name", "yxchang_nolf")
>   .option("hoodie.datasource.write.operation", "insert") // use insert
>   .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
>   .option("hoodie.datasource.write.recordkey.field", "event_id")
>   .option("hoodie.datasource.write.partitionpath.field", "event_type")
>   .option("hoodie.datasource.write.precombine.field", "event_time")
>   .option("hoodie.datasource.write.keygenerator.class", "org.apache.hudi.keygen.ComplexKeyGenerator") //
>   .option("hoodie.datasource.hive_sync.enable", "true")
>   .option("hoodie.metrics.on", "true")
>   .option("hoodie.metrics.reporter.type", "CLOUDWATCH")
>   .option("hoodie.datasource.meta.sync.enable", "true")
>   .option("hoodie.datasource.hive_sync.mode", "hms")
>   .option("hoodie.datasource.hive_sync.database", "yxchang_nolf")
>   .option("hoodie.datasource.hive_sync.table", tableName)
>   .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
>   .option("hoodie.datasource.hive_sync.partition_extractor_class", "org.apache.hudi.hive.MultiPartKeysValueExtractor")
>   .mode(SaveMode.Append)
>   .save(tablePath)
> {code}
> Exception:
> {code:java}
> Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate 
> class org.apache.hudi.hive.HiveSyncTool
>     at 
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:75)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at 
> org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at 
> org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     ... 106 more
> Caused by: java.lang.reflect.InvocationTargetException
>     at jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native 
> Method) ~[?:?]
>     at 
> jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
>  ~[?:?]
>     at 
> jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>  ~[?:?]
>     at 
> java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500) 
> ~[?:?]
>     at java.lang.reflect.Constructor.newInstance(Constructor.java:481) ~[?:?]
>     at 
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at 
> org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at 
> org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     ... 106 more
> Caused by: java.lang.NullPointerException: Cannot invoke 
> "String.endsWith(String)" because "basePath" is null
>     at org.apache.hudi.metrics.Metrics.getBasePath(Metrics.java:195) 
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at org.apache.hudi.metrics.Metrics.getInstance(Metrics.java:82) 
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at 
> org.apache.hudi.sync.common.metrics.HoodieMetaSyncMetrics.<init>(HoodieMetaSyncMetrics.java:62)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at 
> org.apache.hudi.sync.common.HoodieSyncTool.<init>(HoodieSyncTool.java:50) 
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at org.apache.hudi.hive.HiveSyncTool.<init>(HiveSyncTool.java:112) 
> ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native 
> Method) ~[?:?]
>     at 
> jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
>  ~[?:?]
>     at 
> jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>  ~[?:?]
>     at 
> java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500) 
> ~[?:?]
>     at java.lang.reflect.Constructor.newInstance(Constructor.java:481) ~[?:?]
>     at 
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at 
> org.apache.hudi.sync.common.util.SyncUtilHelpers.instantiateMetaSyncTool(SyncUtilHelpers.java:140)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     at 
> org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:103)
>  ~[hudi-spark3-bundle_2.12-1.0.0.jar:1.0.0]
>     ... 106 more
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to