vobukh opened a new issue #2774:
URL: https://github.com/apache/hudi/issues/2774


   I'm trying to expose Hudi metrics to a Prometheus push gateway. 
   During the write operation I've got an error:
   
   > java.lang.NoSuchMethodError: 
io.prometheus.client.dropwizard.DropwizardExports.<init>(Lorg/apache/hudi/com/codahale/metrics/MetricRegistry;)V
      
           at 
org.apache.hudi.metrics.prometheus.PushGatewayReporter.<init>(PushGatewayReporter.java:60)
        at 
org.apache.hudi.metrics.prometheus.PushGatewayMetricsReporter.<init>(PushGatewayMetricsReporter.java:52)
        at 
org.apache.hudi.metrics.MetricsReporterFactory.createReporter(MetricsReporterFactory.java:72)
        at org.apache.hudi.metrics.Metrics.<init>(Metrics.java:50)
        at org.apache.hudi.metrics.Metrics.init(Metrics.java:98)
        at org.apache.hudi.metrics.HoodieMetrics.<init>(HoodieMetrics.java:61)
        at 
org.apache.hudi.client.AbstractHoodieWriteClient.<init>(AbstractHoodieWriteClient.java:133)
        at 
org.apache.hudi.client.AbstractHoodieWriteClient.<init>(AbstractHoodieWriteClient.java:120)
        at 
org.apache.hudi.client.SparkRDDWriteClient.<init>(SparkRDDWriteClient.java:79)
        at 
org.apache.hudi.DataSourceUtils.createHoodieClient(DataSourceUtils.java:193)
        at 
org.apache.hudi.HoodieSparkSqlWriter$$anonfun$3.apply(HoodieSparkSqlWriter.scala:166)
        at 
org.apache.hudi.HoodieSparkSqlWriter$$anonfun$3.apply(HoodieSparkSqlWriter.scala:166)
        at scala.Option.getOrElse(Option.scala:121)
        at 
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:166)
        at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:145)
        at 
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:152)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:140)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:193)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at 
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:189)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:140)
        at 
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:117)
        at 
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:115)
        at 
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:711)
        at 
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:711)
        at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:113)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:243)
        at 
org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:99)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:173)
        at 
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:711)
        at 
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:307)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:235)
        at 
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:27)
        at 
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:88)
        at 
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:90)
        at 
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:92)
        at 
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:94)
        at 
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw.<init>(command-2332629565061356:96)
        at 
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw.<init>(command-2332629565061356:98)
        at 
line9f9f6513de974546a89434938c2b827041.$read$$iw.<init>(command-2332629565061356:100)
        at 
line9f9f6513de974546a89434938c2b827041.$read.<init>(command-2332629565061356:102)
        at 
line9f9f6513de974546a89434938c2b827041.$read$.<init>(command-2332629565061356:106)
        at 
line9f9f6513de974546a89434938c2b827041.$read$.<clinit>(command-2332629565061356)
        at 
line9f9f6513de974546a89434938c2b827041.$eval$.$print$lzycompute(<notebook>:7)
        at line9f9f6513de974546a89434938c2b827041.$eval$.$print(<notebook>:6)
        at line9f9f6513de974546a89434938c2b827041.$eval.$print(<notebook>)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793)
        at 
scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054)
        at 
scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645)
        at 
scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644)
        at 
scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
        at 
scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
        at 
scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644)
        at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576)
        at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572)
        at 
com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:215)
        at 
com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply$mcV$sp(ScalaDriverLocal.scala:202)
        at 
com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:202)
        at 
com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:202)
        at 
com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:714)
        at 
com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:667)
        at 
com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:202)
        at 
com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$9.apply(DriverLocal.scala:396)
        at 
com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$9.apply(DriverLocal.scala:373)
        at 
com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:238)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
        at 
com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:233)
        at 
com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:49)
        at 
com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:275)
        at 
com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:49)
        at 
com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:373)
        at 
com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:644)
        at 
com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:644)
        at scala.util.Try$.apply(Try.scala:192)
        at 
com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:639)
        at 
com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:485)
        at 
com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:597)
        at 
com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:390)
        at 
com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:337)
        at 
com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:219)
        at java.lang.Thread.run(Thread.java:748)
   
   Only the hoodie.properties file has been written to the target endpoint.
   
   Steps to reproduce the behavior:
   
   1. Create a Databricks cluster with Spark 2.4.5 and Scala 2.11, and add the Spark config 
cluster property "spark.serializer org.apache.spark.serializer.KryoSerializer"
   2. add a library hudi-spark-bundle_2.11-0.6.0.jar, 
hudi-spark-bundle_2.11-0.7.0.jar or hudi-spark-bundle_2.11-0.8.0.jar to a 
cluster
   3. create a notebook and attach it to the cluster; code in the notebook:
   `
   import org.apache.hudi.DataSourceWriteOptions
   import org.apache.hudi.common.model.HoodieTableType
   import org.apache.hudi.config.{HoodieCompactionConfig, HoodieStorageConfig, 
HoodieWriteConfig}
   import org.apache.spark.sql.functions.lit
   import scala.util.Random
   
   import spark.implicits._
   
   def genRandomByte(): Byte = (Random.nextInt(256) - 128).toByte
   def genRandomRow(): Array[Byte] = Array.fill(10240)(genRandomByte())
   
   val numRows = 12288
   val df = 
spark.createDataFrame(List.fill(numRows)(genRandomRow()).zipWithIndex).toDF("data",
 "id")
   
   
   val ts = System.currentTimeMillis()
   val writeDf = df.withColumn("ts", lit(ts))
     .withColumn("id", $"id" + ts)
     .select("id", "data", "ts")
   
   val tablePath = "<please_put_your_file_system_endpoint_here>"
   
   writeDf.write.format("org.apache.hudi")
     .option(HoodieWriteConfig.TABLE_NAME, "test_1509")
     .option(DataSourceWriteOptions.OPERATION_OPT_KEY, 
DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL)
     .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, 
HoodieTableType.MERGE_ON_READ.name)
     .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts")
     .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "id")
       .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "")
     .option(HoodieWriteConfig.UPSERT_PARALLELISM, "1")
     .option(HoodieWriteConfig.INSERT_PARALLELISM, "1")
     .option(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP, "1")
     .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, "1" )
     .option(HoodieCompactionConfig.INLINE_COMPACT_PROP, "true" )
     .option("hoodie.logfile.to.parquet.compression.ratio", "1") // binary data 
is not compressed well
     .option("hoodie.logfile.max.size", String.valueOf(10 * 1024 * 1024))
     .option(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC, "snappy")
     .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "false")
     .option("hoodie.metrics.on", "true")
     .option("hoodie.metrics.reporter.type", "PROMETHEUS_PUSHGATEWAY")
     .option("hoodie.metrics.pushgateway.host", 
"<please_put_your_prometheus_push_gateway_url_here>")
     .option( "hoodie.metrics.pushgateway.port", "80")
     .option("hoodie.metrics.pushgateway.job.name", "test-hudi-metrics-1509")
     .mode(SaveMode.Append)
     .save(tablePath)
   `
   
   I expect to find metrics in the push gateway, but instead the job failed 
with the stack trace above. If you comment out the "hoodie.metrics.*" options, it 
works fine.
   
   
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to