vobukh opened a new issue #2774:
URL: https://github.com/apache/hudi/issues/2774
I'm trying to expose hudi metrics to prometheus push gateway.
During the write operation I've got an error:
> java.lang.NoSuchMethodError:
io.prometheus.client.dropwizard.DropwizardExports.<init>(Lorg/apache/hudi/com/codahale/metrics/MetricRegistry;)V
at
org.apache.hudi.metrics.prometheus.PushGatewayReporter.<init>(PushGatewayReporter.java:60)
at
org.apache.hudi.metrics.prometheus.PushGatewayMetricsReporter.<init>(PushGatewayMetricsReporter.java:52)
at
org.apache.hudi.metrics.MetricsReporterFactory.createReporter(MetricsReporterFactory.java:72)
at org.apache.hudi.metrics.Metrics.<init>(Metrics.java:50)
at org.apache.hudi.metrics.Metrics.init(Metrics.java:98)
at org.apache.hudi.metrics.HoodieMetrics.<init>(HoodieMetrics.java:61)
at
org.apache.hudi.client.AbstractHoodieWriteClient.<init>(AbstractHoodieWriteClient.java:133)
at
org.apache.hudi.client.AbstractHoodieWriteClient.<init>(AbstractHoodieWriteClient.java:120)
at
org.apache.hudi.client.SparkRDDWriteClient.<init>(SparkRDDWriteClient.java:79)
at
org.apache.hudi.DataSourceUtils.createHoodieClient(DataSourceUtils.java:193)
at
org.apache.hudi.HoodieSparkSqlWriter$$anonfun$3.apply(HoodieSparkSqlWriter.scala:166)
at
org.apache.hudi.HoodieSparkSqlWriter$$anonfun$3.apply(HoodieSparkSqlWriter.scala:166)
at scala.Option.getOrElse(Option.scala:121)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:166)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:145)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:152)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:140)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:193)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:189)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:140)
at
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:117)
at
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:115)
at
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:711)
at
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:711)
at
org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:113)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:243)
at
org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:99)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:173)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:711)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:307)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:235)
at
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:27)
at
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:88)
at
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:90)
at
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:92)
at
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw$$iw.<init>(command-2332629565061356:94)
at
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw$$iw.<init>(command-2332629565061356:96)
at
line9f9f6513de974546a89434938c2b827041.$read$$iw$$iw.<init>(command-2332629565061356:98)
at
line9f9f6513de974546a89434938c2b827041.$read$$iw.<init>(command-2332629565061356:100)
at
line9f9f6513de974546a89434938c2b827041.$read.<init>(command-2332629565061356:102)
at
line9f9f6513de974546a89434938c2b827041.$read$.<init>(command-2332629565061356:106)
at
line9f9f6513de974546a89434938c2b827041.$read$.<clinit>(command-2332629565061356)
at
line9f9f6513de974546a89434938c2b827041.$eval$.$print$lzycompute(<notebook>:7)
at line9f9f6513de974546a89434938c2b827041.$eval$.$print(<notebook>:6)
at line9f9f6513de974546a89434938c2b827041.$eval.$print(<notebook>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793)
at
scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054)
at
scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645)
at
scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644)
at
scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at
scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
at
scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572)
at
com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:215)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply$mcV$sp(ScalaDriverLocal.scala:202)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:202)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:202)
at
com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:714)
at
com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:667)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:202)
at
com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$9.apply(DriverLocal.scala:396)
at
com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$9.apply(DriverLocal.scala:373)
at
com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:238)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at
com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:233)
at
com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:49)
at
com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:275)
at
com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:49)
at
com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:373)
at
com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:644)
at
com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:644)
at scala.util.Try$.apply(Try.scala:192)
at
com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:639)
at
com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:485)
at
com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:597)
at
com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:390)
at
com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:337)
at
com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:219)
at java.lang.Thread.run(Thread.java:748)
Only the hoodie.properties file has been written to the target endpoint.
Steps to reproduce the behavior:
1. Create a Databricks cluster with Spark 2.4.5 and Scala 2.11, and add the Spark
config cluster property "spark.serializer org.apache.spark.serializer.KryoSerializer"
2. add a library hudi-spark-bundle_2.11-0.6.0.jar,
hudi-spark-bundle_2.11-0.7.0.jar or hudi-spark-bundle_2.11-0.8.0.jar to a
cluster
3. create a notebook and attach it to the cluster; code in the notebook:
```scala
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.common.model.HoodieTableType
import org.apache.hudi.config.{HoodieCompactionConfig, HoodieStorageConfig,
HoodieWriteConfig}
import org.apache.spark.sql.functions.lit
import scala.util.Random
import spark.implicits._
def genRandomByte(): Byte = (Random.nextInt(256) - 128).toByte
def genRandomRow(): Array[Byte] = Array.fill(10240)(genRandomByte())
val numRows = 12288
val df =
spark.createDataFrame(List.fill(numRows)(genRandomRow()).zipWithIndex).toDF("data",
"id")
val ts = System.currentTimeMillis()
val writeDf = df.withColumn("ts", lit(ts))
.withColumn("id", $"id" + ts)
.select("id", "data", "ts")
val tablePath = "<please_put_your_file_system_endpoint_here>"
writeDf.write.format("org.apache.hudi")
.option(HoodieWriteConfig.TABLE_NAME, "test_1509")
.option(DataSourceWriteOptions.OPERATION_OPT_KEY,
DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL)
.option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY,
HoodieTableType.MERGE_ON_READ.name)
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts")
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "id")
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "")
.option(HoodieWriteConfig.UPSERT_PARALLELISM, "1")
.option(HoodieWriteConfig.INSERT_PARALLELISM, "1")
.option(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP, "1")
.option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, "1" )
.option(HoodieCompactionConfig.INLINE_COMPACT_PROP, "true" )
.option("hoodie.logfile.to.parquet.compression.ratio", "1") // binary data
is not compressed well
.option("hoodie.logfile.max.size", String.valueOf(10 * 1024 * 1024))
.option(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC, "snappy")
.option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "false")
.option("hoodie.metrics.on", "true")
.option("hoodie.metrics.reporter.type", "PROMETHEUS_PUSHGATEWAY")
.option("hoodie.metrics.pushgateway.host",
"<please_put_your_prometheus_push_gateway_url_here>")
.option( "hoodie.metrics.pushgateway.port", "80")
.option("hoodie.metrics.pushgateway.job.name", "test-hudi-metrics-1509")
.mode(SaveMode.Append)
.save(tablePath)
```
I expect to find metrics in the push gateway, but instead the job fails
with the stack trace shown above. If you comment out the "hoodie.metrics.*"
options, it works fine.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]