vinothsiva1989 opened a new issue #2053:
URL: https://github.com/apache/hudi/issues/2053
**Describe the problem you faced**

Hive sync fails while writing a COPY_ON_WRITE table: the `CREATE EXTERNAL TABLE` statement issued during sync is rejected with `SemanticException Cannot find class 'org.apache.hudi.hadoop.HoodieParquetInputFormat'` (full trace under Additional context).
**To Reproduce**
Steps to reproduce the behavior:
Step 1: launch spark-shell:

```sh
spark-shell \
  --packages org.apache.hudi:hudi-spark-bundle_2.11:0.6.0,org.apache.spark:spark-avro_2.11:2.4.6 \
  --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
  --conf 'spark.sql.hive.convertMetastoreParquet=false'
```
Step 2: imports:

```scala
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.functions._
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.hive.MultiPartKeysValueExtractor
```
Step 3: input and output paths:

```scala
val inputDataPath = "hdfs://impractice/user/vinoth.siva/hudi_parquet/000000_0"
val hudiTableName = "hudi_cow"
val hudiTablePath = "/user/vinoth.siva/hudi_cow"
```
Step 4: writer options:

```scala
val hudiOptions = Map[String, String](
  DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "pk_id",
  DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "created_at",
  HoodieWriteConfig.TABLE_NAME -> hudiTableName,
  DataSourceWriteOptions.OPERATION_OPT_KEY -> DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL,
  DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY -> "COPY_ON_WRITE",
  DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "updated_at",
  DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY -> "true",
  DataSourceWriteOptions.HIVE_TABLE_OPT_KEY -> hudiTableName,
  DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY -> "created_at",
  DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY -> classOf[MultiPartKeysValueExtractor].getName
)
```
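Note that the options above enable Hive sync but set no HiveServer2 JDBC coordinates, so HiveSyncTool falls back to its defaults. For reference, a sketch of the extra keys that are usually supplied (the host and credentials below are placeholders, not taken from this report):

```scala
// Hypothetical HiveServer2 coordinates -- adjust to the actual cluster.
val hiveSyncOptions = hudiOptions ++ Map(
  DataSourceWriteOptions.HIVE_URL_OPT_KEY      -> "jdbc:hive2://hiveserver2-host:10000",
  DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY -> "default",
  DataSourceWriteOptions.HIVE_USER_OPT_KEY     -> "hive",
  DataSourceWriteOptions.HIVE_PASS_OPT_KEY     -> ""
)
```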
Step 5: read the source parquet file:

```scala
val temp = spark.read.format("parquet").load(inputDataPath)
```
Step 6: add an `op` column:

```scala
val fullDF = temp.withColumn("op", lit("I"))
```
Step 7: write the Hudi table (this is the step that triggers the failing Hive sync):

```scala
fullDF.write.format("org.apache.hudi").options(hudiOptions).mode(SaveMode.Overwrite).save(hudiTablePath)
```
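If the write itself succeeds (only the Hive sync step fails), the dataset should still be readable directly from HDFS. A minimal read-back sketch, assuming one partition level under the base path (the glob pattern is an assumption, not taken from this report):

```scala
// Read the table back through the Hudi datasource; with Spark 2.4 / Hudi 0.6
// a glob is needed: one `*` per partition level plus one for the data files.
val readBack = spark.read
  .format("org.apache.hudi")
  .load(hudiTablePath + "/*/*")
readBack.select("_hoodie_commit_time", "pk_id", "created_at", "op").show(5)
```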
**Expected behavior**

The write should complete and Hive sync should create (or update) the `hudi_cow` table in the Hive metastore.
**Environment Description**

* Hudi version : 0.5.3
* Spark version : 2.4.6
* Hive version : 1.2.1000.2.6.5.1100-53
* Hadoop version : 2.7
* Storage (HDFS/S3/GCS..) : HDFS
* Running on Docker? (yes/no) : no
**Additional context**

Full output from the failing write in spark-shell:
```
scala> fullDF.write.format("org.apache.hudi").options(hudiOptions).mode(SaveMode.Overwrite).save(hudiTablePath)
20/08/29 14:08:54 WARN hudi.DefaultSource: hoodie.datasource.write.storage.type is deprecated and will be removed in a later release; Please use hoodie.datasource.write.table.type
20/08/29 14:08:55 WARN hudi.HoodieSparkSqlWriter$: hoodie table at /user/vinoth.siva/hudi_cow already exists. Deleting existing data & overwriting with new data.
20/08/29 14:09:05 WARN conf.Configuration: core-site.xml:an attempt to override final parameter: fs.defaultFS; Ignoring.
20/08/29 14:09:07 WARN conf.Configuration: core-site.xml:an attempt to override final parameter: fs.defaultFS; Ignoring.
20/08/29 14:09:11 ERROR hive.HiveSyncTool: Got runtime exception when hive syncing
org.apache.hudi.hive.HoodieHiveSyncException: Failed in executing SQL CREATE EXTERNAL TABLE IF NOT EXISTS `default`.`hudi_cow`( `_hoodie_commit_time` string, `_hoodie_commit_seqno` string, `_hoodie_record_key` string, `_hoodie_partition_path` string, `_hoodie_file_name` string, `op` string, `pk_id` int, `name` string, `value` int, `updated_at` bigint) PARTITIONED BY (`created_at` bigint) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' LOCATION '/user/vinoth.siva/hudi_cow'
    at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:352)
    at org.apache.hudi.hive.HoodieHiveClient.createTable(HoodieHiveClient.java:262)
    at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:176)
    at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:130)
    at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:94)
    at org.apache.hudi.HoodieSparkSqlWriter$.org$apache$hudi$HoodieSparkSqlWriter$$syncHive(HoodieSparkSqlWriter.scala:321)
    at org.apache.hudi.HoodieSparkSqlWriter$$anonfun$metaSync$2.apply(HoodieSparkSqlWriter.scala:363)
    at org.apache.hudi.HoodieSparkSqlWriter$$anonfun$metaSync$2.apply(HoodieSparkSqlWriter.scala:359)
    at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
    at org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:359)
    at org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:417)
    at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:205)
    at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:125)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:83)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:81)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:677)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:677)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:677)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:286)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:272)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:230)
    at $line25.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:37)
    at $line25.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
    at $line25.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:44)
    at $line25.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:46)
    at $line25.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:48)
    at $line25.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:50)
    at $line25.$read$$iw$$iw$$iw$$iw.<init>(<console>:52)
    at $line25.$read$$iw$$iw$$iw.<init>(<console>:54)
    at $line25.$read$$iw$$iw.<init>(<console>:56)
    at $line25.$read$$iw.<init>(<console>:58)
    at $line25.$read.<init>(<console>:60)
    at $line25.$read$.<init>(<console>:64)
    at $line25.$read$.<clinit>(<console>)
    at $line25.$eval$.$print$lzycompute(<console>:7)
    at $line25.$eval$.$print(<console>:6)
    at $line25.$eval.$print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793)
    at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054)
    at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645)
    at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644)
    at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
    at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
    at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644)
    at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576)
    at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572)
    at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:819)
    at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:691)
    at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:404)
    at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:425)
    at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:285)
    at org.apache.spark.repl.SparkILoop.runClosure(SparkILoop.scala:159)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:182)
    at org.apache.spark.repl.Main$.doMain(Main.scala:78)
    at org.apache.spark.repl.Main$.main(Main.scala:58)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: SemanticException Cannot find class 'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:256)
    at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:242)
    at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:254)
    at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:350)
    ... 84 more
Caused by: org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: SemanticException Cannot find class 'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    at org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:324)
    at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:148)
    at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:228)
    at org.apache.hive.service.cli.operation.Operation.run(Operation.java:264)
    at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:479)
    at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:466)
    at org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:315)
    at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:514)
    at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1377)
    at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1362)
    at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
    at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
    at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
    at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Cannot find class 'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    at org.apache.hadoop.hive.ql.parse.ParseUtils.ensureClassExists(ParseUtils.java:232)
    at org.apache.hadoop.hive.ql.parse.StorageFormat.fillStorageFormat(StorageFormat.java:57)
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:11228)
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genResolvedParseTree(SemanticAnalyzer.java:10398)
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10486)
    at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:219)
    at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:248)
    at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:475)
    at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:331)
    at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1238)
    at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1232)
    at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:146)
    ... 15 more
Caused by: java.lang.ClassNotFoundException: org.apache.hudi.hadoop.HoodieParquetInputFormat
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.hadoop.hive.ql.parse.ParseUtils.ensureClassExists(ParseUtils.java:230)
    ... 26 more
```
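The trace bottoms out in `java.lang.ClassNotFoundException: org.apache.hudi.hadoop.HoodieParquetInputFormat`, i.e. the HiveServer2 that compiles the sync's `CREATE EXTERNAL TABLE` cannot load the Hudi input format. The usual remedy is to make the `hudi-hadoop-mr-bundle` jar visible to Hive (for example via `hive.aux.jars.path`). As a session-scoped probe of the same mechanism, one can run `ADD JAR` followed by an equivalent DDL over JDBC; a minimal sketch, with the URL, credentials, and jar path as placeholders rather than values from this report:

```scala
import java.sql.DriverManager

// Hypothetical coordinates -- substitute the real HiveServer2 URL,
// credentials, and the location of the hudi-hadoop-mr bundle jar.
Class.forName("org.apache.hive.jdbc.HiveDriver")
val conn = DriverManager.getConnection("jdbc:hive2://hiveserver2-host:10000/default", "hive", "")
val stmt = conn.createStatement()

// ADD JAR is session-scoped: with the bundle on this session's classpath,
// a table declared with HoodieParquetInputFormat should now compile.
stmt.execute("ADD JAR /tmp/jars/hudi-hadoop-mr-bundle-0.6.0.jar")
stmt.execute(
  """CREATE EXTERNAL TABLE IF NOT EXISTS `default`.`hudi_probe` (`pk_id` int)
    |STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    |OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
    |LOCATION '/tmp/hudi_probe'""".stripMargin)
stmt.close()
conn.close()
```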