stackls opened a new issue, #5493:
URL: https://github.com/apache/hudi/issues/5493

   While processing 200 tables sequentially using Hudi for delta records, 3 to 4 
tables fail at random on each run with one of the two errors below.  
The tables that fail are not the same from run to run. The Hudi 
Hive sync configs are shared below. 
   
   Error2 - org.apache.hudi.hive.HoodieHiveSyncException: Cannot create hive 
connection jdbc:hive2://server:10000/
   Error1 - Caused by: java.lang.IllegalArgumentException: Comparison method 
violates its general contract!
   
   **Environment Description**
   
   * Hudi version : 0.5.0
   * Spark version : 2.4.4
   * Hive version : 3.1.2
   * Hadoop version : 3.2.1
   * Storage (HDFS/S3/GCS..) : S3
   * Running on Docker? (yes/no) : no 
   
   Hive configs -
           "hoodie.datasource.hive_sync.database": "value",
           "hoodie.datasource.hive_sync.table": "value",
           "hoodie.datasource.hive_sync.jdbcurl":"value" ,
           "hoodie.datasource.hive_sync.username": "value",
           "hoodie.datasource.hive_sync.password": "value",
           "hoodie.datasource.hive_sync.enable": "true",
           "hoodie.datasource.hive_sync.assume_date_partitioning": "value",
           "hoodie.datasource.hive_sync.partition_extractor_class": "value"
   
   Error 1 :
   : org.apache.hudi.exception.HoodieUpsertException: Failed to upsert for 
commit time 20220503123113
        at org.apache.hudi.HoodieWriteClient.upsert(HoodieWriteClient.java:177)
        at 
org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:183)
        at 
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:143)
        at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:91)
        at 
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
        at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131)
        at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:156)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at 
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
        at 
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:83)
        at 
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:83)
        at 
org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:676)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:84)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:165)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74)
        at 
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
        at 
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:290)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
        at sun.reflect.GeneratedMethodAccessor219.invoke(Unknown Source)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
        at py4j.Gateway.invoke(Gateway.java:282)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:238)
        at java.lang.Thread.run(Thread.java:750)
   Caused by: java.lang.IllegalArgumentException: Comparison method violates 
its general contract!
        at java.util.TimSort.mergeLo(TimSort.java:777)
        at java.util.TimSort.mergeAt(TimSort.java:514)
        at java.util.TimSort.mergeForceCollapse(TimSort.java:457)
        at java.util.TimSort.sort(TimSort.java:254)
        at java.util.Arrays.sort(Arrays.java:1512)
        at java.util.ArrayList.sort(ArrayList.java:1464)
        at java.util.stream.SortedOps$RefSortingSink.end(SortedOps.java:392)
        at java.util.stream.Sink$ChainedReference.end(Sink.java:258)
        at java.util.stream.Sink$ChainedReference.end(Sink.java:258)
        at java.util.stream.Sink$ChainedReference.end(Sink.java:258)
        at java.util.stream.Sink$ChainedReference.end(Sink.java:258)
        at java.util.stream.Sink$ChainedReference.end(Sink.java:258)
        at 
java.util.stream.AbstractPipeline.copyIntoWithCancel(AbstractPipeline.java:500)
        at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:486)
        at 
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
        at java.util.stream.FindOps$FindOp.evaluateSequential(FindOps.java:152)
        at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
        at 
java.util.stream.ReferencePipeline.findFirst(ReferencePipeline.java:531)
        at 
org.apache.hudi.table.HoodieMergeOnReadTable$MergeOnReadUpsertPartitioner.getSmallFiles(HoodieMergeOnReadTable.java:379)
        at 
org.apache.hudi.table.HoodieCopyOnWriteTable$UpsertPartitioner.assignInserts(HoodieCopyOnWriteTable.java:615)
        at 
org.apache.hudi.table.HoodieCopyOnWriteTable$UpsertPartitioner.<init>(HoodieCopyOnWriteTable.java:578)
        at 
org.apache.hudi.table.HoodieMergeOnReadTable$MergeOnReadUpsertPartitioner.<init>(HoodieMergeOnReadTable.java:349)
        at 
org.apache.hudi.table.HoodieMergeOnReadTable.getUpsertPartitioner(HoodieMergeOnReadTable.java:106)
        at 
org.apache.hudi.HoodieWriteClient.getPartitioner(HoodieWriteClient.java:439)
        at 
org.apache.hudi.HoodieWriteClient.upsertRecordsInternal(HoodieWriteClient.java:424)
        at org.apache.hudi.HoodieWriteClient.upsert(HoodieWriteClient.java:172)
        ... 32 more
   Error2:
   py4j.protocol.Py4JJavaError: An error occurred while calling o30236.save.
   : org.apache.hudi.hive.HoodieHiveSyncException: Cannot create hive 
connection jdbc:hive2://server:10000/
        at 
org.apache.hudi.hive.HoodieHiveClient.createHiveConnection(HoodieHiveClient.java:559)
        at 
org.apache.hudi.hive.HoodieHiveClient.<init>(HoodieHiveClient.java:108)
        at org.apache.hudi.hive.HiveSyncTool.<init>(HiveSyncTool.java:60)
        at 
org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:236)
        at 
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:169)
        at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:91)
        at 
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
        at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131)
        at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:156)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at 
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
        at 
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:83)
        at 
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:83)
        at 
org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:676)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:84)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:165)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74)
        at 
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
        at 
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:290)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
        at sun.reflect.GeneratedMethodAccessor238.invoke(Unknown Source)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
        at py4j.Gateway.invoke(Gateway.java:282)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:238)
        at java.lang.Thread.run(Thread.java:750)
   Caused by: java.sql.SQLException: Could not open client transport with JDBC 
Uri: jdbc:hive2://server:10000: java.net.ConnectException: Connection refused 
(Connection refused)
        at 
org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:232)
        at org.apache.hive.jdbc.HiveConnection.<init>(HiveConnection.java:176)
        at org.apache.hive.jdbc.HiveDriver.connect(HiveDriver.java:105)
        at java.sql.DriverManager.getConnection(DriverManager.java:664)
        at java.sql.DriverManager.getConnection(DriverManager.java:247)
        at 
org.apache.hudi.hive.HoodieHiveClient.createHiveConnection(HoodieHiveClient.java:556)
        ... 34 more
   Caused by: org.apache.thrift.transport.TTransportException: 
java.net.ConnectException: Connection refused (Connection refused)
        at org.apache.thrift.transport.TSocket.open(TSocket.java:226)
        at 
org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:266)
        at 
org.apache.thrift.transport.TSaslClientTransport.open(TSaslClientTransport.java:37)
        at 
org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:204)
        ... 39 more
   Caused by: java.net.ConnectException: Connection refused (Connection refused)
        at java.net.PlainSocketImpl.socketConnect(Native Method)
        at 
java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
        at 
java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
        at 
java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
        at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
        at java.net.Socket.connect(Socket.java:607)
        at org.apache.thrift.transport.TSocket.open(TSocket.java:221)
        ... 42 more


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to