[ 
https://issues.apache.org/jira/browse/HUDI-2515?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vinoth Chandar updated HUDI-2515:
---------------------------------
    Labels: pull-request-available sev:critical  (was: pull-request-available)

> Add close when producing records failed
> ---------------------------------------
>
>                 Key: HUDI-2515
>                 URL: https://issues.apache.org/jira/browse/HUDI-2515
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: Common Core
>            Reporter: 董可伦
>            Assignee: 董可伦
>            Priority: Major
>              Labels: pull-request-available, sev:critical
>             Fix For: 0.10.0
>
>
> For now, when producing records fails, the connection is not closed. If a 
> large number of clients hit such exceptions, the unclosed connections will 
> lead to a large number of sockets stuck in CLOSE_WAIT, which eventually 
> causes almost all ports of the server to be occupied; clients writing data 
> normally then cannot obtain port resources and fail. This affects the 
> resources of the entire server.
> The detailed exceptions:
>  
> {code:java}
> 2021-10-02 10:48:27,335 ERROR [pool-525-thread-1] 
> o.a.h.c.u.queue.BoundedInMemoryExecutor error producing records
> org.apache.parquet.io.InvalidRecordException: Parquet/Avro schema mismatch: 
> Avro field 'TRANSFER_RESULT' not found
>  at 
> org.apache.parquet.avro.AvroRecordConverter.getAvroField(AvroRecordConverter.java:225)
>  at 
> org.apache.parquet.avro.AvroRecordConverter.<init>(AvroRecordConverter.java:130)
>  at 
> org.apache.parquet.avro.AvroRecordConverter.<init>(AvroRecordConverter.java:95)
>  at 
> org.apache.parquet.avro.AvroRecordMaterializer.<init>(AvroRecordMaterializer.java:33)
>  at 
> org.apache.parquet.avro.AvroReadSupport.prepareForRead(AvroReadSupport.java:138)
>  at 
> org.apache.parquet.hadoop.InternalParquetRecordReader.initialize(InternalParquetRecordReader.java:183)
>  at org.apache.parquet.hadoop.ParquetReader.initReader(ParquetReader.java:156)
>  at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:135)
>  at 
> org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:49)
>  at 
> org.apache.hudi.common.util.queue.IteratorBasedQueueProducer.produce(IteratorBasedQueueProducer.java:45)
>  at 
> org.apache.hudi.common.util.queue.BoundedInMemoryExecutor.lambda$null$0(BoundedInMemoryExecutor.java:92)
>  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
> 2021-10-02 10:48:28,324 ERROR [pool-525-thread-2] 
> o.a.h.c.u.queue.BoundedInMemoryExecutor error consuming records
> org.apache.hudi.exception.HoodieException: operation has failed
>  at 
> org.apache.hudi.common.util.queue.BoundedInMemoryQueue.throwExceptionIfFailed(BoundedInMemoryQueue.java:247)
>  at 
> org.apache.hudi.common.util.queue.BoundedInMemoryQueue.readNextRecord(BoundedInMemoryQueue.java:226)
>  at 
> org.apache.hudi.common.util.queue.BoundedInMemoryQueue.access$100(BoundedInMemoryQueue.java:52)
>  at 
> org.apache.hudi.common.util.queue.BoundedInMemoryQueue$QueueIterator.hasNext(BoundedInMemoryQueue.java:277)
>  at 
> org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer.consume(BoundedInMemoryQueueConsumer.java:36)
>  at 
> org.apache.hudi.common.util.queue.BoundedInMemoryExecutor.lambda$null$2(BoundedInMemoryExecutor.java:121)
>  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748){code}
>  
>  
>  
> {code:java}
> netstat -nlp | grep 1019 | wc -l
> 1456
> tcp 1 0 ip:42280 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:46370 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:54822 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:51444 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:40062 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:34848 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:40574 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:55540 ip-:1019 CLOSE_WAIT
> tcp 0 0 ip:46554 ip-:1019 ESTABLISHED
> tcp 1 0 ip:37418 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:44476 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:40656 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:41044 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:36310 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:58766 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:39426 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:51552 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:32822 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:50938 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:60448 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:47028 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:49492 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:45274 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:38500 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:50781 ip-:1019 CLOSE_WAIT
> tcp 1 0 ip:35836 ip-:1019 CLOSE_WAIT
>  
> {code}
>  
>  
>  
> {code:java}
> 2021-10-03 06:43:55,377 WARN [Timer-Driven Process Thread-5] 
> o.apache.hadoop.hdfs.BlockReaderFactory I/O error constructing remote block 
> reader.
> java.net.BindException: Cannot assign requested address
>  at sun.nio.ch.Net.connect0(Native Method)
>  at sun.nio.ch.Net.connect(Net.java:454)
>  at sun.nio.ch.Net.connect(Net.java:446)
>  at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:648)
>  at 
> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:192)
>  at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
>  at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3090)
>  at 
> org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:778)
>  at 
> org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:693)
>  at 
> org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:354)
>  at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
>  at 
> org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
>  at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:848)
>  at java.io.DataInputStream.read(DataInputStream.java:149)
>  at java.io.DataInputStream.read(DataInputStream.java:100)
>  at java.util.Properties$LineReader.readLine(Properties.java:435)
>  at java.util.Properties.load0(Properties.java:353)
>  at java.util.Properties.load(Properties.java:341)
>  at 
> org.apache.hudi.common.table.HoodieTableConfig.<init>(HoodieTableConfig.java:173)
>  at 
> org.apache.hudi.common.table.HoodieTableMetaClient.<init>(HoodieTableMetaClient.java:114)
>  at 
> org.apache.hudi.common.table.HoodieTableMetaClient.<init>(HoodieTableMetaClient.java:74)
>  at 
> org.apache.hudi.common.table.HoodieTableMetaClient$Builder.build(HoodieTableMetaClient.java:611)
>  at org.apache.hudi.table.HoodieJavaTable.create(HoodieJavaTable.java:48)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.createTable(HoodieJavaWriteClient.java:91)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.rollbackFailedWrites(AbstractHoodieWriteClient.java:805)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.lambda$startCommit$ac3900ef$1(AbstractHoodieWriteClient.java:680)
>  at 
> org.apache.hudi.common.util.CleanerUtils.rollbackFailedWrites(CleanerUtils.java:143)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.startCommit(AbstractHoodieWriteClient.java:679)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:360)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:296)
>  at 
> org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
>  at 
> org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
>  at 
> org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
>  at 
> org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
> 2021-10-03 06:43:55,377 WARN [Timer-Driven Process Thread-5] 
> org.apache.hadoop.hdfs.DFSClient Failed to connect to /10.240.124.211:1019 
> for block, add to deadNodes and continue. java.net.BindException: Cannot 
> assign requested address
> java.net.BindException: Cannot assign requested address
>  at sun.nio.ch.Net.connect0(Native Method)
>  at sun.nio.ch.Net.connect(Net.java:454)
>  at sun.nio.ch.Net.connect(Net.java:446)
>  at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:648)
>  at 
> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:192)
>  at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
>  at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3090)
>  at 
> org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:778)
>  at 
> org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:693)
>  at 
> org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:354)
>  at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
>  at 
> org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
>  at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:848)
>  at java.io.DataInputStream.read(DataInputStream.java:149)
>  at java.io.DataInputStream.read(DataInputStream.java:100)
>  at java.util.Properties$LineReader.readLine(Properties.java:435)
>  at java.util.Properties.load0(Properties.java:353)
>  at java.util.Properties.load(Properties.java:341)
>  at 
> org.apache.hudi.common.table.HoodieTableConfig.<init>(HoodieTableConfig.java:173)
>  at 
> org.apache.hudi.common.table.HoodieTableMetaClient.<init>(HoodieTableMetaClient.java:114)
>  at 
> org.apache.hudi.common.table.HoodieTableMetaClient.<init>(HoodieTableMetaClient.java:74)
>  at 
> org.apache.hudi.common.table.HoodieTableMetaClient$Builder.build(HoodieTableMetaClient.java:611)
>  at org.apache.hudi.table.HoodieJavaTable.create(HoodieJavaTable.java:48)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.createTable(HoodieJavaWriteClient.java:91)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.rollbackFailedWrites(AbstractHoodieWriteClient.java:805)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.lambda$startCommit$ac3900ef$1(AbstractHoodieWriteClient.java:680)
>  at 
> org.apache.hudi.common.util.CleanerUtils.rollbackFailedWrites(CleanerUtils.java:143)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.startCommit(AbstractHoodieWriteClient.java:679)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:360)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:296)
>  at 
> org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
>  at 
> org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
>  at 
> org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
>  at 
> org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748){code}
>  
> {code:java}
> 2021-10-03 14:00:24,316 ERROR [Timer-Driven Process Thread-77] 
> o.a.nifi.processors.javaHudi.JavaHudi 
> JavaHudi[id=017c104f-3c46-1b91-674b-ca7b302d9690] write hudi error, : 
> org.apache.hudi.exception.HoodieException: Unable to find server port
> org.apache.hudi.exception.HoodieException: Unable to find server port
>  at org.apache.hudi.common.util.NetworkUtils.getHostname(NetworkUtils.java:37)
>  at 
> org.apache.hudi.client.embedded.EmbeddedTimelineService.setHostAddr(EmbeddedTimelineService.java:104)
>  at 
> org.apache.hudi.client.embedded.EmbeddedTimelineService.<init>(EmbeddedTimelineService.java:55)
>  at 
> org.apache.hudi.client.embedded.EmbeddedTimelineServerHelper.startTimelineService(EmbeddedTimelineServerHelper.java:70)
>  at 
> org.apache.hudi.client.embedded.EmbeddedTimelineServerHelper.createEmbeddedTimelineService(EmbeddedTimelineServerHelper.java:58)
>  at 
> org.apache.hudi.client.AbstractHoodieClient.startEmbeddedServerView(AbstractHoodieClient.java:109)
>  at 
> org.apache.hudi.client.AbstractHoodieClient.<init>(AbstractHoodieClient.java:77)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.<init>(AbstractHoodieWriteClient.java:133)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.<init>(AbstractHoodieWriteClient.java:121)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.<init>(HoodieJavaWriteClient.java:53)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:357)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:296)
>  at 
> org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
>  at 
> org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
>  at 
> org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
>  at 
> org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
> Caused by: java.net.BindException: Address already in use (Bind failed)
>  at java.net.PlainSocketImpl.socketBind(Native Method)
>  at java.net.AbstractPlainSocketImpl.bind(AbstractPlainSocketImpl.java:387)
>  at java.net.ServerSocket.bind(ServerSocket.java:375)
>  at java.net.ServerSocket.<init>(ServerSocket.java:237)
>  at java.net.ServerSocket.<init>(ServerSocket.java:128)
>  at org.apache.hudi.common.util.NetworkUtils.getHostname(NetworkUtils.java:34)
>  ... 22 common frames omitted{code}
>  
>  
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to