[
https://issues.apache.org/jira/browse/HUDI-3669?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
HunterHunter updated HUDI-3669:
-------------------------------
Description:
Flink streaming write to Hudi: the task runs fine for about an hour after it
starts, and communication is normal. After the task has been running for a
period of time, an error is reported when BucketAssignFunction communicates
with the JM's timeline server. This error occurs under certain circumstances.
When the task restarts after the error is reported, it happens again after
running for a period of time, eventually causing the task to fail.
In addition: I have modified NetworkUtils' method of getting the IP as suggested.
{code:java}
org.apache.hudi.exception.HoodieRemoteException: 10.18x.xx.xx:34805 failed to
respond at
org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.refresh(RemoteHoodieTableFileSystemView.java:420)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
at
org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.sync(RemoteHoodieTableFileSystemView.java:484)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
at
org.apache.hudi.common.table.view.PriorityBasedFileSystemView.sync(PriorityBasedFileSystemView.java:257)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
at
org.apache.hudi.sink.partitioner.profile.WriteProfile.reload(WriteProfile.java:252)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
at
org.apache.hudi.sink.partitioner.BucketAssigner.reload(BucketAssigner.java:211)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT] at
org.apache.hudi.sink.partitioner.BucketAssignFunction.notifyCheckpointComplete(BucketAssignFunction.java:234)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
at
org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.notifyCheckpointComplete(AbstractUdfStreamOperator.java:130)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamOperatorWrapper.notifyCheckpointComplete(StreamOperatorWrapper.java:99)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.SubtaskCheckpointCoordinatorImpl.notifyCheckpointComplete(SubtaskCheckpointCoordinatorImpl.java:334)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamTask.notifyCheckpointComplete(StreamTask.java:1171)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$notifyCheckpointCompleteAsync$10(StreamTask.java:1136)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$notifyCheckpointOperation$12(StreamTask.java:1159)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:50)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.mailbox.Mail.run(Mail.java:90)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMailsWhenDefaultActionUnavailable(MailboxProcessor.java:344)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMail(MailboxProcessor.java:330)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:202)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:684)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamTask.executeInvoke(StreamTask.java:639)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamTask.runWithCleanUpOnFail(StreamTask.java:650)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:623)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:779)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
org.apache.flink.runtime.taskmanager.Task.run(Task.java:566)
~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_201]
Caused by:
org.apache.http.NoHttpResponseException: 10.18x.xx.xx:34805 failed to respond
at
org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:165)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:167)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:271)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:88)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:107)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
org.apache.http.client.fluent.Request.execute(Request.java:151)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT] at
org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.executeRequest(RemoteHoodieTableFileSystemView.java:176)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
at
org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.refresh(RemoteHoodieTableFileSystemView.java:418)
~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
... 23 more {code}
was:
Flink streaming write to Hudi: the task runs fine for about an hour after it
starts, and communication is normal. After the task has been running for a
period of time, an error is reported when BucketAssignFunction communicates
with the JM's timeline server. This error occurs under certain circumstances.
When the task restarts after the error is reported, it happens again after
running for a period of time, eventually causing the task to fail.
In addition: I have modified NetworkUtils' method of getting the IP as suggested.
> HoodieRemoteException when task communicates with jm's timeline server: IP :
> port failed to respond
> ---------------------------------------------------------------------------------------------------
>
> Key: HUDI-3669
> URL: https://issues.apache.org/jira/browse/HUDI-3669
> Project: Apache Hudi
> Issue Type: Bug
> Components: flink
> Affects Versions: 0.10.1, 0.11.0
> Environment: Flink Standalone 1.13.3
> Reporter: HunterHunter
> Priority: Major
> Attachments: WechatIMG14.jpeg
>
>
> Flink streaming write to Hudi: the task runs fine for about an hour after it
> starts, and communication is normal. After the task has been running for a
> period of time, an error is reported when BucketAssignFunction communicates
> with the JM's timeline server. This error occurs under certain circumstances.
> When the task restarts after the error is reported, it happens again after
> running for a period of time, eventually causing the task to fail.
> In addition: I have modified NetworkUtils' method of getting the IP as suggested.
> {code:java}
> org.apache.hudi.exception.HoodieRemoteException: 10.18x.xx.xx:34805 failed to
> respond at
> org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.refresh(RemoteHoodieTableFileSystemView.java:420)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> at
> org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.sync(RemoteHoodieTableFileSystemView.java:484)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> at
> org.apache.hudi.common.table.view.PriorityBasedFileSystemView.sync(PriorityBasedFileSystemView.java:257)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> at
> org.apache.hudi.sink.partitioner.profile.WriteProfile.reload(WriteProfile.java:252)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> at
> org.apache.hudi.sink.partitioner.BucketAssigner.reload(BucketAssigner.java:211)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> at
> org.apache.hudi.sink.partitioner.BucketAssignFunction.notifyCheckpointComplete(BucketAssignFunction.java:234)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> at
> org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.notifyCheckpointComplete(AbstractUdfStreamOperator.java:130)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamOperatorWrapper.notifyCheckpointComplete(StreamOperatorWrapper.java:99)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.SubtaskCheckpointCoordinatorImpl.notifyCheckpointComplete(SubtaskCheckpointCoordinatorImpl.java:334)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamTask.notifyCheckpointComplete(StreamTask.java:1171)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$notifyCheckpointCompleteAsync$10(StreamTask.java:1136)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$notifyCheckpointOperation$12(StreamTask.java:1159)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:50)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.mailbox.Mail.run(Mail.java:90)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMailsWhenDefaultActionUnavailable(MailboxProcessor.java:344)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMail(MailboxProcessor.java:330)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:202)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:684)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamTask.executeInvoke(StreamTask.java:639)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamTask.runWithCleanUpOnFail(StreamTask.java:650)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:623)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:779)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> org.apache.flink.runtime.taskmanager.Task.run(Task.java:566)
> ~[flink-dist_2.11-1.13.3-SNAPSHOT.jar:1.13.3-SNAPSHOT] at
> java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_201]
> Caused by:
> org.apache.http.NoHttpResponseException: 10.18x.xx.xx:34805 failed to respond
> at
> org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:165)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:167)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:271)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:88)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:107)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
> ~[flink-connector-vdp-1.13-SNAPSHOT.jar:?] at
> org.apache.http.client.fluent.Request.execute(Request.java:151)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> at
> org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.executeRequest(RemoteHoodieTableFileSystemView.java:176)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> at
> org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.refresh(RemoteHoodieTableFileSystemView.java:418)
> ~[hudi-flink-bundle_2.11-0.10-vipshop-SNAPSHOT.jar:0.10-vipshop-SNAPSHOT]
> ... 23 more {code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)