[
https://issues.apache.org/jira/browse/FLINK-17730?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17110242#comment-17110242
]
Robert Metzger commented on FLINK-17730:
----------------------------------------
Indeed, but these are very rare cases. We currently have ~100 builds a day, and
maybe 5 are failing because of that. And even if they are failing, they are
usually <10 minutes.
It's a tradeoff between "green builds" and wait time.
The real underlying issue is the network stability of our CI servers. If they
were more stable, then we would not have these issues at all.
> HadoopS3RecoverableWriterITCase.testRecoverAfterMultiplePersistsStateWithMultiPart
> times out
> --------------------------------------------------------------------------------------------
>
> Key: FLINK-17730
> URL: https://issues.apache.org/jira/browse/FLINK-17730
> Project: Flink
> Issue Type: Bug
> Components: Build System / Azure Pipelines, FileSystems, Tests
> Reporter: Robert Metzger
> Assignee: Robert Metzger
> Priority: Major
> Labels: pull-request-available, test-stability
>
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=1374&view=logs&j=d44f43ce-542c-597d-bf94-b0718c71e5e8&t=34f486e1-e1e4-5dd2-9c06-bfdd9b9c74a8
> After 5 minutes
> {code}
> 2020-05-15T06:56:38.1688341Z "main" #1 prio=5 os_prio=0
> tid=0x00007fa10800b800 nid=0x1161 runnable [0x00007fa110959000]
> 2020-05-15T06:56:38.1688709Z java.lang.Thread.State: RUNNABLE
> 2020-05-15T06:56:38.1689028Z at
> java.net.SocketInputStream.socketRead0(Native Method)
> 2020-05-15T06:56:38.1689496Z at
> java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
> 2020-05-15T06:56:38.1689921Z at
> java.net.SocketInputStream.read(SocketInputStream.java:171)
> 2020-05-15T06:56:38.1690316Z at
> java.net.SocketInputStream.read(SocketInputStream.java:141)
> 2020-05-15T06:56:38.1690723Z at
> sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
> 2020-05-15T06:56:38.1691196Z at
> sun.security.ssl.InputRecord.readV3Record(InputRecord.java:593)
> 2020-05-15T06:56:38.1691608Z at
> sun.security.ssl.InputRecord.read(InputRecord.java:532)
> 2020-05-15T06:56:38.1692023Z at
> sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:975)
> 2020-05-15T06:56:38.1692558Z - locked <0x00000000b94644f8> (a
> java.lang.Object)
> 2020-05-15T06:56:38.1692946Z at
> sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:933)
> 2020-05-15T06:56:38.1693371Z at
> sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
> 2020-05-15T06:56:38.1694151Z - locked <0x00000000b9464d20> (a
> sun.security.ssl.AppInputStream)
> 2020-05-15T06:56:38.1694908Z at
> org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
> 2020-05-15T06:56:38.1695475Z at
> org.apache.http.impl.io.SessionInputBufferImpl.read(SessionInputBufferImpl.java:198)
> 2020-05-15T06:56:38.1696007Z at
> org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:176)
> 2020-05-15T06:56:38.1696509Z at
> org.apache.http.conn.EofSensorInputStream.read(EofSensorInputStream.java:135)
> 2020-05-15T06:56:38.1696993Z at
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1697466Z at
> com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:180)
> 2020-05-15T06:56:38.1698069Z at
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1698567Z at
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1699041Z at
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1699624Z at
> com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:180)
> 2020-05-15T06:56:38.1700090Z at
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1700584Z at
> com.amazonaws.util.LengthCheckInputStream.read(LengthCheckInputStream.java:107)
> 2020-05-15T06:56:38.1701282Z at
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1701800Z at
> com.amazonaws.services.s3.internal.S3AbortableInputStream.read(S3AbortableInputStream.java:125)
> 2020-05-15T06:56:38.1702328Z at
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1702804Z at
> org.apache.hadoop.fs.s3a.S3AInputStream.lambda$read$3(S3AInputStream.java:445)
> 2020-05-15T06:56:38.1703270Z at
> org.apache.hadoop.fs.s3a.S3AInputStream$$Lambda$42/1204178174.execute(Unknown
> Source)
> 2020-05-15T06:56:38.1703677Z at
> org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109)
> 2020-05-15T06:56:38.1704090Z at
> org.apache.hadoop.fs.s3a.Invoker.lambda$retry$3(Invoker.java:260)
> 2020-05-15T06:56:38.1704607Z at
> org.apache.hadoop.fs.s3a.Invoker$$Lambda$23/1991724700.execute(Unknown Source)
> 2020-05-15T06:56:38.1705115Z at
> org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:317)
> 2020-05-15T06:56:38.1705551Z at
> org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:256)
> 2020-05-15T06:56:38.1705937Z at
> org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:231)
> 2020-05-15T06:56:38.1706363Z at
> org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:441)
> 2020-05-15T06:56:38.1707052Z - locked <0x00000000b7d98b60> (a
> org.apache.hadoop.fs.s3a.S3AInputStream)
> 2020-05-15T06:56:38.1707438Z at
> java.io.DataInputStream.read(DataInputStream.java:149)
> 2020-05-15T06:56:38.1707904Z at
> org.apache.flink.runtime.fs.hdfs.HadoopDataInputStream.read(HadoopDataInputStream.java:94)
> 2020-05-15T06:56:38.1708366Z at
> sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:284)
> 2020-05-15T06:56:38.1708770Z at
> sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:326)
> 2020-05-15T06:56:38.1709150Z at
> sun.nio.cs.StreamDecoder.read(StreamDecoder.java:178)
> 2020-05-15T06:56:38.1709784Z - locked <0x00000000b7d9a2b0> (a
> java.io.InputStreamReader)
> 2020-05-15T06:56:38.1710170Z at
> java.io.InputStreamReader.read(InputStreamReader.java:184)
> 2020-05-15T06:56:38.1710557Z at
> java.io.BufferedReader.fill(BufferedReader.java:161)
> 2020-05-15T06:56:38.1710956Z at
> java.io.BufferedReader.readLine(BufferedReader.java:324)
> 2020-05-15T06:56:38.1711552Z - locked <0x00000000b7d9a2b0> (a
> java.io.InputStreamReader)
> 2020-05-15T06:56:38.1711930Z at
> java.io.BufferedReader.readLine(BufferedReader.java:389)
> 2020-05-15T06:56:38.1712451Z at
> org.apache.flink.fs.s3hadoop.HadoopS3RecoverableWriterITCase.getContentsOfFile(HadoopS3RecoverableWriterITCase.java:423)
> 2020-05-15T06:56:38.1713152Z at
> org.apache.flink.fs.s3hadoop.HadoopS3RecoverableWriterITCase.testResumeAfterMultiplePersist(HadoopS3RecoverableWriterITCase.java:411)
> 2020-05-15T06:56:38.1713922Z at
> org.apache.flink.fs.s3hadoop.HadoopS3RecoverableWriterITCase.testResumeAfterMultiplePersistWithMultiPartUploads(HadoopS3RecoverableWriterITCase.java:364)
> 2020-05-15T06:56:38.1714804Z at
> org.apache.flink.fs.s3hadoop.HadoopS3RecoverableWriterITCase.testRecoverAfterMultiplePersistsStateWithMultiPart(HadoopS3RecoverableWriterITCase.java:344)
> 2020-05-15T06:56:38.1715395Z at
> sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 2020-05-15T06:56:38.1715807Z at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 2020-05-15T06:56:38.1716313Z at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 2020-05-15T06:56:38.1716754Z at
> java.lang.reflect.Method.invoke(Method.java:498)
> 2020-05-15T06:56:38.1717181Z at
> org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
> 2020-05-15T06:56:38.1717696Z at
> org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> 2020-05-15T06:56:38.1718191Z at
> org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
> 2020-05-15T06:56:38.1718687Z at
> org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> 2020-05-15T06:56:38.1719181Z at
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
> 2020-05-15T06:56:38.1719822Z at
> org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
> 2020-05-15T06:56:38.1720462Z at
> org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
> 2020-05-15T06:56:38.1720852Z at
> org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2020-05-15T06:56:38.1721321Z at
> org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
> 2020-05-15T06:56:38.1721770Z at
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
> 2020-05-15T06:56:38.1722272Z at
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
> 2020-05-15T06:56:38.1722731Z at
> org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
> 2020-05-15T06:56:38.1723133Z at
> org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
> 2020-05-15T06:56:38.1723561Z at
> org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
> 2020-05-15T06:56:38.1724113Z at
> org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
> 2020-05-15T06:56:38.1724630Z at
> org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
> 2020-05-15T06:56:38.1725086Z at
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
> 2020-05-15T06:56:38.1725553Z at
> org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
> 2020-05-15T06:56:38.1726019Z at
> org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2020-05-15T06:56:38.1726422Z at
> org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2020-05-15T06:56:38.1726808Z at
> org.junit.runners.ParentRunner.run(ParentRunner.java:363)
> 2020-05-15T06:56:38.1727227Z at
> org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> 2020-05-15T06:56:38.1727727Z at
> org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> 2020-05-15T06:56:38.1728245Z at
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> 2020-05-15T06:56:38.1728733Z at
> org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> 2020-05-15T06:56:38.1729350Z at
> org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
> 2020-05-15T06:56:38.1729898Z at
> org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
> 2020-05-15T06:56:38.1730401Z at
> org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
> 2020-05-15T06:56:38.1730874Z at
> org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)