[ 
https://issues.apache.org/jira/browse/FLINK-17730?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17110242#comment-17110242
 ] 

Robert Metzger commented on FLINK-17730:
----------------------------------------

Indeed, but these are very rare cases. We currently have ~100 builds a day, and 
maybe 5 are failing because of that. And even if they are failing, they are 
usually <10 minutes.
It's a tradeoff between "green builds" and wait time.
The real underlying issue is the network stability of our CI servers. If they 
were more stable, then we would not have these issues at all.

> HadoopS3RecoverableWriterITCase.testRecoverAfterMultiplePersistsStateWithMultiPart
>  times out
> --------------------------------------------------------------------------------------------
>
>                 Key: FLINK-17730
>                 URL: https://issues.apache.org/jira/browse/FLINK-17730
>             Project: Flink
>          Issue Type: Bug
>          Components: Build System / Azure Pipelines, FileSystems, Tests
>            Reporter: Robert Metzger
>            Assignee: Robert Metzger
>            Priority: Major
>              Labels: pull-request-available, test-stability
>
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=1374&view=logs&j=d44f43ce-542c-597d-bf94-b0718c71e5e8&t=34f486e1-e1e4-5dd2-9c06-bfdd9b9c74a8
> After 5 minutes 
> {code}
> 2020-05-15T06:56:38.1688341Z "main" #1 prio=5 os_prio=0 
> tid=0x00007fa10800b800 nid=0x1161 runnable [0x00007fa110959000]
> 2020-05-15T06:56:38.1688709Z    java.lang.Thread.State: RUNNABLE
> 2020-05-15T06:56:38.1689028Z  at 
> java.net.SocketInputStream.socketRead0(Native Method)
> 2020-05-15T06:56:38.1689496Z  at 
> java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
> 2020-05-15T06:56:38.1689921Z  at 
> java.net.SocketInputStream.read(SocketInputStream.java:171)
> 2020-05-15T06:56:38.1690316Z  at 
> java.net.SocketInputStream.read(SocketInputStream.java:141)
> 2020-05-15T06:56:38.1690723Z  at 
> sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
> 2020-05-15T06:56:38.1691196Z  at 
> sun.security.ssl.InputRecord.readV3Record(InputRecord.java:593)
> 2020-05-15T06:56:38.1691608Z  at 
> sun.security.ssl.InputRecord.read(InputRecord.java:532)
> 2020-05-15T06:56:38.1692023Z  at 
> sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:975)
> 2020-05-15T06:56:38.1692558Z  - locked <0x00000000b94644f8> (a 
> java.lang.Object)
> 2020-05-15T06:56:38.1692946Z  at 
> sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:933)
> 2020-05-15T06:56:38.1693371Z  at 
> sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
> 2020-05-15T06:56:38.1694151Z  - locked <0x00000000b9464d20> (a 
> sun.security.ssl.AppInputStream)
> 2020-05-15T06:56:38.1694908Z  at 
> org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
> 2020-05-15T06:56:38.1695475Z  at 
> org.apache.http.impl.io.SessionInputBufferImpl.read(SessionInputBufferImpl.java:198)
> 2020-05-15T06:56:38.1696007Z  at 
> org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:176)
> 2020-05-15T06:56:38.1696509Z  at 
> org.apache.http.conn.EofSensorInputStream.read(EofSensorInputStream.java:135)
> 2020-05-15T06:56:38.1696993Z  at 
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1697466Z  at 
> com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:180)
> 2020-05-15T06:56:38.1698069Z  at 
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1698567Z  at 
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1699041Z  at 
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1699624Z  at 
> com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:180)
> 2020-05-15T06:56:38.1700090Z  at 
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1700584Z  at 
> com.amazonaws.util.LengthCheckInputStream.read(LengthCheckInputStream.java:107)
> 2020-05-15T06:56:38.1701282Z  at 
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1701800Z  at 
> com.amazonaws.services.s3.internal.S3AbortableInputStream.read(S3AbortableInputStream.java:125)
> 2020-05-15T06:56:38.1702328Z  at 
> com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
> 2020-05-15T06:56:38.1702804Z  at 
> org.apache.hadoop.fs.s3a.S3AInputStream.lambda$read$3(S3AInputStream.java:445)
> 2020-05-15T06:56:38.1703270Z  at 
> org.apache.hadoop.fs.s3a.S3AInputStream$$Lambda$42/1204178174.execute(Unknown 
> Source)
> 2020-05-15T06:56:38.1703677Z  at 
> org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109)
> 2020-05-15T06:56:38.1704090Z  at 
> org.apache.hadoop.fs.s3a.Invoker.lambda$retry$3(Invoker.java:260)
> 2020-05-15T06:56:38.1704607Z  at 
> org.apache.hadoop.fs.s3a.Invoker$$Lambda$23/1991724700.execute(Unknown Source)
> 2020-05-15T06:56:38.1705115Z  at 
> org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:317)
> 2020-05-15T06:56:38.1705551Z  at 
> org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:256)
> 2020-05-15T06:56:38.1705937Z  at 
> org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:231)
> 2020-05-15T06:56:38.1706363Z  at 
> org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:441)
> 2020-05-15T06:56:38.1707052Z  - locked <0x00000000b7d98b60> (a 
> org.apache.hadoop.fs.s3a.S3AInputStream)
> 2020-05-15T06:56:38.1707438Z  at 
> java.io.DataInputStream.read(DataInputStream.java:149)
> 2020-05-15T06:56:38.1707904Z  at 
> org.apache.flink.runtime.fs.hdfs.HadoopDataInputStream.read(HadoopDataInputStream.java:94)
> 2020-05-15T06:56:38.1708366Z  at 
> sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:284)
> 2020-05-15T06:56:38.1708770Z  at 
> sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:326)
> 2020-05-15T06:56:38.1709150Z  at 
> sun.nio.cs.StreamDecoder.read(StreamDecoder.java:178)
> 2020-05-15T06:56:38.1709784Z  - locked <0x00000000b7d9a2b0> (a 
> java.io.InputStreamReader)
> 2020-05-15T06:56:38.1710170Z  at 
> java.io.InputStreamReader.read(InputStreamReader.java:184)
> 2020-05-15T06:56:38.1710557Z  at 
> java.io.BufferedReader.fill(BufferedReader.java:161)
> 2020-05-15T06:56:38.1710956Z  at 
> java.io.BufferedReader.readLine(BufferedReader.java:324)
> 2020-05-15T06:56:38.1711552Z  - locked <0x00000000b7d9a2b0> (a 
> java.io.InputStreamReader)
> 2020-05-15T06:56:38.1711930Z  at 
> java.io.BufferedReader.readLine(BufferedReader.java:389)
> 2020-05-15T06:56:38.1712451Z  at 
> org.apache.flink.fs.s3hadoop.HadoopS3RecoverableWriterITCase.getContentsOfFile(HadoopS3RecoverableWriterITCase.java:423)
> 2020-05-15T06:56:38.1713152Z  at 
> org.apache.flink.fs.s3hadoop.HadoopS3RecoverableWriterITCase.testResumeAfterMultiplePersist(HadoopS3RecoverableWriterITCase.java:411)
> 2020-05-15T06:56:38.1713922Z  at 
> org.apache.flink.fs.s3hadoop.HadoopS3RecoverableWriterITCase.testResumeAfterMultiplePersistWithMultiPartUploads(HadoopS3RecoverableWriterITCase.java:364)
> 2020-05-15T06:56:38.1714804Z  at 
> org.apache.flink.fs.s3hadoop.HadoopS3RecoverableWriterITCase.testRecoverAfterMultiplePersistsStateWithMultiPart(HadoopS3RecoverableWriterITCase.java:344)
> 2020-05-15T06:56:38.1715395Z  at 
> sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 2020-05-15T06:56:38.1715807Z  at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 2020-05-15T06:56:38.1716313Z  at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 2020-05-15T06:56:38.1716754Z  at 
> java.lang.reflect.Method.invoke(Method.java:498)
> 2020-05-15T06:56:38.1717181Z  at 
> org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
> 2020-05-15T06:56:38.1717696Z  at 
> org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> 2020-05-15T06:56:38.1718191Z  at 
> org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
> 2020-05-15T06:56:38.1718687Z  at 
> org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> 2020-05-15T06:56:38.1719181Z  at 
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
> 2020-05-15T06:56:38.1719822Z  at 
> org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
> 2020-05-15T06:56:38.1720462Z  at 
> org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
> 2020-05-15T06:56:38.1720852Z  at 
> org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2020-05-15T06:56:38.1721321Z  at 
> org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
> 2020-05-15T06:56:38.1721770Z  at 
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
> 2020-05-15T06:56:38.1722272Z  at 
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
> 2020-05-15T06:56:38.1722731Z  at 
> org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
> 2020-05-15T06:56:38.1723133Z  at 
> org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
> 2020-05-15T06:56:38.1723561Z  at 
> org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
> 2020-05-15T06:56:38.1724113Z  at 
> org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
> 2020-05-15T06:56:38.1724630Z  at 
> org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
> 2020-05-15T06:56:38.1725086Z  at 
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
> 2020-05-15T06:56:38.1725553Z  at 
> org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
> 2020-05-15T06:56:38.1726019Z  at 
> org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2020-05-15T06:56:38.1726422Z  at 
> org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2020-05-15T06:56:38.1726808Z  at 
> org.junit.runners.ParentRunner.run(ParentRunner.java:363)
> 2020-05-15T06:56:38.1727227Z  at 
> org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> 2020-05-15T06:56:38.1727727Z  at 
> org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> 2020-05-15T06:56:38.1728245Z  at 
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> 2020-05-15T06:56:38.1728733Z  at 
> org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> 2020-05-15T06:56:38.1729350Z  at 
> org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
> 2020-05-15T06:56:38.1729898Z  at 
> org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
> 2020-05-15T06:56:38.1730401Z  at 
> org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
> 2020-05-15T06:56:38.1730874Z  at 
> org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to