[ https://issues.apache.org/jira/browse/BEAM-5650?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Ismaël Mejía updated BEAM-5650: ------------------------------- Summary: Timeout exceptions while reading a lot of files from a bounded source like S3 with Flink runner (was: Timeout exceptions while reading a lot of files from a bounded source like S3 when using TextIO) > Timeout exceptions while reading a lot of files from a bounded source like S3 > with Flink runner > ----------------------------------------------------------------------------------------------- > > Key: BEAM-5650 > URL: https://issues.apache.org/jira/browse/BEAM-5650 > Project: Beam > Issue Type: Bug > Components: runner-core, runner-flink > Reporter: Ankit Jhalaria > Assignee: Ankit Jhalaria > Priority: Major > Time Spent: 0.5h > Remaining Estimate: 0h > > * Using TextIO, I was trying to read around 850 files. > * Getting this exception while using FlinkRunner > > {code:java} > //Caused by: org.apache.flink.runtime.client.JobExecutionException: > java.io.IOException: com.amazonaws.SdkClientException: Unable to execute HTTP > request: Timeout waiting for connection from pool at > org.apache.flink.runtime.minicluster.MiniCluster.executeJobBlocking(MiniCluster.java:625) > at > org.apache.flink.streaming.api.environment.LocalStreamEnvironment.execute(LocalStreamEnvironment.java:123) > at > org.apache.beam.runners.flink.FlinkPipelineExecutionEnvironment.executePipeline(FlinkPipelineExecutionEnvironment.java:175) > at org.apache.beam.runners.flink.FlinkRunner.run(FlinkRunner.java:115) ... > 28 more Caused by: java.io.IOException: com.amazonaws.SdkClientException: > Unable to execute HTTP request: Timeout waiting for connection from pool at > org.apache.beam.sdk.io.aws.s3.S3ReadableSeekableByteChannel.read(S3ReadableSeekableByteChannel.java:91) > at > org.apache.beam.sdk.io.CompressedSource$CompressedReader$CountingChannel.read(CompressedSource.java:382) > at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:65) at > sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:109) at > sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:103) at > java.io.FilterInputStream.read(FilterInputStream.java:133) at > java.io.PushbackInputStream.read(PushbackInputStream.java:186) at > org.apache.beam.repackaged.beam_sdks_java_core.com.google.common.io.ByteStreams.read(ByteStreams.java:859) > at > org.apache.beam.sdk.io.Compression$3.readDecompressed(Compression.java:81) at > org.apache.beam.sdk.io.CompressedSource$CompressionMode.createDecompressingChannel(CompressedSource.java:110) > at > org.apache.beam.sdk.io.CompressedSource$CompressedReader.startReading(CompressedSource.java:417) > at > org.apache.beam.sdk.io.FileBasedSource$FileBasedReader.startImpl(FileBasedSource.java:476) > at > org.apache.beam.sdk.io.OffsetBasedSource$OffsetBasedReader.start(OffsetBasedSource.java:249) > at > org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource$BoundedToUnboundedSourceAdapter$ResidualSource.advance(UnboundedReadFromBoundedSource.java:456) > at > org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource$BoundedToUnboundedSourceAdapter$ResidualSource.access$300(UnboundedReadFromBoundedSource.java:434) > at > org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource$BoundedToUnboundedSourceAdapter$Reader.advance(UnboundedReadFromBoundedSource.java:286) > at > org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource$BoundedToUnboundedSourceAdapter$Reader.start(UnboundedReadFromBoundedSource.java:279) > at > org.apache.beam.runners.flink.metrics.ReaderInvocationUtil.invokeStart(ReaderInvocationUtil.java:51) > at > org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper.run(UnboundedSourceWrapper.java:250) > at > org.apache.beam.runners.flink.FlinkStreamingTransformTranslators$UnboundedSourceWrapperNoValueWithRecordId.run(FlinkStreamingTransformTranslators.java:1299) > at > org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:87) > at > org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:56) > at > org.apache.flink.streaming.runtime.tasks.SourceStreamTask.run(SourceStreamTask.java:99) > at > org.apache.flink.streaming.runtime.tasks.StoppableSourceStreamTask.run(StoppableSourceStreamTask.java:45) > at > org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:306) > at org.apache.flink.runtime.taskmanager.Task.run(Task.java:703) at > java.lang.Thread.run(Thread.java:748) Caused by: > com.amazonaws.SdkClientException: Unable to execute HTTP request: Timeout > waiting for connection from pool at > com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleRetryableException(AmazonHttpClient.java:1116) > at > com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1066) > at > com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:743) > at > com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:717) > at > com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699) > at > com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667) > at > com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649) > at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513) at > com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4368) at > com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4315) at > com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1416) > at > org.apache.beam.sdk.io.aws.s3.S3ReadableSeekableByteChannel.read(S3ReadableSeekableByteChannel.java:89) > ... 26 more Caused by: org.apache.http.conn.ConnectionPoolTimeoutException: > Timeout waiting for connection from pool at > org.apache.http.impl.conn.PoolingHttpClientConnectionManager.leaseConnection(PoolingHttpClientConnectionManager.java:313) > at > org.apache.http.impl.conn.PoolingHttpClientConnectionManager$1.get(PoolingHttpClientConnectionManager.java:279) > at sun.reflect.GeneratedMethodAccessor19.invoke(Unknown Source) at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) at > com.amazonaws.http.conn.ClientConnectionRequestFactory$Handler.invoke(ClientConnectionRequestFactory.java:70) > at com.amazonaws.http.conn.$Proxy65.get(Unknown Source) at > org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:191) > at > org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:185) at > org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185) > at > org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83) > at > org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:56) > at > com.amazonaws.http.apache.client.impl.SdkHttpClient.execute(SdkHttpClient.java:72) > at > com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1238) > at > com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1058) > ... 36 more{code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)