[
https://issues.apache.org/jira/browse/BEAM-6103?focusedWorklogId=168145&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-168145
]
ASF GitHub Bot logged work on BEAM-6103:
----------------------------------------
Author: ASF GitHub Bot
Created on: 21/Nov/18 02:37
Start Date: 21/Nov/18 02:37
Worklog Time Spent: 10m
Work Description: chamikaramj closed pull request #7097: [BEAM-6103] Adds
support for setting a HTTP write timeout for BigQueryIO
URL: https://github.com/apache/beam/pull/7097
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/util/RetryHttpRequestInitializer.java
b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/util/RetryHttpRequestInitializer.java
index 92b52adc072..92982276c8d 100644
---
a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/util/RetryHttpRequestInitializer.java
+++
b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/util/RetryHttpRequestInitializer.java
@@ -58,6 +58,8 @@
/** Http response timeout to use for hanging gets. */
private static final int HANGING_GET_TIMEOUT_SEC = 80;
+ private int writeTimeout;
+
/** Handlers used to provide additional logging information on unsuccessful
HTTP requests. */
private static class LoggingHttpBackOffHandler
implements HttpIOExceptionHandler, HttpUnsuccessfulResponseHandler {
@@ -214,6 +216,7 @@ public RetryHttpRequestInitializer(
this.sleeper = sleeper;
this.ignoredResponseCodes.addAll(additionalIgnoredResponseCodes);
this.responseInterceptor = responseInterceptor;
+ this.writeTimeout = 0;
}
@Override
@@ -221,6 +224,7 @@ public void initialize(HttpRequest request) throws
IOException {
// Set a timeout for hanging-gets.
// TODO: Do this exclusively for work requests.
request.setReadTimeout(HANGING_GET_TIMEOUT_SEC * 1000);
+ request.setWriteTimeout(this.writeTimeout);
LoggingHttpBackOffHandler loggingHttpBackOffHandler =
new LoggingHttpBackOffHandler(
@@ -241,4 +245,8 @@ public void initialize(HttpRequest request) throws
IOException {
request.setResponseInterceptor(responseInterceptor);
}
}
+
+ public void setWriteTimeout(int writeTimeout) {
+ this.writeTimeout = writeTimeout;
+ }
}
diff --git
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryOptions.java
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryOptions.java
index 6ab07a49cbc..d6a80a790c8 100644
---
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryOptions.java
+++
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryOptions.java
@@ -37,4 +37,12 @@
String getTempDatasetId();
void setTempDatasetId(String value);
+
+ @Description(
+ "If specified, the given write timeout will be set to HTTP requests
created to"
+ + "communicate with BigQuery service.")
+ @Default.Integer(0)
+ Integer getHTTPWriteTimeout();
+
+ void setHTTPWriteTimeout(Integer timeout);
}
diff --git
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
index 809b80a302c..cbdb44f8ade 100644
---
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
+++
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryServicesImpl.java
@@ -905,13 +905,16 @@ private static boolean nextBackOff(Sleeper sleeper,
BackOff backoff) throws Inte
/** Returns a BigQuery client builder using the specified {@link
BigQueryOptions}. */
private static Bigquery.Builder newBigQueryClient(BigQueryOptions options) {
+ RetryHttpRequestInitializer httpRequestInitializer =
+ new RetryHttpRequestInitializer(ImmutableList.of(404));
+ httpRequestInitializer.setWriteTimeout(options.getHTTPWriteTimeout());
return new Bigquery.Builder(
Transport.getTransport(),
Transport.getJsonFactory(),
chainHttpRequestInitializer(
options.getGcpCredential(),
// Do not log 404. It clutters the output and is possibly even
required by the caller.
- new RetryHttpRequestInitializer(ImmutableList.of(404))))
+ httpRequestInitializer))
.setApplicationName(options.getAppName())
.setGoogleClientRequestInitializer(options.getGoogleApiTrace());
}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
Issue Time Tracking
-------------------
Worklog Id: (was: 168145)
Time Spent: 1h (was: 50m)
> Allow setting a HTTP write timeout to requests to BigQuery
> ----------------------------------------------------------
>
> Key: BEAM-6103
> URL: https://issues.apache.org/jira/browse/BEAM-6103
> Project: Beam
> Issue Type: Bug
> Components: io-java-gcp
> Affects Versions: Not applicable
> Reporter: Chamikara Jayalath
> Assignee: Chamikara Jayalath
> Priority: Major
> Time Spent: 1h
> Remaining Estimate: 0h
>
> Several users have reported that very occasionally some of the streaming
> insert requests to BigQuery are getting stuck.
>
> This can be mitigated by setting a write timeout on sockets and allowing
> runners to retry. However, Java does not support setting write timeouts for
> non-NIO sockets [1], and this limitation affects the Google HTTP client
> library used by GCP IO.
>
> The Google HTTP client library was recently updated to support write
> timeouts [2], so we can update BigQueryIO to use this feature and allow
> setting a write timeout. Setting a write timeout results in more threads
> being created, so this path can be slightly expensive and hence should not
> be made the default.
> [1] [https://bugs.java.com/bugdatabase/view_bug.do?bug_id=4031100]
> [2]
> [https://github.com/googleapis/google-http-java-client/releases/tag/v1.27.0]
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)