[ https://issues.apache.org/jira/browse/HADOOP-19120?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17832769#comment-17832769 ]
ASF GitHub Bot commented on HADOOP-19120: ----------------------------------------- anujmodi2021 commented on code in PR #6633: URL: https://github.com/apache/hadoop/pull/6633#discussion_r1546081799 ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java: ########## @@ -0,0 +1,422 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsApacheHttpExpect100Exception; +import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; +import org.apache.http.Header; +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpDelete; +import org.apache.http.client.methods.HttpEntityEnclosingRequestBase; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpHead; +import org.apache.http.client.methods.HttpPatch; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.util.EntityUtils; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APACHE_IMPL; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_HEAD; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_POST; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID; +import static org.apache.http.entity.ContentType.TEXT_PLAIN; + +/** + * Implementation of {@link HttpOperation} for orchestrating server calls using + * Apache Http Client. + */ +public class AbfsAHCHttpOperation extends HttpOperation { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsAHCHttpOperation.class); + + /** + * Map to store the AbfsApacheHttpClient. Each instance of AbfsClient to have + * a unique AbfsApacheHttpClient instance. The key of the map is the UUID of the client. + */ + private static final Map<String, AbfsApacheHttpClient> + ABFS_APACHE_HTTP_CLIENT_MAP = new HashMap<>(); + + private AbfsApacheHttpClient abfsApacheHttpClient; + + private HttpRequestBase httpRequestBase; + + private HttpResponse httpResponse; + + private AbfsManagedHttpContext abfsHttpClientContext; + + private final AbfsRestOperationType abfsRestOperationType; + + private boolean connectionDisconnectedOnError = false; + + private AbfsApacheHttpExpect100Exception abfsApacheHttpExpect100Exception; + + private final boolean isPayloadRequest; + + private List<AbfsHttpHeader> requestHeaders; + + private AbfsAHCHttpOperation(final URL url, + final String method, + final List<AbfsHttpHeader> requestHeaders, + final AbfsRestOperationType abfsRestOperationType) { + super(LOG, url, method); + this.abfsRestOperationType = abfsRestOperationType; + this.requestHeaders = requestHeaders; + this.isPayloadRequest = isPayloadRequest(method); + } + + public AbfsAHCHttpOperation(final URL url, + final String method, + final List<AbfsHttpHeader> requestHeaders, + final AbfsConfiguration abfsConfiguration, + final String clientId, + final AbfsRestOperationType abfsRestOperationType) { + super(LOG, url, method); + 
this.abfsRestOperationType = abfsRestOperationType; + this.requestHeaders = requestHeaders; + setAbfsApacheHttpClient(abfsConfiguration, clientId); + this.isPayloadRequest = isPayloadRequest(method); + } + + public AbfsAHCHttpOperation(final URL url, + final String method, + final ArrayList<AbfsHttpHeader> requestHeaders, + final int httpStatus) { + this(url, method, requestHeaders, null); + setStatusCode(httpStatus); + } + + private void setAbfsApacheHttpClient(final AbfsConfiguration abfsConfiguration, + final String clientId) { Review Comment: Outdated. Please ignore > [ABFS]: ApacheHttpClient adaptation as network library > ------------------------------------------------------ > > Key: HADOOP-19120 > URL: https://issues.apache.org/jira/browse/HADOOP-19120 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs/azure > Affects Versions: 3.5.0 > Reporter: Pranav Saxena > Assignee: Pranav Saxena > Priority: Major > Labels: pull-request-available > > Apache HttpClient is more feature-rich and flexible and gives application > more granular control over networking parameter. > ABFS currently relies on the JDK-net library. This library is managed by > OpenJDK and has no performance problem. However, it limits the application's > control over networking, and there are very few APIs and hooks exposed that > the application can use to get metrics, choose which and when a connection > should be reused. ApacheHttpClient will give important hooks to fetch > important metrics and control networking parameters. > A custom implementation of connection-pool is used. The implementation is > adapted from the JDK8 connection pooling. Reasons for doing it: > 1. PoolingHttpClientConnectionManager heuristic caches all the reusable > connections it has created. JDK's implementation only caches limited number > of connections. The limit is given by JVM system property > "http.maxConnections". If there is no system-property, it defaults to 5. 
> Connection-establishment latency increased when all the connections were > cached. Hence, the pooling heuristic of the JDK netlib is adapted. > 2. PoolingHttpClientConnectionManager expects the application to > provide `setMaxPerRoute` and `setMaxTotal`, which the implementation uses as > the total number of connections it can create. For applications using ABFS, it > is not feasible to provide a value in the initialisation of the > connectionManager. JDK's implementation has no cap on the number of > connections it can have open at any moment. Hence, the pooling > heuristic of the JDK netlib is adapted. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org