[ https://issues.apache.org/jira/browse/HADOOP-19120?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17832120#comment-17832120 ]
ASF GitHub Bot commented on HADOOP-19120: ----------------------------------------- saxenapranav commented on code in PR #6633: URL: https://github.com/apache/hadoop/pull/6633#discussion_r1544293126 ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsConnectionManager.java: ########## @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.fs.azurebfs.services.kac.KeepAliveCache; +import org.apache.http.HttpClientConnection; +import org.apache.http.config.Registry; +import org.apache.http.config.SocketConfig; +import org.apache.http.conn.ConnectionPoolTimeoutException; +import org.apache.http.conn.ConnectionRequest; +import org.apache.http.conn.HttpClientConnectionManager; +import org.apache.http.conn.HttpClientConnectionOperator; +import org.apache.http.conn.routing.HttpRoute; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.impl.conn.DefaultHttpClientConnectionOperator; +import org.apache.http.impl.conn.ManagedHttpClientConnectionFactory; +import org.apache.http.protocol.HttpContext; +import org.apache.http.util.Asserts; + +/** + * AbfsConnectionManager is a custom implementation of {@link HttpClientConnectionManager}. + * This implementation manages connection-pooling heuristics and custom implementation + * of {@link ManagedHttpClientConnectionFactory}. 
+ */ +public class AbfsConnectionManager implements HttpClientConnectionManager { + + private final KeepAliveCache kac = KeepAliveCache.getInstance(); + + private final AbfsConnFactory httpConnectionFactory; + + private final HttpClientConnectionOperator connectionOperator; + + public AbfsConnectionManager(Registry<ConnectionSocketFactory> socketFactoryRegistry, + AbfsConnFactory connectionFactory) { + this.httpConnectionFactory = connectionFactory; + connectionOperator = new DefaultHttpClientConnectionOperator( + socketFactoryRegistry, null, null); + } + + @Override + public ConnectionRequest requestConnection(final HttpRoute route, + final Object state) { + return new ConnectionRequest() { + @Override + public HttpClientConnection get(final long timeout, + final TimeUnit timeUnit) + throws InterruptedException, ExecutionException, + ConnectionPoolTimeoutException { + try { + HttpClientConnection client = kac.get(route); + if (client != null && client.isOpen()) { + return client; + } + return httpConnectionFactory.create(route, null); + } catch (IOException ex) { + throw new ExecutionException(ex); + } + } + + @Override + public boolean cancel() { + return false; + } + }; + } + + /** + * Releases a connection for reuse. It can be reused only if validDuration is greater than 0. + * This method is called by {@link org.apache.http.impl.execchain} internal class `ConnectionHolder`. + * If it wants to reuse the connection, it will send a non-zero validDuration, else it will send 0. 
+ * @param conn the connection to release + * @param newState the new state of the connection + * @param validDuration the duration for which the connection is valid + * @param timeUnit the time unit for the validDuration + */ + @Override + public void releaseConnection(final HttpClientConnection conn, + final Object newState, + final long validDuration, + final TimeUnit timeUnit) { + if (validDuration == 0) { + return; + } + if (conn.isOpen() && conn instanceof AbfsManagedApacheHttpConnection) { + HttpRoute route = ((AbfsManagedApacheHttpConnection) conn).getHttpRoute(); + if (route != null) { + kac.put(route, conn); + } + } + } + + @Override + public void connect(final HttpClientConnection conn, + final HttpRoute route, + final int connectTimeout, + final HttpContext context) throws IOException { + Asserts.check(conn instanceof AbfsManagedApacheHttpConnection, + "Connection not obtained from this manager"); + long start = System.currentTimeMillis(); + connectionOperator.connect((AbfsManagedApacheHttpConnection) conn, + route.getTargetHost(), route.getLocalSocketAddress(), + connectTimeout, SocketConfig.DEFAULT, context); + if (context instanceof AbfsManagedHttpContext) { + ((AbfsManagedHttpContext) context).setConnectTime( + System.currentTimeMillis() - start); + } + } + + @Override + public void upgrade(final HttpClientConnection conn, + final HttpRoute route, + final HttpContext context) throws IOException { + Asserts.check(conn instanceof AbfsManagedApacheHttpConnection, Review Comment: Earlier this and the connection class was public, which can lead to outside of abfs, creation of connection. Have made them package-protected now. Don't need these assert check, would remove them. 
> [ABFS]: ApacheHttpClient adaptation as network library > ------------------------------------------------------ > > Key: HADOOP-19120 > URL: https://issues.apache.org/jira/browse/HADOOP-19120 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs/azure > Affects Versions: 3.5.0 > Reporter: Pranav Saxena > Assignee: Pranav Saxena > Priority: Major > Labels: pull-request-available > > Apache HttpClient is more feature-rich and flexible and gives the application > more granular control over networking parameters. > ABFS currently relies on the JDK-net library. This library is managed by > OpenJDK and has no performance problem. However, it limits the application's > control over networking, and there are very few APIs and hooks exposed that > the application can use to get metrics, choose which and when a connection > should be reused. ApacheHttpClient will give important hooks to fetch > important metrics and control networking parameters. > A custom implementation of connection-pool is used. The implementation is > adapted from the JDK8 connection pooling. Reasons for doing it: > 1. PoolingHttpClientConnectionManager heuristic caches all the reusable > connections it has created. JDK's implementation only caches a limited number > of connections. The limit is given by the JVM system property > "http.maxConnections". If there is no system property, it defaults to 5. > Connection-establishment latency increased when all the connections were > cached. Hence, the pooling heuristic of the JDK net library was adapted. > 2. In PoolingHttpClientConnectionManager, it expects the application to > provide `setMaxPerRoute` and `setMaxTotal`, which the implementation uses as > the total number of connections it can create. For applications using ABFS, it > is not feasible to provide a value in the initialisation of the > connectionManager. JDK's implementation has no cap on the number of > connections it can have open at any moment.
Hence, the pooling > heuristic of the JDK net library was adapted. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-issues-help@hadoop.apache.org