[ 
https://issues.apache.org/jira/browse/HADOOP-19120?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17832120#comment-17832120
 ] 

ASF GitHub Bot commented on HADOOP-19120:
-----------------------------------------

saxenapranav commented on code in PR #6633:
URL: https://github.com/apache/hadoop/pull/6633#discussion_r1544293126


##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsConnectionManager.java:
##########
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.fs.azurebfs.services.kac.KeepAliveCache;
+import org.apache.http.HttpClientConnection;
+import org.apache.http.config.Registry;
+import org.apache.http.config.SocketConfig;
+import org.apache.http.conn.ConnectionPoolTimeoutException;
+import org.apache.http.conn.ConnectionRequest;
+import org.apache.http.conn.HttpClientConnectionManager;
+import org.apache.http.conn.HttpClientConnectionOperator;
+import org.apache.http.conn.routing.HttpRoute;
+import org.apache.http.conn.socket.ConnectionSocketFactory;
+import org.apache.http.impl.conn.DefaultHttpClientConnectionOperator;
+import org.apache.http.impl.conn.ManagedHttpClientConnectionFactory;
+import org.apache.http.protocol.HttpContext;
+import org.apache.http.util.Asserts;
+
+/**
+ * AbfsConnectionManager is a custom implementation of {@link 
HttpClientConnectionManager}.
+ * This implementation manages connection-pooling heuristics and custom 
implementation
+ * of {@link ManagedHttpClientConnectionFactory}.
+ */
+public class AbfsConnectionManager implements HttpClientConnectionManager {
+
+  private final KeepAliveCache kac = KeepAliveCache.getInstance();
+
+  private final AbfsConnFactory httpConnectionFactory;
+
+  private final HttpClientConnectionOperator connectionOperator;
+
+  public AbfsConnectionManager(Registry<ConnectionSocketFactory> 
socketFactoryRegistry,
+      AbfsConnFactory connectionFactory) {
+    this.httpConnectionFactory = connectionFactory;
+    connectionOperator = new DefaultHttpClientConnectionOperator(
+        socketFactoryRegistry, null, null);
+  }
+
+  @Override
+  public ConnectionRequest requestConnection(final HttpRoute route,
+      final Object state) {
+    return new ConnectionRequest() {
+      @Override
+      public HttpClientConnection get(final long timeout,
+          final TimeUnit timeUnit)
+          throws InterruptedException, ExecutionException,
+          ConnectionPoolTimeoutException {
+        try {
+          HttpClientConnection client = kac.get(route);
+          if (client != null && client.isOpen()) {
+            return client;
+          }
+          return httpConnectionFactory.create(route, null);
+        } catch (IOException ex) {
+          throw new ExecutionException(ex);
+        }
+      }
+
+      @Override
+      public boolean cancel() {
+        return false;
+      }
+    };
+  }
+
+  /**
+   * Releases a connection for reuse. It can be reused only if validDuration 
is greater than 0.
+   * This method is called by {@link org.apache.http.impl.execchain} internal 
class `ConnectionHolder`.
+   * If it wants to reuse the connection, it will send a non-zero 
validDuration, else it will send 0.
+   * @param conn the connection to release
+   * @param newState the new state of the connection
+   * @param validDuration the duration for which the connection is valid
+   * @param timeUnit the time unit for the validDuration
+   */
+  @Override
+  public void releaseConnection(final HttpClientConnection conn,
+      final Object newState,
+      final long validDuration,
+      final TimeUnit timeUnit) {
+    if (validDuration == 0) {
+      return;
+    }
+    if (conn.isOpen() && conn instanceof AbfsManagedApacheHttpConnection) {
+      HttpRoute route = ((AbfsManagedApacheHttpConnection) 
conn).getHttpRoute();
+      if (route != null) {
+        kac.put(route, conn);
+      }
+    }
+  }
+
+  @Override
+  public void connect(final HttpClientConnection conn,
+      final HttpRoute route,
+      final int connectTimeout,
+      final HttpContext context) throws IOException {
+    Asserts.check(conn instanceof AbfsManagedApacheHttpConnection,
+        "Connection not obtained from this manager");
+    long start = System.currentTimeMillis();
+    connectionOperator.connect((AbfsManagedApacheHttpConnection) conn,
+        route.getTargetHost(), route.getLocalSocketAddress(),
+        connectTimeout, SocketConfig.DEFAULT, context);
+    if (context instanceof AbfsManagedHttpContext) {
+      ((AbfsManagedHttpContext) context).setConnectTime(
+          System.currentTimeMillis() - start);
+    }
+  }
+
+  @Override
+  public void upgrade(final HttpClientConnection conn,
+      final HttpRoute route,
+      final HttpContext context) throws IOException {
+    Asserts.check(conn instanceof AbfsManagedApacheHttpConnection,

Review Comment:
   Earlier this and the connection class was public, which can lead to outside 
of abfs, creation of connection. Have made them package-protected now. Don't 
need these assert check, would remove them.





> [ABFS]: ApacheHttpClient adaptation as network library
> ------------------------------------------------------
>
>                 Key: HADOOP-19120
>                 URL: https://issues.apache.org/jira/browse/HADOOP-19120
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: fs/azure
>    Affects Versions: 3.5.0
>            Reporter: Pranav Saxena
>            Assignee: Pranav Saxena
>            Priority: Major
>              Labels: pull-request-available
>
> Apache HttpClient is more feature-rich and flexible and gives application 
> more granular control over networking parameter.
> ABFS currently relies on the JDK-net library. This library is managed by 
> OpenJDK and has no performance problem. However, it limits the application's 
> control over networking, and there are very few APIs and hooks exposed that 
> the application can use to get metrics, choose which and when a connection 
> should be reused. ApacheHttpClient will give important hooks to fetch 
> important metrics and control networking parameters.
> A custom implementation of connection-pool is used. The implementation is 
> adapted from the JDK8 connection pooling. Reasons for doing it:
> 1. PoolingHttpClientConnectionManager heuristic caches all the reusable 
> connections it has created. JDK's implementation only caches limited number 
> of connections. The limit is given by JVM system property 
> "http.maxConnections". If there is no system-property, it defaults to 5. 
> Connection-establishment latency increased with all the connections were 
> cached. Hence, adapting the pooling heuristic of JDK netlib,
> 2. In PoolingHttpClientConnectionManager, it expects the application to 
> provide `setMaxPerRoute` and `setMaxTotal`, which the implementation uses as 
> the total number of connections it can create. For application using ABFS, it 
> is not feasible to provide a value in the initialisation of the 
> connectionManager. JDK's implementation has no cap on the number of 
> connections it can have opened on a moment. Hence, adapting the pooling 
> heuristic of JDK netlib,



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

Reply via email to