This is an automated email from the ASF dual-hosted git repository.

fanng pushed a commit to branch branch-0.8
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/branch-0.8 by this push:
     new 75d7d38d9c [#5361] improvment(hadoop-catalog): Introduce timeout 
mechanism to get Hadoop File System. (#6221)
75d7d38d9c is described below

commit 75d7d38d9cde23a26cc92cc117201e8aa74bad94
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Jan 14 15:07:06 2025 +0800

    [#5361] improvment(hadoop-catalog): Introduce timeout mechanism to get 
Hadoop File System. (#6221)
    
    ### What changes were proposed in this pull request?
    
    Introduce a timeout mechanism when getting a Hadoop FileSystem instance.
    
    ### Why are the changes needed?
    
    
    Cloud filesystem like S3 and OSS(10 minutes) has a very long connection
    and can't be tune by configuration, this will cause deadlock as it will
    hold the tree lock for a long time
    
    Fix: #5361
    Fix: #6156
    
    ### Does this PR introduce _any_ user-facing change?
    
    N/A.
    
    ### How was this patch tested?
    
    Existing test.
    
    Co-authored-by: Qi Yu <[email protected]>
---
 LICENSE.bin                                        |  1 +
 catalogs/catalog-hadoop/build.gradle.kts           |  1 +
 .../catalog/hadoop/HadoopCatalogOperations.java    | 35 +++++++++++++++++++++-
 .../hadoop/HadoopCatalogPropertiesMetadata.java    | 11 +++++++
 .../org/apache/gravitino/lock/LockManager.java     |  9 ++++--
 docs/hadoop-catalog.md                             |  9 +++---
 6 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/LICENSE.bin b/LICENSE.bin
index effaa4ac4a..d1dddd5279 100644
--- a/LICENSE.bin
+++ b/LICENSE.bin
@@ -374,6 +374,7 @@
    Apache Arrow
    Rome
    Jettison
+   Awaitility
 
    This product bundles various third-party components also under the
    Apache Software Foundation License 1.1
diff --git a/catalogs/catalog-hadoop/build.gradle.kts 
b/catalogs/catalog-hadoop/build.gradle.kts
index d599a5e72f..3108d993c1 100644
--- a/catalogs/catalog-hadoop/build.gradle.kts
+++ b/catalogs/catalog-hadoop/build.gradle.kts
@@ -54,6 +54,7 @@ dependencies {
     exclude("org.fusesource.leveldbjni")
   }
   implementation(libs.slf4j.api)
+  implementation(libs.awaitility)
 
   compileOnly(libs.guava)
 
diff --git 
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
 
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
index 36177bea37..6c032414be 100644
--- 
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
+++ 
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
@@ -31,6 +31,8 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.gravitino.Catalog;
 import org.apache.gravitino.Entity;
@@ -71,6 +73,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.awaitility.Awaitility;
+import org.awaitility.core.ConditionTimeoutException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -755,6 +759,35 @@ public class HadoopCatalogOperations extends 
ManagedSchemaOperations
               scheme, path, fileSystemProvidersMap.keySet(), 
fileSystemProvidersMap.values()));
     }
 
-    return provider.getFileSystem(path, config);
+    int timeoutSeconds =
+        (int)
+            propertiesMetadata
+                .catalogPropertiesMetadata()
+                .getOrDefault(
+                    config, 
HadoopCatalogPropertiesMetadata.FILESYSTEM_CONNECTION_TIMEOUT_SECONDS);
+    try {
+      AtomicReference<FileSystem> fileSystem = new AtomicReference<>();
+      Awaitility.await()
+          .atMost(timeoutSeconds, TimeUnit.SECONDS)
+          .until(
+              () -> {
+                fileSystem.set(provider.getFileSystem(path, config));
+                return true;
+              });
+      return fileSystem.get();
+    } catch (ConditionTimeoutException e) {
+      throw new IOException(
+          String.format(
+              "Failed to get FileSystem for path: %s, scheme: %s, provider: 
%s, config: %s within %s "
+                  + "seconds, please check the configuration or increase the "
+                  + "file system connection timeout time by setting catalog 
property: %s",
+              path,
+              scheme,
+              provider,
+              config,
+              timeoutSeconds,
+              
HadoopCatalogPropertiesMetadata.FILESYSTEM_CONNECTION_TIMEOUT_SECONDS),
+          e);
+    }
   }
 }
diff --git 
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
 
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
index 22cf0d5b2c..3bdc125efc 100644
--- 
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
+++ 
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
@@ -53,6 +53,9 @@ public class HadoopCatalogPropertiesMetadata extends 
BaseCatalogPropertiesMetada
    */
   public static final String DEFAULT_FS_PROVIDER = 
"default-filesystem-provider";
 
+  static final String FILESYSTEM_CONNECTION_TIMEOUT_SECONDS = 
"filesystem-conn-timeout-secs";
+  static final int DEFAULT_GET_FILESYSTEM_TIMEOUT_SECONDS = 6;
+
   public static final String BUILTIN_LOCAL_FS_PROVIDER = "builtin-local";
   public static final String BUILTIN_HDFS_FS_PROVIDER = "builtin-hdfs";
 
@@ -82,6 +85,14 @@ public class HadoopCatalogPropertiesMetadata extends 
BaseCatalogPropertiesMetada
                   false /* immutable */,
                   BUILTIN_LOCAL_FS_PROVIDER, // please see 
LocalFileSystemProvider#name()
                   false /* hidden */))
+          .put(
+              FILESYSTEM_CONNECTION_TIMEOUT_SECONDS,
+              PropertyEntry.integerOptionalPropertyEntry(
+                  FILESYSTEM_CONNECTION_TIMEOUT_SECONDS,
+                  "Timeout to wait for to create the Hadoop file system client 
instance.",
+                  false /* immutable */,
+                  DEFAULT_GET_FILESYSTEM_TIMEOUT_SECONDS,
+                  false /* hidden */))
           // The following two are about authentication.
           .putAll(KERBEROS_PROPERTY_ENTRIES)
           .putAll(AuthenticationConfig.AUTHENTICATION_PROPERTY_ENTRIES)
diff --git a/core/src/main/java/org/apache/gravitino/lock/LockManager.java 
b/core/src/main/java/org/apache/gravitino/lock/LockManager.java
index 222dee8daa..d52c858cc4 100644
--- a/core/src/main/java/org/apache/gravitino/lock/LockManager.java
+++ b/core/src/main/java/org/apache/gravitino/lock/LockManager.java
@@ -26,6 +26,7 @@ import static 
org.apache.gravitino.Configs.TREE_LOCK_MIN_NODE_IN_MEMORY;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.text.SimpleDateFormat;
 import java.util.List;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
@@ -136,10 +137,14 @@ public class LockManager {
               // If the thread is holding the lock for more than 30 seconds, 
we will log it.
               if (System.currentTimeMillis() - ts > 30000) {
                 LOG.warn(
-                    "Dead lock detected for thread with identifier {} on node 
{}, threads that holding the node: {} ",
+                    "Thread with identifier {} holds the lock node {} for more 
than 30s since {}, please "
+                        + "check if some dead lock or thread hang like 
io-connection hangs",
                     threadIdentifier,
                     node,
-                    node.getHoldingThreadTimestamp());
+                    // SimpleDateFormat is not thread-safe, so we should 
create a new instance for
+                    // each time
+                    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
+                        .format(node.getHoldingThreadTimestamp()));
               }
             });
   }
diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md
index 99e1dd7854..cbdae84689 100644
--- a/docs/hadoop-catalog.md
+++ b/docs/hadoop-catalog.md
@@ -23,10 +23,11 @@ Hadoop 3. If there's any compatibility issue, please create 
an [issue](https://g
 
 Besides the [common catalog 
properties](./gravitino-server-config.md#apache-gravitino-catalog-properties-configuration),
 the Hadoop catalog has the following properties:
 
-| Property Name          | Description                                        
| Default Value | Required | Since Version    |
-|------------------------|----------------------------------------------------|---------------|----------|------------------|
-| `location`             | The storage location managed by Hadoop catalog.    
| (none)        | No       | 0.5.0            |
-| `credential-providers` | The credential provider types, separated by comma. 
| (none)        | No       | 0.8.0-incubating |
+| Property Name                  | Description                                 
                                                        | Default Value | 
Required | Since Version    |
+|--------------------------------|-----------------------------------------------------------------------------------------------------|---------------|----------|------------------|
+| `location`                     | The storage location managed by Hadoop 
catalog.                                                     | (none)        | 
No       | 0.5.0            |
+| `filesystem-conn-timeout-secs` | The timeout of getting the file system 
using Hadoop FileSystem client instance. Time unit: seconds. | 6             | 
No       | 0.8.0-incubating |
+| `credential-providers`         | The credential provider types, separated by 
comma.                                                  | (none)        | No    
   | 0.8.0-incubating |
 
 Please refer to [Credential vending](./security/credential-vending.md) for 
more details about credential vending.
 

Reply via email to