This is an automated email from the ASF dual-hosted git repository.

anujmodi pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 7827dc63204 HADOOP-19494: [ABFS][FnsOverBlob] Fix Case Sensitivity 
Issue for hdi_isfolder metadata (#7496) (#7584)
7827dc63204 is described below

commit 7827dc6320433546bf69600f241d4b8df1856126
Author: Manish Bhatt <52626736+bhattmanis...@users.noreply.github.com>
AuthorDate: Tue Apr 8 02:28:44 2025 -0700

    HADOOP-19494: [ABFS][FnsOverBlob] Fix Case Sensitivity Issue for 
hdi_isfolder metadata (#7496) (#7584)
    
    Contributed by Manish Bhatt
    Reviewed by Anmol, Manika, Anuj
    
    Signed off by: Anuj Modi<anujm...@apache.org>
---
 .../contracts/services/BlobListXmlParser.java      |   2 +-
 .../fs/azurebfs/services/AbfsAHCHttpOperation.java |  16 +++
 .../fs/azurebfs/services/AbfsBlobClient.java       |   5 +-
 .../hadoop/fs/azurebfs/services/AbfsClient.java    |   3 +-
 .../fs/azurebfs/services/AbfsHttpOperation.java    |  34 +++++-
 .../fs/azurebfs/services/AbfsJdkHttpOperation.java |  16 +++
 .../ITestAzureBlobFileSystemFileStatus.java        |  60 +++++++++++
 .../ITestAzureBlobFileSystemListStatus.java        | 114 +++++++++++++++++++++
 8 files changed, 244 insertions(+), 6 deletions(-)

diff --git 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java
 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java
index 9cfbf237898..59207486312 100644
--- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java
+++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java
@@ -207,7 +207,7 @@ public void endElement(final String uri,
     if (parentNode.equals(AbfsHttpConstants.XML_TAG_METADATA)) {
       currentBlobEntry.addMetadata(currentNode, value);
       // For Marker blobs hdi_isFolder will be present as metadata
-      if (AbfsHttpConstants.XML_TAG_HDI_ISFOLDER.equals(currentNode)) {
+      if 
(AbfsHttpConstants.XML_TAG_HDI_ISFOLDER.equalsIgnoreCase(currentNode)) {
         currentBlobEntry.setIsDirectory(Boolean.valueOf(value));
       }
     }
diff --git 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java
 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java
index dd585d32fb7..2569c6a4bd3 100644
--- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java
+++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java
@@ -312,6 +312,22 @@ public Map<String, List<String>> getResponseHeaders() {
     return headers;
   }
 
+  /**{@inheritDoc}*/
+  @Override
+  public String getResponseHeaderIgnoreCase(final String headerName) {
+    Map<String, List<String>> responseHeaders = getResponseHeaders();
+    if (responseHeaders == null || responseHeaders.isEmpty()) {
+      return null;
+    }
+    // Search for the header value case-insensitively
+    return responseHeaders.entrySet().stream()
+        .filter(entry -> entry.getKey() != null
+            && entry.getKey().equalsIgnoreCase(headerName))
+        .flatMap(entry -> entry.getValue().stream())
+        .findFirst()
+        .orElse(null); // Return null if no match is found
+  }
+
   /**{@inheritDoc}*/
   @Override
   protected InputStream getContentInputStream()
diff --git 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
index 8166cfb60df..32eba86774a 100644
--- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
+++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
@@ -1137,7 +1137,8 @@ public AbfsRestOperation setPathProperties(final String 
path,
         this.createPathRestOp(path, false, false, false, null,
             contextEncryptionAdapter, tracingContext);
         // Make sure hdi_isFolder is added to the list of properties to be set.
-        boolean hdiIsFolderExists = 
properties.containsKey(XML_TAG_HDI_ISFOLDER);
+        boolean hdiIsFolderExists = properties.keySet()
+            .stream().anyMatch(XML_TAG_HDI_ISFOLDER::equalsIgnoreCase);
         if (!hdiIsFolderExists) {
           properties.put(XML_TAG_HDI_ISFOLDER, TRUE);
         }
@@ -1548,7 +1549,7 @@ public AbfsRestOperation deleteBlobPath(final Path 
blobPath,
    */
   @Override
   public boolean checkIsDir(AbfsHttpOperation result) {
-    String resourceType = result.getResponseHeader(X_MS_META_HDI_ISFOLDER);
+    String resourceType = 
result.getResponseHeaderIgnoreCase(X_MS_META_HDI_ISFOLDER);
     return resourceType != null && resourceType.equals(TRUE);
   }
 
diff --git 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
index 8dcd78ea6b3..b1344f2fcdb 100644
--- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
+++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
@@ -1630,7 +1630,8 @@ AbfsRestOperation getAbfsRestOperation(final 
AbfsRestOperationType operationType
    * @param requestHeaders  The list of HTTP headers for the request.
    * @return An AbfsRestOperation instance.
    */
-  AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType 
operationType,
+  @VisibleForTesting
+  public AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType 
operationType,
       final String httpMethod,
       final URL url,
       final List<AbfsHttpHeader> requestHeaders) {
diff --git 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java
 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java
index fc35aecd587..89c97b68baa 100644
--- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java
+++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java
@@ -235,6 +235,14 @@ public final InputStream getListResultStream() {
 
   public abstract Map<String, List<String>> getResponseHeaders();
 
+  /**
+   * Get response header value for the given headerKey ignoring case.
+   *
+   * @param httpHeader header key.
+   * @return header value.
+   */
+  public abstract String getResponseHeaderIgnoreCase(String httpHeader);
+
   // Returns a trace message for the request
   @Override
   public String toString() {
@@ -743,7 +751,7 @@ public String getTracingContextSuffix() {
 
     @Override
     public String getResponseHeader(final String httpHeader) {
-      return "";
+      return EMPTY_STRING;
     }
 
     @Override
@@ -751,6 +759,12 @@ public Map<String, List<String>> getResponseHeaders() {
       return new HashMap<>();
     }
 
+    /**{@inheritDoc}*/
+    @Override
+    public String getResponseHeaderIgnoreCase(final String headerName) {
+      return EMPTY_STRING;
+    }
+
     @Override
     public void sendPayload(final byte[] buffer,
         final int offset,
@@ -787,6 +801,16 @@ public Map<String, List<String>> getResponseHeaders() {
       return new HashMap<>();
     }
 
+    /**{@inheritDoc}*/
+    @Override
+    public String getResponseHeaderIgnoreCase(final String httpHeader) {
+      // Directories on FNS-Blob are identified by a special metadata header.
+      if (httpHeader.equalsIgnoreCase(X_MS_META_HDI_ISFOLDER)) {
+        return TRUE;
+      }
+      return EMPTY_STRING;
+    }
+
     @Override
     public void processResponse(final byte[] buffer,
         final int offset,
@@ -935,7 +959,7 @@ public String getTracingContextSuffix() {
 
     @Override
     public String getResponseHeader(final String httpHeader) {
-      return "";
+      return EMPTY_STRING;
     }
 
     @Override
@@ -943,6 +967,12 @@ public Map<String, List<String>> getResponseHeaders() {
       return new HashMap<>();
     }
 
+    /**{@inheritDoc}*/
+    @Override
+    public String getResponseHeaderIgnoreCase(final String headerName) {
+      return EMPTY_STRING;
+    }
+
     @Override
     public void sendPayload(final byte[] buffer,
         final int offset,
diff --git 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java
 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java
index 20fe215e039..c233fae9e37 100644
--- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java
+++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java
@@ -104,6 +104,22 @@ public Map<String, List<String>> getResponseHeaders() {
     return connection.getHeaderFields();
   }
 
+  /**{@inheritDoc}*/
+  @Override
+  public String getResponseHeaderIgnoreCase(final String headerName) {
+    Map<String, List<String>> responseHeaders = getResponseHeaders();
+    if (responseHeaders == null || responseHeaders.isEmpty()) {
+      return null;
+    }
+    // Search for the header value case-insensitively
+    return responseHeaders.entrySet().stream()
+        .filter(entry -> entry.getKey() != null
+            && entry.getKey().equalsIgnoreCase(headerName))
+        .flatMap(entry -> entry.getValue().stream())
+        .findFirst()
+        .orElse(null); // Return null if no match is found
+  }
+
   /**{@inheritDoc}*/
   public void sendPayload(byte[] buffer, int offset, int length)
       throws IOException {
diff --git 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java
 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java
index f226e5b61fb..2d7298f1e1b 100644
--- 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java
+++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java
@@ -29,9 +29,12 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient;
+import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
 import org.apache.hadoop.fs.permission.FsPermission;
 
 import static 
org.apache.hadoop.fs.CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY;
+import static 
org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.mockAbfsRestOperation;
+import static 
org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.mockIngressClientHandler;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists;
 import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 import static org.mockito.ArgumentMatchers.any;
@@ -284,4 +287,61 @@ private void verifyFileNotFound(FileNotFoundException ex, 
String key) {
     Assertions.assertThat(ex).isNotNull();
     Assertions.assertThat(ex.getMessage()).contains(key);
   }
+
+  /**
+   * Test directory status with different HDI folder configuration,
+   * verifying the correct header and directory state.
+   */
+  private void testIsDirectory(boolean expected, String... configName) throws 
Exception {
+    try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) {
+      assumeBlobServiceType();
+      AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs);
+      // Mock the operation to modify the headers
+      mockAbfsRestOperation(abfsBlobClient, configName);
+
+      // Create the path and invoke mkdirs method
+      Path path = new Path("/testPath");
+      fs.mkdirs(path);
+
+      // Assert that the response header has the updated value
+      FileStatus fileStatus = fs.getFileStatus(path);
+
+      AbfsHttpOperation op = abfsBlobClient.getPathStatus(
+          path.toUri().getPath(),
+          true, getTestTracingContext(fs, true),
+          null).getResult();
+
+      Assertions.assertThat(abfsBlobClient.checkIsDir(op))
+          .describedAs("Directory should be marked as " + expected)
+          .isEqualTo(expected);
+
+      // Verify the header and directory state
+      Assertions.assertThat(fileStatus.isDirectory())
+          .describedAs("Expected directory state: " + expected)
+          .isEqualTo(expected);
+
+      fs.delete(path, true);
+    }
+  }
+
+  /**
+   * Test to verify the directory status with different HDI folder 
configurations.
+   * Verifying the correct header and directory state.
+   */
+  @Test
+  public void testIsDirectoryWithDifferentCases() throws Exception {
+    testIsDirectory(true,  "HDI_ISFOLDER");
+
+    testIsDirectory(true, "Hdi_ISFOLDER");
+
+    testIsDirectory(true, "Hdi_isfolder");
+
+    testIsDirectory(true, "hdi_isfolder");
+
+    testIsDirectory(false, "Hdi_isfolder1");
+
+    testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER", "Hdi_isfolder");
+
+    testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test");
+  }
 }
diff --git 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java
 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java
index 1c4ef6d4212..7f7d96bd477 100644
--- 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java
+++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java
@@ -21,6 +21,7 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.SocketTimeoutException;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
@@ -40,7 +41,13 @@
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.azurebfs.constants.FSOperationType;
+import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
 import 
org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
+import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient;
+import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler;
+import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader;
+import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
+import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType;
 import org.apache.hadoop.fs.azurebfs.services.ListResponseData;
 import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
 import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil;
@@ -51,8 +58,11 @@
 import org.apache.hadoop.fs.contract.ContractTestUtils;
 
 import static java.net.HttpURLConnection.HTTP_OK;
+import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
 import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE;
 import static 
org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS;
+import static 
org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX;
 import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX;
 import static 
org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs;
@@ -62,6 +72,8 @@
 
 import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyList;
+import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.ArgumentMatchers.nullable;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.when;
@@ -524,4 +536,106 @@ private void assertDirectoryFileStatus(final FileStatus 
fileStatus,
     Assertions.assertThat(fileStatus.getLen())
         .describedAs("Content Length Not as Expected").isEqualTo(0);
   }
+
+  /**
+   * Helper method to mock the AbfsRestOperation and modify the request 
headers.
+   *
+   * @param abfsBlobClient the mocked AbfsBlobClient
+   * @param newHeader the header to add in place of the old one
+   */
+  public static void mockAbfsRestOperation(AbfsBlobClient abfsBlobClient, 
String... newHeader) {
+    Mockito.doAnswer(invocation -> {
+      List<AbfsHttpHeader> requestHeaders = invocation.getArgument(3);
+
+      // Remove the actual HDI config header and add the new one
+      requestHeaders.removeIf(header ->
+          
HttpHeaderConfigurations.X_MS_META_HDI_ISFOLDER.equals(header.getName()));
+      for (String header : newHeader) {
+        requestHeaders.add(new AbfsHttpHeader(X_MS_METADATA_PREFIX + header, 
TRUE));
+      }
+
+      // Call the real method
+      return invocation.callRealMethod();
+    
}).when(abfsBlobClient).getAbfsRestOperation(eq(AbfsRestOperationType.PutBlob),
+        eq(HTTP_METHOD_PUT), any(URL.class), anyList());
+  }
+
+  /**
+   * Helper method to mock the AbfsBlobClient and set up the client handler.
+   *
+   * @param fs the AzureBlobFileSystem instance
+   * @return the mocked AbfsBlobClient
+   */
+  public static AbfsBlobClient mockIngressClientHandler(AzureBlobFileSystem 
fs) {
+    AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
+    AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler());
+    AbfsBlobClient abfsBlobClient = (AbfsBlobClient) Mockito.spy(
+        clientHandler.getClient());
+    fs.getAbfsStore().setClient(abfsBlobClient);
+    fs.getAbfsStore().setClientHandler(clientHandler);
+    Mockito.doReturn(abfsBlobClient).when(clientHandler).getIngressClient();
+    return abfsBlobClient;
+  }
+
+  /**
+   * Test directory status with different HDI folder configuration,
+   * verifying the correct header and directory state.
+   */
+  private void testIsDirectory(boolean expected, String... configName) throws 
Exception {
+    try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) {
+      assumeBlobServiceType();
+      AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs);
+      // Mock the operation to modify the headers
+      mockAbfsRestOperation(abfsBlobClient, configName);
+
+      // Create the path and invoke mkdirs method
+      Path path = new Path("/testPath");
+      fs.mkdirs(path);
+
+      // Assert that the response header has the updated value
+      FileStatus[] fileStatus = fs.listStatus(path.getParent());
+
+      AbfsHttpOperation op = abfsBlobClient.getPathStatus(
+          path.toUri().getPath(),
+          true, getTestTracingContext(fs, true),
+          null).getResult();
+
+      Assertions.assertThat(abfsBlobClient.checkIsDir(op))
+          .describedAs("Directory should be marked as " + expected)
+          .isEqualTo(expected);
+
+      // Verify the header and directory state
+      Assertions.assertThat(fileStatus.length)
+          .describedAs("Expected directory state: " + expected)
+          .isEqualTo(1);
+
+      // Verify the header and directory state
+      Assertions.assertThat(fileStatus[0].isDirectory())
+          .describedAs("Expected directory state: " + expected)
+          .isEqualTo(expected);
+
+      fs.delete(path, true);
+    }
+  }
+
+  /**
+   * Test to verify the directory status with different HDI folder 
configurations.
+   * Verifying the correct header and directory state.
+   */
+  @Test
+  public void testIsDirectoryWithDifferentCases() throws Exception {
+    testIsDirectory(true,  "HDI_ISFOLDER");
+
+    testIsDirectory(true, "Hdi_ISFOLDER");
+
+    testIsDirectory(true, "Hdi_isfolder");
+
+    testIsDirectory(true, "hdi_isfolder");
+
+    testIsDirectory(false, "Hdi_isfolder1");
+
+    testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER", "Hdi_isfolder");
+
+    testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test");
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to