This is an automated email from the ASF dual-hosted git repository. anujmodi pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 7827dc63204 HADOOP-19494: [ABFS][FnsOverBlob] Fix Case Sensitivity Issue for hdi_isfolder metadata (#7496) (#7584) 7827dc63204 is described below commit 7827dc6320433546bf69600f241d4b8df1856126 Author: Manish Bhatt <52626736+bhattmanis...@users.noreply.github.com> AuthorDate: Tue Apr 8 02:28:44 2025 -0700 HADOOP-19494: [ABFS][FnsOverBlob] Fix Case Sensitivity Issue for hdi_isfolder metadata (#7496) (#7584) Contributed by Manish Bhatt Reviewed by Anmol, Manika, Anuj Signed off by: Anuj Modi<anujm...@apache.org> --- .../contracts/services/BlobListXmlParser.java | 2 +- .../fs/azurebfs/services/AbfsAHCHttpOperation.java | 16 +++ .../fs/azurebfs/services/AbfsBlobClient.java | 5 +- .../hadoop/fs/azurebfs/services/AbfsClient.java | 3 +- .../fs/azurebfs/services/AbfsHttpOperation.java | 34 +++++- .../fs/azurebfs/services/AbfsJdkHttpOperation.java | 16 +++ .../ITestAzureBlobFileSystemFileStatus.java | 60 +++++++++++ .../ITestAzureBlobFileSystemListStatus.java | 114 +++++++++++++++++++++ 8 files changed, 244 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java index 9cfbf237898..59207486312 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java @@ -207,7 +207,7 @@ public void endElement(final String uri, if (parentNode.equals(AbfsHttpConstants.XML_TAG_METADATA)) { currentBlobEntry.addMetadata(currentNode, value); // For Marker blobs hdi_isFolder will be present as metadata - if (AbfsHttpConstants.XML_TAG_HDI_ISFOLDER.equals(currentNode)) { + if (AbfsHttpConstants.XML_TAG_HDI_ISFOLDER.equalsIgnoreCase(currentNode)) { currentBlobEntry.setIsDirectory(Boolean.valueOf(value)); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java index dd585d32fb7..2569c6a4bd3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java @@ -312,6 +312,22 @@ public Map<String, List<String>> getResponseHeaders() { return headers; } + /**{@inheritDoc}*/ + @Override + public String getResponseHeaderIgnoreCase(final String headerName) { + Map<String, List<String>> responseHeaders = getResponseHeaders(); + if (responseHeaders == null || responseHeaders.isEmpty()) { + return null; + } + // Search for the header value case-insensitively + return responseHeaders.entrySet().stream() + .filter(entry -> entry.getKey() != null + && entry.getKey().equalsIgnoreCase(headerName)) + .flatMap(entry -> entry.getValue().stream()) + .findFirst() + .orElse(null); // Return null if no match is found + } + /**{@inheritDoc}*/ @Override protected InputStream getContentInputStream() diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 8166cfb60df..32eba86774a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -1137,7 +1137,8 @@ public AbfsRestOperation setPathProperties(final String path, this.createPathRestOp(path, false, false, false, null, contextEncryptionAdapter, tracingContext); // Make sure hdi_isFolder is added to the list of properties to be set. - boolean hdiIsFolderExists = properties.containsKey(XML_TAG_HDI_ISFOLDER); + boolean hdiIsFolderExists = properties.keySet() + .stream().anyMatch(XML_TAG_HDI_ISFOLDER::equalsIgnoreCase); if (!hdiIsFolderExists) { properties.put(XML_TAG_HDI_ISFOLDER, TRUE); } @@ -1548,7 +1549,7 @@ public AbfsRestOperation deleteBlobPath(final Path blobPath, */ @Override public boolean checkIsDir(AbfsHttpOperation result) { - String resourceType = result.getResponseHeader(X_MS_META_HDI_ISFOLDER); + String resourceType = result.getResponseHeaderIgnoreCase(X_MS_META_HDI_ISFOLDER); return resourceType != null && resourceType.equals(TRUE); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 8dcd78ea6b3..b1344f2fcdb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1630,7 +1630,8 @@ AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType * @param requestHeaders The list of HTTP headers for the request. * @return An AbfsRestOperation instance. */ - AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType, + @VisibleForTesting + public AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType, final String httpMethod, final URL url, final List<AbfsHttpHeader> requestHeaders) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index fc35aecd587..89c97b68baa 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -235,6 +235,14 @@ public final InputStream getListResultStream() { public abstract Map<String, List<String>> getResponseHeaders(); + /** + * Get response header value for the given headerKey ignoring case. + * + * @param httpHeader header key. + * @return header value. + */ + public abstract String getResponseHeaderIgnoreCase(String httpHeader); + // Returns a trace message for the request @Override public String toString() { @@ -743,7 +751,7 @@ public String getTracingContextSuffix() { @Override public String getResponseHeader(final String httpHeader) { - return ""; + return EMPTY_STRING; } @Override @@ -751,6 +759,12 @@ public Map<String, List<String>> getResponseHeaders() { return new HashMap<>(); } + /**{@inheritDoc}*/ + @Override + public String getResponseHeaderIgnoreCase(final String headerName) { + return EMPTY_STRING; + } + @Override public void sendPayload(final byte[] buffer, final int offset, @@ -787,6 +801,16 @@ public Map<String, List<String>> getResponseHeaders() { return new HashMap<>(); } + /**{@inheritDoc}*/ + @Override + public String getResponseHeaderIgnoreCase(final String httpHeader) { + // Directories on FNS-Blob are identified by a special metadata header. + if (httpHeader.equalsIgnoreCase(X_MS_META_HDI_ISFOLDER)) { + return TRUE; + } + return EMPTY_STRING; + } + @Override public void processResponse(final byte[] buffer, final int offset, @@ -935,7 +959,7 @@ public String getTracingContextSuffix() { @Override public String getResponseHeader(final String httpHeader) { - return ""; + return EMPTY_STRING; } @Override @@ -943,6 +967,12 @@ public Map<String, List<String>> getResponseHeaders() { return new HashMap<>(); } + /**{@inheritDoc}*/ + @Override + public String getResponseHeaderIgnoreCase(final String headerName) { + return EMPTY_STRING; + } + @Override public void sendPayload(final byte[] buffer, final int offset, diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java index 20fe215e039..c233fae9e37 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java @@ -104,6 +104,22 @@ public Map<String, List<String>> getResponseHeaders() { return connection.getHeaderFields(); } + /**{@inheritDoc}*/ + @Override + public String getResponseHeaderIgnoreCase(final String headerName) { + Map<String, List<String>> responseHeaders = getResponseHeaders(); + if (responseHeaders == null || responseHeaders.isEmpty()) { + return null; + } + // Search for the header value case-insensitively + return responseHeaders.entrySet().stream() + .filter(entry -> entry.getKey() != null + && entry.getKey().equalsIgnoreCase(headerName)) + .flatMap(entry -> entry.getValue().stream()) + .findFirst() + .orElse(null); // Return null if no match is found + } + /**{@inheritDoc}*/ public void sendPayload(byte[] buffer, int offset, int length) throws IOException { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java index f226e5b61fb..2d7298f1e1b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java @@ -29,9 +29,12 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.permission.FsPermission; import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY; +import static org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.mockAbfsRestOperation; +import static org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.mockIngressClientHandler; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.ArgumentMatchers.any; @@ -284,4 +287,61 @@ private void verifyFileNotFound(FileNotFoundException ex, String key) { Assertions.assertThat(ex).isNotNull(); Assertions.assertThat(ex.getMessage()).contains(key); } + + /** + * Test directory status with different HDI folder configuration, + * verifying the correct header and directory state. + */ + private void testIsDirectory(boolean expected, String... configName) throws Exception { + try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) { + assumeBlobServiceType(); + AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs); + // Mock the operation to modify the headers + mockAbfsRestOperation(abfsBlobClient, configName); + + // Create the path and invoke mkdirs method + Path path = new Path("/testPath"); + fs.mkdirs(path); + + // Assert that the response header has the updated value + FileStatus fileStatus = fs.getFileStatus(path); + + AbfsHttpOperation op = abfsBlobClient.getPathStatus( + path.toUri().getPath(), + true, getTestTracingContext(fs, true), + null).getResult(); + + Assertions.assertThat(abfsBlobClient.checkIsDir(op)) + .describedAs("Directory should be marked as " + expected) + .isEqualTo(expected); + + // Verify the header and directory state + Assertions.assertThat(fileStatus.isDirectory()) + .describedAs("Expected directory state: " + expected) + .isEqualTo(expected); + + fs.delete(path, true); + } + } + + /** + * Test to verify the directory status with different HDI folder configurations. + * Verifying the correct header and directory state. + */ + @Test + public void testIsDirectoryWithDifferentCases() throws Exception { + testIsDirectory(true, "HDI_ISFOLDER"); + + testIsDirectory(true, "Hdi_ISFOLDER"); + + testIsDirectory(true, "Hdi_isfolder"); + + testIsDirectory(true, "hdi_isfolder"); + + testIsDirectory(false, "Hdi_isfolder1"); + + testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER", "Hdi_isfolder"); + + testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test"); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 1c4ef6d4212..7f7d96bd477 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -21,6 +21,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.SocketTimeoutException; +import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; @@ -40,7 +41,13 @@ import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType; import org.apache.hadoop.fs.azurebfs.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; @@ -51,8 +58,11 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import static java.net.HttpURLConnection.HTTP_OK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX; import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; @@ -62,6 +72,8 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.nullable; import static org.mockito.Mockito.times; import static org.mockito.Mockito.when; @@ -524,4 +536,106 @@ private void assertDirectoryFileStatus(final FileStatus fileStatus, Assertions.assertThat(fileStatus.getLen()) .describedAs("Content Length Not as Expected").isEqualTo(0); } + + /** + * Helper method to mock the AbfsRestOperation and modify the request headers. + * + * @param abfsBlobClient the mocked AbfsBlobClient + * @param newHeader the header to add in place of the old one + */ + public static void mockAbfsRestOperation(AbfsBlobClient abfsBlobClient, String... newHeader) { + Mockito.doAnswer(invocation -> { + List<AbfsHttpHeader> requestHeaders = invocation.getArgument(3); + + // Remove the actual HDI config header and add the new one + requestHeaders.removeIf(header -> + HttpHeaderConfigurations.X_MS_META_HDI_ISFOLDER.equals(header.getName())); + for (String header : newHeader) { + requestHeaders.add(new AbfsHttpHeader(X_MS_METADATA_PREFIX + header, TRUE)); + } + + // Call the real method + return invocation.callRealMethod(); + }).when(abfsBlobClient).getAbfsRestOperation(eq(AbfsRestOperationType.PutBlob), + eq(HTTP_METHOD_PUT), any(URL.class), anyList()); + } + + /** + * Helper method to mock the AbfsBlobClient and set up the client handler. + * + * @param fs the AzureBlobFileSystem instance + * @return the mocked AbfsBlobClient + */ + public static AbfsBlobClient mockIngressClientHandler(AzureBlobFileSystem fs) { + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); + AbfsBlobClient abfsBlobClient = (AbfsBlobClient) Mockito.spy( + clientHandler.getClient()); + fs.getAbfsStore().setClient(abfsBlobClient); + fs.getAbfsStore().setClientHandler(clientHandler); + Mockito.doReturn(abfsBlobClient).when(clientHandler).getIngressClient(); + return abfsBlobClient; + } + + /** + * Test directory status with different HDI folder configuration, + * verifying the correct header and directory state. + */ + private void testIsDirectory(boolean expected, String... configName) throws Exception { + try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) { + assumeBlobServiceType(); + AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs); + // Mock the operation to modify the headers + mockAbfsRestOperation(abfsBlobClient, configName); + + // Create the path and invoke mkdirs method + Path path = new Path("/testPath"); + fs.mkdirs(path); + + // Assert that the response header has the updated value + FileStatus[] fileStatus = fs.listStatus(path.getParent()); + + AbfsHttpOperation op = abfsBlobClient.getPathStatus( + path.toUri().getPath(), + true, getTestTracingContext(fs, true), + null).getResult(); + + Assertions.assertThat(abfsBlobClient.checkIsDir(op)) + .describedAs("Directory should be marked as " + expected) + .isEqualTo(expected); + + // Verify the header and directory state + Assertions.assertThat(fileStatus.length) + .describedAs("Expected directory state: " + expected) + .isEqualTo(1); + + // Verify the header and directory state + Assertions.assertThat(fileStatus[0].isDirectory()) + .describedAs("Expected directory state: " + expected) + .isEqualTo(expected); + + fs.delete(path, true); + } + } + + /** + * Test to verify the directory status with different HDI folder configurations. + * Verifying the correct header and directory state. + */ + @Test + public void testIsDirectoryWithDifferentCases() throws Exception { + testIsDirectory(true, "HDI_ISFOLDER"); + + testIsDirectory(true, "Hdi_ISFOLDER"); + + testIsDirectory(true, "Hdi_isfolder"); + + testIsDirectory(true, "hdi_isfolder"); + + testIsDirectory(false, "Hdi_isfolder1"); + + testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER", "Hdi_isfolder"); + + testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test"); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org