[ https://issues.apache.org/jira/browse/HADOOP-18656?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17833093#comment-17833093 ]
ASF GitHub Bot commented on HADOOP-18656: ----------------------------------------- anujmodi2021 commented on code in PR #6409: URL: https://github.com/apache/hadoop/pull/6409#discussion_r1547547655 ########## hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPaginatedDelete.java: ########## @@ -0,0 +1,289 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider; +import org.apache.hadoop.fs.azurebfs.utils.AclTestHelpers; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclEntryScope; +import org.apache.hadoop.fs.permission.AclEntryType; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.util.Lists; + +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.List; +import java.util.UUID; + +import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_PAGINATED; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CHECKACCESS_TEST_USER_GUID; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CLIENT_ID; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CLIENT_SECRET; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT; +import static org.apache.hadoop.fs.azurebfs.services.AbfsClientUtils.getHeaderValue; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public class ITestAbfsPaginatedDelete extends AbstractAbfsIntegrationTest { + + private AzureBlobFileSystem superUserFs; + private AzureBlobFileSystem firstTestUserFs; + + private boolean isHnsEnabled; + public ITestAbfsPaginatedDelete() throws Exception { + } + + @Override + public void setup() throws Exception { + isHnsEnabled = this.getConfiguration().getBoolean( + FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + loadConfiguredFileSystem(); + super.setup(); + this.superUserFs = getFileSystem(); + + // Test User Credentials. + String firstTestUserGuid = getConfiguration().get( + FS_AZURE_BLOB_FS_CHECKACCESS_TEST_USER_GUID); + String clientId = getConfiguration().getString( + FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID, ""); + String clientSecret = getConfiguration().getString( + FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET, ""); + + if (isHnsEnabled) { + // setting up ACL permissions for test user + setFirstTestUserFsAuth(clientId, clientSecret); + setDefaultAclOnRoot(firstTestUserGuid); + } + } + + /** + * Test to check that recursive deletePath works with paginated enabled and + * disabled for both empty and non-empty directory. + * When enabled appropriate xMsVersion should be used. + * @throws Exception + */ + @Test + public void testRecursiveDeleteWithPagination() throws Exception { + testRecursiveDeleteWithPaginationInternal(false, true, + AbfsHttpConstants.ApiVersion.DEC_12_2019); + testRecursiveDeleteWithPaginationInternal(false, true, + AbfsHttpConstants.ApiVersion.AUG_03_2023); + testRecursiveDeleteWithPaginationInternal(false, false, + AbfsHttpConstants.ApiVersion.DEC_12_2019); + testRecursiveDeleteWithPaginationInternal(false, false, + AbfsHttpConstants.ApiVersion.AUG_03_2023); + testRecursiveDeleteWithPaginationInternal(true, true, + AbfsHttpConstants.ApiVersion.DEC_12_2019); + testRecursiveDeleteWithPaginationInternal(true, false, + AbfsHttpConstants.ApiVersion.AUG_03_2023); + } + + /** + * Test to check that non-recursive delete works with both paginated enabled + * and disabled only for empty directories. + * Pagination should not be set when recursive is false. + * @throws Exception + */ + @Test + public void testNonRecursiveDeleteWithPagination() throws Exception { + testNonRecursiveDeleteWithPaginationInternal(true); + testNonRecursiveDeleteWithPaginationInternal(false); + } + + /** + * Test to check that with pagination enabled, invalid CT will fail + * @throws Exception + */ + @Test + public void testRecursiveDeleteWithInvalidCT() throws Exception { + testRecursiveDeleteWithInvalidCTInternal(true); + testRecursiveDeleteWithInvalidCTInternal(false); + } + + private void testRecursiveDeleteWithPaginationInternal(boolean isEmptyDir, + boolean isPaginatedDeleteEnabled, + AbfsHttpConstants.ApiVersion xMsVersion) throws Exception { + final AzureBlobFileSystem fs = getUserFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, true); + Path testPath; + if (isEmptyDir) { + testPath = new Path("/emptyPath" + StringUtils.right( + UUID.randomUUID().toString(), 10)); + fs.mkdirs(testPath); + } else { + testPath = createSmallDir(); + } + + // Set the paginated enabled value and xMsVersion at spiedClient level. + AbfsClient spiedClient = Mockito.spy(fs.getAbfsStore().getClient()); + ITestAbfsClient.setAbfsClientField(spiedClient, "xMsVersion", xMsVersion); + Mockito.doReturn(isPaginatedDeleteEnabled).when(spiedClient).getIsPaginatedDeleteEnabled(); + + AbfsRestOperation op = spiedClient.deletePath( + testPath.toString(), true, null, testTracingContext); + + // Getting the xMsVersion that was used to make the request + String xMsVersionUsed = getHeaderValue(op.getRequestHeaders(), X_MS_VERSION); + String urlUsed = op.getUrl().toString(); + + // Assert that appropriate xMsVersion and query param was used to make request + if (isPaginatedDeleteEnabled) { + Assertions.assertThat(urlUsed) + .describedAs("Url must have paginated = true as query param") + .contains(QUERY_PARAM_PAGINATED); + if (xMsVersion.compareTo(AbfsHttpConstants.ApiVersion.AUG_03_2023) < 0) { + Assertions.assertThat(xMsVersionUsed) + .describedAs("Request was made with wrong x-ms-version") + .isEqualTo(AbfsHttpConstants.ApiVersion.AUG_03_2023.toString()); + } else if (xMsVersion.compareTo(AbfsHttpConstants.ApiVersion.AUG_03_2023) >= 0) { + Assertions.assertThat(xMsVersionUsed) + .describedAs("Request was made with wrong x-ms-version") + .isEqualTo(xMsVersion.toString()); + } + } else { + Assertions.assertThat(urlUsed) + .describedAs("Url must not have paginated = true as query param") + .doesNotContain(QUERY_PARAM_PAGINATED); + Assertions.assertThat(xMsVersionUsed) + .describedAs("Request was made with wrong x-ms-version") + .isEqualTo(xMsVersion.toString()); + } + + // Assert that deletion was successful in every scenario. + AbfsRestOperationException e = intercept(AbfsRestOperationException.class, () -> + spiedClient.getPathStatus(testPath.toString(), false, testTracingContext, null)); + Assertions.assertThat(e.getStatusCode()) + .describedAs("Path should have been deleted").isEqualTo(HTTP_NOT_FOUND); Review Comment: Makes sense. Factored out to new method. Added the e.toString() in message so that it will be a part of assertion error in case assertion fails. > ABFS: Support for Pagination in Recursive Directory Delete > ----------------------------------------------------------- > > Key: HADOOP-18656 > URL: https://issues.apache.org/jira/browse/HADOOP-18656 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs/azure > Affects Versions: 3.3.5 > Reporter: Sree Bhattacharyya > Assignee: Anuj Modi > Priority: Minor > Labels: pull-request-available > > Today, when a recursive delete is issued for a large directory in ADLS Gen2 > (HNS) account, the directory deletion happens in O(1) but in backend ACL > Checks are done recursively for each object inside that directory which in > case of large directory could lead to request time out. Pagination is > introduced in the Azure Storage Backend for these ACL checks. > More information on how pagination works can be found on public documentation > of [Azure Delete Path > API|https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/delete?view=rest-storageservices-datalakestoragegen2-2019-12-12]. > This PR contains changes to support this from client side. To trigger > pagination, client needs to add a new query parameter "paginated" and set it > to true along with recursive set to true. In return if the directory is > large, server might return a continuation token back to the caller. If caller > gets back a continuation token, it has to call the delete API again with > continuation token along with recursive and pagination set to true. This is > similar to directory delete of FNS account. > Pagination is available only in versions "2023-08-03" onwards. > PR also contains functional tests to verify driver works well with different > combinations of recursive and pagination features for HNS. > Full E2E testing of pagination requires large dataset to be created and hence > not added as part of driver test suite. But extensive E2E testing has been > performed. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org