[ 
https://issues.apache.org/jira/browse/HADOOP-18012?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17703330#comment-17703330
 ] 

ASF GitHub Bot commented on HADOOP-18012:
-----------------------------------------

steveloughran commented on code in PR #5488:
URL: https://github.com/apache/hadoop/pull/5488#discussion_r1143834262


##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java:
##########
@@ -441,11 +441,19 @@ public boolean rename(final Path src, final Path dst) 
throws IOException {
       return dstFileStatus.isDirectory() ? false : true;
     }
 
+    boolean isNamespaceEnabled = 
abfsStore.getIsNamespaceEnabled(tracingContext);
+
     // Non-HNS account need to check dst status on driver side.
-    if (!abfsStore.getIsNamespaceEnabled(tracingContext) && dstFileStatus == 
null) {
+    if (!isNamespaceEnabled && dstFileStatus == null) {
       dstFileStatus = tryGetFileStatus(qualifiedDstPath, tracingContext);
     }
 
+    // for Non-HNS accounts, rename resiliency cannot be maintained
+    // as eTags are not preserved in rename

Review Comment:
   it's a bit of an ugly way to do it. if it is being set on every call, really 
it's a parameter which should be passed down directly



##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java:
##########
@@ -123,6 +141,189 @@ public void testRenameFailuresDueToIncompleteMetadata() 
throws Exception {
 
   }
 
+  AbfsClient getMockAbfsClient() throws IOException {
+    AzureBlobFileSystem fs = getFileSystem();
+
+    // adding mock objects to current AbfsClient
+    AbfsClient spyClient = Mockito.spy(fs.getAbfsStore().getClient());
+
+    Mockito.doAnswer(answer -> {
+      AbfsRestOperation op = new 
AbfsRestOperation(AbfsRestOperationType.RenamePath,
+              spyClient, HTTP_METHOD_PUT, answer.getArgument(0), 
answer.getArgument(1));
+      AbfsRestOperation spiedOp = Mockito.spy(op);
+      addSpyBehavior(spiedOp, op, spyClient);
+      return spiedOp;
+    }).when(spyClient).createRenameRestOperation(nullable(URL.class), 
nullable(List.class));
+
+    return spyClient;
+
+  }
+
+  private void addSpyBehavior(final AbfsRestOperation spiedRestOp,
+                              final AbfsRestOperation normalRestOp,
+                              final AbfsClient client)
+          throws IOException {
+    AbfsHttpOperation failingOperation = 
Mockito.spy(normalRestOp.createHttpOperation());
+    AbfsHttpOperation normalOp1 = normalRestOp.createHttpOperation();
+    
normalOp1.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION,
+            client.getAccessToken());
+    executeThenFail(client, failingOperation, normalOp1);
+    AbfsHttpOperation normalOp2 = normalRestOp.createHttpOperation();
+    
normalOp2.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION,
+            client.getAccessToken());
+
+    when(spiedRestOp.createHttpOperation())
+            .thenReturn(failingOperation)
+            .thenReturn(normalOp2);
+  }
+
+  /**
+   * Mock an idempotency failure by executing the normal operation, then
+   * raising an IOE.
+   * @param failingOperation failing operation
+   * @param normalOp good operation
+   * @throws IOException failure
+   */
+  private void executeThenFail(final AbfsClient client,
+                               final AbfsHttpOperation failingOperation,
+                               final AbfsHttpOperation normalOp)
+          throws IOException {
+    Mockito.doAnswer(answer -> {
+      LOG.info("Executing first attempt with post-operation fault injection");
+      final byte[] buffer = answer.getArgument(0);
+      final int offset = answer.getArgument(1);
+      final int length = answer.getArgument(2);
+      client.getSharedKeyCredentials().signRequest(
+              normalOp.getConnection(),
+              length);
+      normalOp.sendRequest(buffer, offset, length);
+      normalOp.processResponse(buffer, offset, length);
+      LOG.info("Actual outcome is {} \"{}\" \"{}\"; injecting failure",
+              normalOp.getStatusCode(),
+              normalOp.getStorageErrorCode(),
+              normalOp.getStorageErrorMessage());
+      throw new SocketException("connection-reset");
+    }).when(failingOperation).sendRequest(Mockito.nullable(byte[].class),
+            Mockito.nullable(int.class), Mockito.nullable(int.class));
+  }
+
+  @Test
+  public void testRenameRecoverySrcDestEtagSame() throws IOException {
+    AzureBlobFileSystem fs = getFileSystem();
+    AzureBlobFileSystemStore abfsStore = fs.getAbfsStore();
+    TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+    
Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+    AbfsClient mockClient = getMockAbfsClient();
+
+    String base = "/" + getMethodName();
+    String path1 = base + "/dummyFile1";
+    String path2 = base + "/dummyFile2";
+    touch(new Path(path1));
+
+    abfsStore.setClient(mockClient);
+
+    // checking correct count in AbfsCounters
+    AbfsCounters counter = mockClient.getAbfsCounters();
+    Long connMadeBeforeRename = counter.getIOStatistics().counters().
+            get(AbfsStatistic.CONNECTIONS_MADE.getStatName());
+    Long renamePathAttemptsBeforeRename = counter.getIOStatistics().counters().
+            get(AbfsStatistic.RENAME_PATH_ATTEMPTS.getStatName());
+
+    // 404 and retry, send sourceEtag as null
+    // source eTag matches -> rename should pass even when execute throws 
exception
+    fs.rename(new Path(path1), new Path(path2));
+
+    // validating stat counters after rename
+    Long connMadeAfterRename = counter.getIOStatistics().counters().
+            get(AbfsStatistic.CONNECTIONS_MADE.getStatName());
+    Long renamePathAttemptsAfterRename = counter.getIOStatistics().counters().
+            get(AbfsStatistic.RENAME_PATH_ATTEMPTS.getStatName());
+
+    // 4 calls should have happened in total for rename
+    // 1 -> original rename rest call, 2 -> first retry,
+    // +2 for getPathStatus calls
+    assertEquals(Long.valueOf(connMadeBeforeRename+4), connMadeAfterRename);
+
+    // the RENAME_PATH_ATTEMPTS stat should be incremented by 1
+    // retries happen internally within AbfsRestOperation execute()
+    // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() 
is called
+    assertEquals(Long.valueOf(renamePathAttemptsBeforeRename+1), 
renamePathAttemptsAfterRename);
+  }
+
+  @Test
+  public void testRenameRecoverySrcDestEtagDifferent() throws Exception {
+    AzureBlobFileSystem fs = getFileSystem();
+    AzureBlobFileSystemStore abfsStore = fs.getAbfsStore();
+    TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+    
Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+    AbfsClient mockClient = getMockAbfsClient();
+
+    String path1 = "/dummyFile1";
+    String path2 = "/dummyFile2";
+
+    fs.create(new Path(path2));
+
+    abfsStore.setClient(mockClient);
+
+    // source eTag does not match -> rename should be a failure
+    intercept(FileNotFoundException.class, () ->
+            fs.rename(new Path(path1), new Path(path2)));
+
+  }
+
+  @Test
+  public void testRenameRecoveryFailsForDir() throws Exception {
+    AzureBlobFileSystem fs = getFileSystem();
+    AzureBlobFileSystemStore abfsStore = fs.getAbfsStore();
+    TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+    
Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+    AbfsClient mockClient = getMockAbfsClient();
+
+    String dir1 = "/dummyDir1";
+    String dir2 = "/dummyDir2";
+
+    Path path1 = new Path(dir1);
+    Path path2 = new Path(dir2);
+
+    fs.mkdirs(path1);
+
+    abfsStore.setClient(mockClient);
+
+    // checking correct count in AbfsCounters
+    AbfsCounters counter = mockClient.getAbfsCounters();
+    Long connMadeBeforeRename = counter.getIOStatistics().counters().
+            get(AbfsStatistic.CONNECTIONS_MADE.getStatName());
+    Long renamePathAttemptsBeforeRename = counter.getIOStatistics().counters().
+            get(AbfsStatistic.RENAME_PATH_ATTEMPTS.getStatName());
+
+    // source eTag does not match -> rename should be a failure
+    boolean renameResult = fs.rename(path1, path2);
+    assertEquals(false, renameResult);
+
+    // validating stat counters after rename
+    Long connMadeAfterRename = counter.getIOStatistics().counters().

Review Comment:
   you can use IOStatisticAssertions here, such as to get the counter (with 
asserts that the value is there), and creating an assertJ assertion chain from 
a value.
   
   
   ```
   long connMadeBeforeRename = lookupCounterStatistic(iostats, 
STORE_IO_REQUEST.getSymbol())
   
   ...
    assertThatStatisticCounter(iostats,
       STORE_IO_REQUEST.getSymbol())
      .isEqualTo(1 + connMadeBeforeRename);
   ```
   





> ABFS: Enable config controlled ETag check for Rename idempotency
> ----------------------------------------------------------------
>
>                 Key: HADOOP-18012
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18012
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: fs/azure
>    Affects Versions: 3.3.2
>            Reporter: Sneha Vijayarajan
>            Assignee: Sree Bhattacharyya
>            Priority: Major
>              Labels: pull-request-available
>
> ABFS driver has a handling for rename idempotency which relies on LMT of the 
> destination file to conclude if the rename was successful or not when source 
> file is absent and if the rename request had entered retry loop.
> This handling is incorrect as LMT of the destination does not change on 
> rename. 
> This Jira will track the change to undo the current implementation and add a 
> new one where for an incoming rename operation, source file eTag is fetched 
> first and then rename is done only if eTag matches for the source file.
> As this is going to be a costly operation given an extra HEAD request is 
> added to each rename, this implementation will be guarded over a config and 
> can enabled by customers who have workloads that do multiple renames. 
> Long term plan to handle rename idempotency without HEAD request is being 
> discussed.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to