saxenapranav commented on code in PR #5488:
URL: https://github.com/apache/hadoop/pull/5488#discussion_r1145873137
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java:
##########
@@ -78,6 +79,8 @@
import static
org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*;
import static
org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND;
+import static
org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND;
+import static org.eclipse.jetty.util.StringUtil.isEmpty;
Review Comment:
lets use isEmpty from hadoop-common.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java:
##########
@@ -503,27 +512,50 @@ public AbfsRestOperation breakLease(final String path,
* took place.
* As rename recovery is only attempted if the source etag is non-empty,
* in normal rename operations rename recovery will never happen.
- * @param source path to source file
- * @param destination destination of rename.
- * @param continuation continuation.
- * @param tracingContext trace context
- * @param sourceEtag etag of source file. may be null or empty
+ *
+ * @param source path to source file
+ * @param destination destination of rename.
+ * @param continuation continuation.
+ * @param tracingContext trace context
+ * @param sourceEtag etag of source file. may be null or empty
* @param isMetadataIncompleteState was there a rename failure due to
* incomplete metadata state?
* @return AbfsClientRenameResult result of rename operation indicating the
* AbfsRest operation, rename recovery and incomplete metadata state failure.
* @throws AzureBlobFileSystemException failure, excluding any recovery from
overload failures.
*/
public AbfsClientRenameResult renamePath(
- final String source,
- final String destination,
- final String continuation,
- final TracingContext tracingContext,
- final String sourceEtag,
- boolean isMetadataIncompleteState)
- throws AzureBlobFileSystemException {
+ final String source,
+ final String destination,
+ final String continuation,
+ final TracingContext tracingContext,
+ String sourceEtag,
+ boolean isMetadataIncompleteState,
+ boolean isNamespaceEnabled)
+ throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
+ final boolean hasEtag = !isEmpty(sourceEtag);
+ boolean isDir = false;
Review Comment:
why isDir can't be true?
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java:
##########
@@ -556,99 +583,150 @@ public AbfsClientRenameResult renamePath(
// isMetadataIncompleteState is used for renameRecovery(as the 2nd
param).
return new AbfsClientRenameResult(op, isMetadataIncompleteState,
isMetadataIncompleteState);
} catch (AzureBlobFileSystemException e) {
- // If we have no HTTP response, throw the original exception.
- if (!op.hasResult()) {
- throw e;
- }
+ // If we have no HTTP response, throw the original exception.
+ if (!op.hasResult()) {
+ throw e;
+ }
- // ref: HADOOP-18242. Rename failure occurring due to a rare case of
- // tracking metadata being in incomplete state.
- if (op.getResult().getStorageErrorCode()
- .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode())
- && !isMetadataIncompleteState) {
- //Logging
- ABFS_METADATA_INCOMPLETE_RENAME_FAILURE
- .info("Rename Failure attempting to resolve tracking metadata
state and retrying.");
-
- // Doing a HEAD call resolves the incomplete metadata state and
- // then we can retry the rename operation.
- AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
- tracingContext);
- isMetadataIncompleteState = true;
- // Extract the sourceEtag, using the status Op, and set it
- // for future rename recovery.
- AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
- String sourceEtagAfterFailure =
extractEtagHeader(sourceStatusResult);
- renamePath(source, destination, continuation, tracingContext,
- sourceEtagAfterFailure, isMetadataIncompleteState);
- }
- // if we get out of the condition without a successful rename, then
- // it isn't metadata incomplete state issue.
- isMetadataIncompleteState = false;
-
- boolean etagCheckSucceeded = renameIdempotencyCheckOp(
- source,
- sourceEtag, op, destination, tracingContext);
- if (!etagCheckSucceeded) {
- // idempotency did not return different result
- // throw back the exception
- throw e;
- }
+ // ref: HADOOP-18242. Rename failure occurring due to a rare case of
+ // tracking metadata being in incomplete state.
+ if (op.getResult().getStorageErrorCode()
+ .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode())
+ && !isMetadataIncompleteState) {
+ //Logging
+ ABFS_METADATA_INCOMPLETE_RENAME_FAILURE
+ .info("Rename Failure attempting to resolve tracking metadata
state and retrying.");
+
+ // Doing a HEAD call resolves the incomplete metadata state and
+ // then we can retry the rename operation.
+ AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
+ tracingContext);
+ isMetadataIncompleteState = true;
+ // Extract the sourceEtag, using the status Op, and set it
+ // for future rename recovery.
+ AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
+ String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult);
+ renamePath(source, destination, continuation, tracingContext,
+ sourceEtagAfterFailure, isMetadataIncompleteState,
isNamespaceEnabled);
+ }
+ // if we get out of the condition without a successful rename, then
+ // it isn't metadata incomplete state issue.
+ isMetadataIncompleteState = false;
+
+ boolean etagCheckSucceeded = renameIdempotencyCheckOp(
+ source,
+ sourceEtag, op, destination, tracingContext, isDir);
+ if (!etagCheckSucceeded) {
+ // idempotency did not return different result
+ // throw back the exception
+ throw e;
+ }
return new AbfsClientRenameResult(op, true, isMetadataIncompleteState);
}
}
+ private boolean checkIsDir(AbfsHttpOperation result) {
+ String resourceType = result.getResponseHeader(
+ HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
+ return resourceType != null
+ && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY);
+ }
+
+ @VisibleForTesting
+ AbfsRestOperation createRenameRestOperation(URL url, List<AbfsHttpHeader>
requestHeaders) {
+ AbfsRestOperation op = new AbfsRestOperation(
+ AbfsRestOperationType.RenamePath,
+ this,
+ HTTP_METHOD_PUT,
+ url,
+ requestHeaders);
+ return op;
+ }
+
private void incrementAbfsRenamePath() {
abfsCounters.incrementCounter(RENAME_PATH_ATTEMPTS, 1);
}
/**
* Check if the rename request failure is post a retry and if earlier rename
* request might have succeeded at back-end.
- *
+ * <p>
Review Comment:
closing tag.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java:
##########
@@ -1914,7 +1916,7 @@ public AbfsClient getClient() {
}
@VisibleForTesting
- void setClient(AbfsClient client) {
+ public void setClient(AbfsClient client) {
Review Comment:
lets not public it.
because fs.getAbfsStore is also public.
anyone can change the client.
Better to add new tests in org.apach.hadoop.fs.azurebfs
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java:
##########
@@ -556,99 +583,150 @@ public AbfsClientRenameResult renamePath(
// isMetadataIncompleteState is used for renameRecovery(as the 2nd
param).
return new AbfsClientRenameResult(op, isMetadataIncompleteState,
isMetadataIncompleteState);
} catch (AzureBlobFileSystemException e) {
- // If we have no HTTP response, throw the original exception.
- if (!op.hasResult()) {
- throw e;
- }
+ // If we have no HTTP response, throw the original exception.
+ if (!op.hasResult()) {
+ throw e;
+ }
- // ref: HADOOP-18242. Rename failure occurring due to a rare case of
- // tracking metadata being in incomplete state.
- if (op.getResult().getStorageErrorCode()
- .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode())
- && !isMetadataIncompleteState) {
- //Logging
- ABFS_METADATA_INCOMPLETE_RENAME_FAILURE
- .info("Rename Failure attempting to resolve tracking metadata
state and retrying.");
-
- // Doing a HEAD call resolves the incomplete metadata state and
- // then we can retry the rename operation.
- AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
- tracingContext);
- isMetadataIncompleteState = true;
- // Extract the sourceEtag, using the status Op, and set it
- // for future rename recovery.
- AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
- String sourceEtagAfterFailure =
extractEtagHeader(sourceStatusResult);
- renamePath(source, destination, continuation, tracingContext,
- sourceEtagAfterFailure, isMetadataIncompleteState);
- }
- // if we get out of the condition without a successful rename, then
- // it isn't metadata incomplete state issue.
- isMetadataIncompleteState = false;
-
- boolean etagCheckSucceeded = renameIdempotencyCheckOp(
- source,
- sourceEtag, op, destination, tracingContext);
- if (!etagCheckSucceeded) {
- // idempotency did not return different result
- // throw back the exception
- throw e;
- }
+ // ref: HADOOP-18242. Rename failure occurring due to a rare case of
+ // tracking metadata being in incomplete state.
+ if (op.getResult().getStorageErrorCode()
+ .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode())
+ && !isMetadataIncompleteState) {
+ //Logging
+ ABFS_METADATA_INCOMPLETE_RENAME_FAILURE
+ .info("Rename Failure attempting to resolve tracking metadata
state and retrying.");
+
+ // Doing a HEAD call resolves the incomplete metadata state and
+ // then we can retry the rename operation.
+ AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
+ tracingContext);
+ isMetadataIncompleteState = true;
+ // Extract the sourceEtag, using the status Op, and set it
+ // for future rename recovery.
+ AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
+ String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult);
+ renamePath(source, destination, continuation, tracingContext,
+ sourceEtagAfterFailure, isMetadataIncompleteState,
isNamespaceEnabled);
+ }
+ // if we get out of the condition without a successful rename, then
+ // it isn't metadata incomplete state issue.
+ isMetadataIncompleteState = false;
+
+ boolean etagCheckSucceeded = renameIdempotencyCheckOp(
+ source,
+ sourceEtag, op, destination, tracingContext, isDir);
+ if (!etagCheckSucceeded) {
+ // idempotency did not return different result
+ // throw back the exception
+ throw e;
+ }
return new AbfsClientRenameResult(op, true, isMetadataIncompleteState);
}
}
+ private boolean checkIsDir(AbfsHttpOperation result) {
+ String resourceType = result.getResponseHeader(
+ HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
+ return resourceType != null
+ && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY);
+ }
+
+ @VisibleForTesting
+ AbfsRestOperation createRenameRestOperation(URL url, List<AbfsHttpHeader>
requestHeaders) {
+ AbfsRestOperation op = new AbfsRestOperation(
+ AbfsRestOperationType.RenamePath,
+ this,
+ HTTP_METHOD_PUT,
+ url,
+ requestHeaders);
+ return op;
+ }
+
private void incrementAbfsRenamePath() {
abfsCounters.incrementCounter(RENAME_PATH_ATTEMPTS, 1);
}
/**
* Check if the rename request failure is post a retry and if earlier rename
* request might have succeeded at back-end.
- *
+ * <p>
* If a source etag was passed in, and the error was 404, get the
* etag of any file at the destination.
* If it matches the source etag, then the rename is considered
* a success.
* Exceptions raised in the probe of the destination are swallowed,
* so that they do not interfere with the original rename failures.
- * @param source source path
- * @param op Rename request REST operation response with non-null HTTP
response
- * @param destination rename destination path
- * @param sourceEtag etag of source file. may be null or empty
+ *
+ * @param source source path
+ * @param op Rename request REST operation response with
non-null HTTP response
+ * @param destination rename destination path
+ * @param sourceEtag etag of source file. may be null or empty
* @param tracingContext Tracks identifiers for request header
* @return true if the file was successfully copied
*/
public boolean renameIdempotencyCheckOp(
- final String source,
- final String sourceEtag,
- final AbfsRestOperation op,
- final String destination,
- TracingContext tracingContext) {
+ final String source,
+ final String sourceEtag,
+ final AbfsRestOperation op,
+ final String destination,
+ TracingContext tracingContext,
+ final boolean isDir) {
Preconditions.checkArgument(op.hasResult(), "Operations has null HTTP
response");
- if ((op.isARetriedRequest())
- && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND)
- && isNotEmpty(sourceEtag)) {
+ LOG.debug("rename({}, {}) failure {}; retry={} isDir {} etag {}",
+ source, destination, op.getResult().getStatusCode(),
op.isARetriedRequest(), isDir, sourceEtag);
+ if (!(op.isARetriedRequest()
+ && (op.getResult().getStatusCode() ==
HttpURLConnection.HTTP_NOT_FOUND))) {
+ // only attempt recovery if the failure was a 404 on a retried rename
request.
+ return false;
+ }
- // Server has returned HTTP 404, which means rename source no longer
- // exists. Check on destination status and if its etag matches
- // that of the source, consider it to be a success.
- LOG.debug("rename {} to {} failed, checking etag of destination",
- source, destination);
+ if (isDir) {
+ // preservation of eTag in rename is inconsistent
+ // across different mechanisms of dir creation -
+ // creating through mkdirs preserves eTag and
+ // implicit creation of dir when creating a child file
+ // does not preserve eTag on rename.
+ // Hence directory recovery is not supported.
+ // log and fail.
+ LOG.info("rename directory {} to {} failed; unable to recover",
+ source, destination);
+ return false;
+ }
+ if (isNotEmpty(sourceEtag)) {
Review Comment:
should there be check on namespace. because on non-hns, etag is not
preserved.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java:
##########
@@ -503,27 +512,50 @@ public AbfsRestOperation breakLease(final String path,
* took place.
* As rename recovery is only attempted if the source etag is non-empty,
* in normal rename operations rename recovery will never happen.
- * @param source path to source file
- * @param destination destination of rename.
- * @param continuation continuation.
- * @param tracingContext trace context
- * @param sourceEtag etag of source file. may be null or empty
+ *
+ * @param source path to source file
+ * @param destination destination of rename.
+ * @param continuation continuation.
+ * @param tracingContext trace context
+ * @param sourceEtag etag of source file. may be null or empty
* @param isMetadataIncompleteState was there a rename failure due to
* incomplete metadata state?
* @return AbfsClientRenameResult result of rename operation indicating the
* AbfsRest operation, rename recovery and incomplete metadata state failure.
* @throws AzureBlobFileSystemException failure, excluding any recovery from
overload failures.
*/
public AbfsClientRenameResult renamePath(
- final String source,
- final String destination,
- final String continuation,
- final TracingContext tracingContext,
- final String sourceEtag,
- boolean isMetadataIncompleteState)
- throws AzureBlobFileSystemException {
+ final String source,
+ final String destination,
+ final String continuation,
+ final TracingContext tracingContext,
+ String sourceEtag,
+ boolean isMetadataIncompleteState,
+ boolean isNamespaceEnabled)
Review Comment:
lets not have isNamespaceEnabled as argument. reason being, it can not be
either true for some case and false for some case. it would have same value for
the whole lifecyle of abfsclient. Can we flow this field from
azureBlobFileSystemStore and have it a private field to the object.
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java:
##########
@@ -18,19 +18,52 @@
package org.apache.hadoop.fs.azurebfs.services;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.SocketException;
+import java.net.URL;
+import java.time.Duration;
+import java.util.List;
+import java.util.Optional;
+import javax.net.ssl.HttpsURLConnection;
+
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys;
+import org.apache.hadoop.fs.statistics.IOStatisticAssertions;
+import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore;
import org.assertj.core.api.Assertions;
+import org.junit.Assume;
import org.junit.Test;
+import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.fs.Path;
+
+import org.apache.hadoop.fs.EtagSource;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest;
import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;
+import org.apache.hadoop.fs.azurebfs.commit.ResilientCommitByRename;
+import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
import
org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
import
org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+import org.apache.hadoop.fs.azurebfs.AbfsStatistic;
+import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import static
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
-import static
org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND;
+import static
org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.*;
Review Comment:
wildcard. Please switch it off on the IDE :).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]