[
https://issues.apache.org/jira/browse/HADOOP-18872?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17783302#comment-17783302
]
ASF GitHub Bot commented on HADOOP-18872:
-----------------------------------------
steveloughran commented on code in PR #6019:
URL: https://github.com/apache/hadoop/pull/6019#discussion_r1383702079
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java:
##########
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.assertj.core.api.Assertions;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.util.functional.FunctionRaisingIOE;
+
+import static java.net.HttpURLConnection.HTTP_OK;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET;
+import static org.apache.hadoop.fs.azurebfs.services.AuthType.OAuth;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.nullable;
+
+/**
+ * Utility class to help defining mock behavior on AbfsClient and AbfsRestOperation
+ * objects which are protected inside services package.
+ */
+public final class AbfsClientTestUtil {
+
+ private AbfsClientTestUtil() {
+
+ }
+
+ public static void setMockAbfsRestOperationForListPathOperation(
+ final AbfsClient spiedClient,
+      FunctionRaisingIOE<AbfsHttpOperation, AbfsHttpOperation> functionRaisingIOE)
+ throws Exception {
+    ExponentialRetryPolicy retryPolicy = Mockito.mock(ExponentialRetryPolicy.class);
+ AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class);
+ AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation(
+ AbfsRestOperationType.ListPaths,
+ spiedClient,
+ HTTP_METHOD_GET,
+ null,
+ new ArrayList<>()
+ ));
+
+ Mockito.doReturn(abfsRestOperation).when(spiedClient).getAbfsRestOperation(
+ eq(AbfsRestOperationType.ListPaths), any(), any(), any());
+
+ addMockBehaviourToAbfsClient(spiedClient, retryPolicy);
+ addMockBehaviourToRestOpAndHttpOp(abfsRestOperation, httpOperation);
+
+ functionRaisingIOE.apply(httpOperation);
+ }
+
+  public static void addMockBehaviourToRestOpAndHttpOp(final AbfsRestOperation abfsRestOperation,
Review Comment:
the title isn't descriptive enough: what mock behaviour? add some javadocs at least.
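   For reference, a javadoc along these lines would cover what the method
   actually stubs (a sketch based on the method body shown in the later hunk,
   not wording taken from the PR):

       /**
        * Stubs the connection layer under the given rest operation: the
        * AbfsHttpOperation returns a mocked HttpURLConnection, the rest
        * operation reports an empty client latency and hands back the
        * supplied AbfsHttpOperation from createHttpOperation().
        */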
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java:
##########
@@ -62,34 +80,101 @@ public ITestAzureBlobFileSystemListStatus() throws Exception {
public void testListPath() throws Exception {
Configuration config = new Configuration(this.getRawConfiguration());
config.set(AZURE_LIST_MAX_RESULTS, "5000");
- final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem
- .newInstance(getFileSystem().getUri(), config);
- final List<Future<Void>> tasks = new ArrayList<>();
-
- ExecutorService es = Executors.newFixedThreadPool(10);
- for (int i = 0; i < TEST_FILES_NUMBER; i++) {
- final Path fileName = new Path("/test" + i);
- Callable<Void> callable = new Callable<Void>() {
- @Override
- public Void call() throws Exception {
- touch(fileName);
- return null;
- }
- };
-
- tasks.add(es.submit(callable));
- }
-
- for (Future<Void> task : tasks) {
- task.get();
+ try (final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem
+ .newInstance(getFileSystem().getUri(), config)) {
+ final List<Future<Void>> tasks = new ArrayList<>();
+
+ ExecutorService es = Executors.newFixedThreadPool(10);
+ for (int i = 0; i < TEST_FILES_NUMBER; i++) {
+ final Path fileName = new Path("/test" + i);
+ Callable<Void> callable = new Callable<Void>() {
+ @Override
+ public Void call() throws Exception {
+ touch(fileName);
+ return null;
+ }
+ };
+
+ tasks.add(es.submit(callable));
+ }
+
+ for (Future<Void> task : tasks) {
+ task.get();
+ }
+
+ es.shutdownNow();
+ fs.registerListener(
+          new TracingHeaderValidator(getConfiguration().getClientCorrelationId(),
+              fs.getFileSystemId(), FSOperationType.LISTSTATUS, true, 0));
+ FileStatus[] files = fs.listStatus(new Path("/"));
+ assertEquals(TEST_FILES_NUMBER, files.length /* user directory */);
}
+ }
- es.shutdownNow();
- fs.registerListener(
- new TracingHeaderValidator(getConfiguration().getClientCorrelationId(),
- fs.getFileSystemId(), FSOperationType.LISTSTATUS, true, 0));
- FileStatus[] files = fs.listStatus(new Path("/"));
- assertEquals(TEST_FILES_NUMBER, files.length /* user directory */);
+ /**
+   * Test to verify that each paginated call to ListBlobs uses a new tracing context.
+ * @throws Exception
+ */
+ @Test
+ public void testListPathTracingContext() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final AzureBlobFileSystem spiedFs = Mockito.spy(fs);
+ final AzureBlobFileSystemStore spiedStore = Mockito.spy(fs.getAbfsStore());
+ final AbfsClient spiedClient = Mockito.spy(fs.getAbfsClient());
+ final TracingContext spiedTracingContext = Mockito.spy(
+ new TracingContext(
+ fs.getClientCorrelationId(), fs.getFileSystemId(),
+            FSOperationType.LISTSTATUS, true, TracingHeaderFormat.ALL_ID_FORMAT, null));
+
+ Mockito.doReturn(spiedStore).when(spiedFs).getAbfsStore();
+ spiedStore.setClient(spiedClient);
+ spiedFs.setWorkingDirectory(new Path("/"));
+
+    AbfsClientTestUtil.setMockAbfsRestOperationForListPathOperation(spiedClient,
+ (httpOperation) -> {
+
+ ListResultEntrySchema entry = new ListResultEntrySchema()
+ .withName("a")
+ .withIsDirectory(true);
+ List<ListResultEntrySchema> paths = new ArrayList<>();
+ paths.add(entry);
+ paths.clear();
+ entry = new ListResultEntrySchema()
+ .withName("abc.txt")
+ .withIsDirectory(false);
+ paths.add(entry);
+ ListResultSchema schema1 = new ListResultSchema().withPaths(paths);
+ ListResultSchema schema2 = new ListResultSchema().withPaths(paths);
+
+ when(httpOperation.getListResultSchema()).thenReturn(schema1)
+ .thenReturn(schema2);
+ when(httpOperation.getResponseHeader(
+ HttpHeaderConfigurations.X_MS_CONTINUATION))
+ .thenReturn(TEST_CONTINUATION_TOKEN)
+ .thenReturn(EMPTY_STRING);
+
+ Stubber stubber = Mockito.doThrow(
+ new SocketTimeoutException(CONNECTION_TIMEOUT_JDK_MESSAGE));
+ stubber.doNothing().when(httpOperation).processResponse(
+          nullable(byte[].class), nullable(int.class), nullable(int.class));
+
+      when(httpOperation.getStatusCode()).thenReturn(-1).thenReturn(HTTP_OK);
+ return httpOperation;
+ });
+
+ List<FileStatus> fileStatuses = new ArrayList<>();
+    spiedStore.listStatus(new Path("/"), "", fileStatuses, true, null, spiedTracingContext);
+
+    // Assert that 2 paginated ListPath calls were made.
Review Comment:
does this really verify 2 calls, or 1? I don't care which, only that the comment is consistent
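   One way to keep the comment and the check in step is an explicit Mockito
   verification of the call count (a sketch; the listPath signature on
   AbfsClient is assumed here, with matchers standing in for the real
   arguments):

       // verify that exactly two paginated ListPath calls were issued
       Mockito.verify(spiedClient, Mockito.times(2)).listPath(
           Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(),
           Mockito.nullable(String.class), Mockito.any(TracingContext.class));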
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java:
##########
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.assertj.core.api.Assertions;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.util.functional.FunctionRaisingIOE;
+
+import static java.net.HttpURLConnection.HTTP_OK;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET;
+import static org.apache.hadoop.fs.azurebfs.services.AuthType.OAuth;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.nullable;
+
+/**
+ * Utility class to help defining mock behavior on AbfsClient and AbfsRestOperation
+ * objects which are protected inside services package.
+ */
+public final class AbfsClientTestUtil {
+
+ private AbfsClientTestUtil() {
+
+ }
+
+ public static void setMockAbfsRestOperationForListPathOperation(
+ final AbfsClient spiedClient,
+      FunctionRaisingIOE<AbfsHttpOperation, AbfsHttpOperation> functionRaisingIOE)
+ throws Exception {
+    ExponentialRetryPolicy retryPolicy = Mockito.mock(ExponentialRetryPolicy.class);
+ AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class);
+ AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation(
+ AbfsRestOperationType.ListPaths,
+ spiedClient,
+ HTTP_METHOD_GET,
+ null,
+ new ArrayList<>()
+ ));
+
+ Mockito.doReturn(abfsRestOperation).when(spiedClient).getAbfsRestOperation(
+ eq(AbfsRestOperationType.ListPaths), any(), any(), any());
+
+ addMockBehaviourToAbfsClient(spiedClient, retryPolicy);
+ addMockBehaviourToRestOpAndHttpOp(abfsRestOperation, httpOperation);
+
+ functionRaisingIOE.apply(httpOperation);
+ }
+
+  public static void addMockBehaviourToRestOpAndHttpOp(final AbfsRestOperation abfsRestOperation,
+ final AbfsHttpOperation httpOperation) throws IOException {
+    HttpURLConnection httpURLConnection = Mockito.mock(HttpURLConnection.class);
+ Mockito.doNothing().when(httpURLConnection)
+ .setRequestProperty(nullable(String.class), nullable(String.class));
+ Mockito.doReturn(httpURLConnection).when(httpOperation).getConnection();
+ Mockito.doReturn("").when(abfsRestOperation).getClientLatency();
+    Mockito.doReturn(httpOperation).when(abfsRestOperation).createHttpOperation();
+ }
+
+ public static void addMockBehaviourToAbfsClient(final AbfsClient abfsClient,
Review Comment:
again, clarify what mock behavior, including a clearer title
> ABFS: Misreporting Retry Count for Sub-sequential and Parallel Operations
> -------------------------------------------------------------------------
>
> Key: HADOOP-18872
> URL: https://issues.apache.org/jira/browse/HADOOP-18872
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: build
> Affects Versions: 3.3.6
> Reporter: Anmol Asrani
> Assignee: Anuj Modi
> Priority: Major
> Labels: Bug, pull-request-available
>
> There was a bug identified where the retry count in the client correlation id
> was wrongly reported for sub-sequential and parallel operations triggered by a
> single file system call. This was due to reusing the same tracing context for
> all such calls.
> We create a new tracing context as soon as an HDFS call comes in, and we then
> keep passing that same TracingContext to all the client calls.
> For instance, when we get a createFile call, we first issue metadata
> operations. If those metadata operations succeed only after a few retries, the
> tracing context will carry that retry count. When the actual create call is
> then made, the same retry count is used to construct the headers
> (clientCorrelationId). Although the create operation never failed, we will
> still see the retry count from the previous request.
> The fix is to use a new tracing context object for each network call made.
> All the sub-sequential and parallel operations will share the same primary
> request id to correlate them, yet each will track its own retry count.
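A minimal sketch of the fix described above, assuming TracingContext exposes a
copy constructor and that listPath takes the tracing context as its last
argument (both are assumptions about ABFS internals, not text from this issue):

    // Each network call gets its own TracingContext, so its retry count
    // starts at zero; the copied fields, including the primary request id,
    // still correlate it with the triggering file system operation.
    TracingContext perCallContext = new TracingContext(originalTracingContext);
    client.listPath(relativePath, false, listMaxResults, continuation, perCallContext);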
--
This message was sent by Atlassian Jira
(v8.20.10#820010)