[
https://issues.apache.org/jira/browse/HADOOP-16202?focusedWorklogId=758606&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-758606
]
ASF GitHub Bot logged work on HADOOP-16202:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 19/Apr/22 16:34
Start Date: 19/Apr/22 16:34
Worklog Time Spent: 10m
Work Description: steveloughran commented on code in PR #2584:
URL: https://github.com/apache/hadoop/pull/2584#discussion_r853274540
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java:
##########
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.performance;
+
+
+import java.io.EOFException;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+import org.apache.hadoop.fs.statistics.IOStatistics;
+
+import static
org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
+import static
org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL;
+import static
org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.readStream;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile;
+import static org.apache.hadoop.fs.s3a.Statistic.STREAM_READ_BYTES_READ_CLOSE;
+import static org.apache.hadoop.fs.s3a.Statistic.STREAM_READ_OPENED;
+import static
org.apache.hadoop.fs.s3a.Statistic.STREAM_READ_SEEK_BYTES_SKIPPED;
+import static org.apache.hadoop.fs.s3a.performance.OperationCost.NO_IO;
+import static
org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertDurationRange;
+import static
org.apache.hadoop.fs.statistics.IOStatisticAssertions.extractStatistics;
+import static
org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticCounterValue;
+import static
org.apache.hadoop.fs.statistics.IOStatisticsLogging.demandStringifyIOStatistics;
+import static
org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_FILE_OPENED;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Cost of openFile().
+ */
+public class ITestS3AOpenCost extends AbstractS3ACostTest {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ITestS3AOpenCost.class);
+
+ private Path testFile;
+
+ private FileStatus testFileStatus;
+
+ private long fileLength;
+
+ public ITestS3AOpenCost() {
+ super(true); // NOTE(review): the meaning of this flag is defined by the AbstractS3ACostTest constructor -- confirm there
+ }
+
+ /**
+ * Set up the test case: after the superclass setup, write a short
+ * text file to the path returned by methodPath(), then save its
+ * status and length to fields for use by the tests.
+ * @throws Exception any failure raised during setup.
+ */
+ @Override
+ public void setup() throws Exception {
+ super.setup();
+ S3AFileSystem fs = getFileSystem();
+ testFile = methodPath();
+
+ writeTextFile(fs, testFile, "openfile", true);
+ testFileStatus = fs.getFileStatus(testFile);
+ fileLength = testFileStatus.getLen();
+ }
+
+ /**
+ * Test when openFile() performs GET requests when file status
+ * and length options are passed down.
+ * Note that the input streams only update the FS statistics
+ * in close(), so metrics cannot be verified until all operations
+ * on a stream are complete.
+ * This is slightly less than ideal.
+ */
+ @Test
+ public void testOpenFileWithStatusOfOtherFS() throws Throwable {
+ describe("Test cost of openFile with/without status; raw only");
+ S3AFileSystem fs = getFileSystem();
+
+ // now read that file back in using the openFile call.
+ // with a new FileStatus and a different path.
+ // this verifies that any FileStatus class/subclass is used
+ // as a source of the file length.
+ FileStatus st2 = new FileStatus(
+ fileLength, false,
+ testFileStatus.getReplication(),
+ testFileStatus.getBlockSize(),
+ testFileStatus.getModificationTime(),
+ testFileStatus.getAccessTime(),
+ testFileStatus.getPermission(),
+ testFileStatus.getOwner(),
+ testFileStatus.getGroup(),
+ new Path("gopher:///localhost/" + testFile.getName()));
+
+ // no IO in open
+ FSDataInputStream in = verifyMetrics(() ->
+ fs.openFile(testFile)
+ .withFileStatus(st2)
+ .build()
+ .get(),
+ always(NO_IO),
+ with(STREAM_READ_OPENED, 0));
+
+ // the stream gets opened during read
+ long readLen = verifyMetrics(() ->
+ readStream(in),
+ always(NO_IO),
Review Comment:
...added a new cost and changed all refs in this test suite to make clear
Issue Time Tracking
-------------------
Worklog Id: (was: 758606)
Time Spent: 21h 10m (was: 21h)
> Enhance openFile() for better read performance against object stores
> ---------------------------------------------------------------------
>
> Key: HADOOP-16202
> URL: https://issues.apache.org/jira/browse/HADOOP-16202
> Project: Hadoop Common
> Issue Type: Bug
> Components: fs, fs/s3, tools/distcp
> Affects Versions: 3.3.0
> Reporter: Steve Loughran
> Assignee: Steve Loughran
> Priority: Major
> Labels: pull-request-available
> Time Spent: 21h 10m
> Remaining Estimate: 0h
>
> The {{openFile()}} builder API lets us add new options when reading a file.
> Add an option {{"fs.s3a.open.option.length"}} which takes a long and allows
> the length of the file to be declared. If set, *no check for the existence of
> the file is issued when opening the file*
> Also: withFileStatus() should take any FileStatus implementation, rather than
> only S3AFileStatus, and should not check that the path matches the path being
> opened. This is needed to support viewFS-style wrapping and mounting.
> Finally, adopt the API where appropriate, to stop clusters with S3A reads
> switched to random IO from killing download/localization:
> * fs shell copyToLocal
> * distcp
> * IOUtils.copy
--
This message was sent by Atlassian Jira
(v8.20.7#820007)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]