[hadoop] branch branch-2.10 updated: YARN-11197. Backport YARN-9608 - DecommissioningNodesWatcher should get lists of running applications on node from RMNode. (#4500)
This is an automated email from the ASF dual-hosted git repository. prabhujoseph pushed a commit to branch branch-2.10 in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/branch-2.10 by this push: new 24b86cc84e5 YARN-11197.Backport YARN-9608 - DecommissioningNodesWatcher should get lists of running applications on node from RMNode. (#4500) 24b86cc84e5 is described below commit 24b86cc84e552a316ef3efda1f88ee0b7f768b77 Author: Ashutosh Gupta AuthorDate: Tue Jun 28 05:03:55 2022 +0100 YARN-11197.Backport YARN-9608 - DecommissioningNodesWatcher should get lists of running applications on node from RMNode. (#4500) Co-authored-by: Ashutosh Gupta --- .../DecommissioningNodesWatcher.java | 47 ++--- .../TestDecommissioningNodesWatcher.java | 106 + 2 files changed, 95 insertions(+), 58 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java index ca3eb798414..707a8fb1ec2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java @@ -17,9 +17,11 @@ */ package org.apache.hadoop.yarn.server.resourcemanager; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.Timer; @@ -36,7 +38,6 @@ import org.apache.hadoop.yarn.api.records.NodeState; import 
org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType; @@ -58,13 +59,8 @@ import org.apache.hadoop.yarn.util.MonotonicClock; * a DECOMMISSIONING node will be DECOMMISSIONED no later than * DECOMMISSIONING_TIMEOUT regardless of running containers or applications. * - * To be efficient, DecommissioningNodesWatcher skip tracking application - * containers on a particular node before the node is in DECOMMISSIONING state. - * It only tracks containers once the node is in DECOMMISSIONING state. * DecommissioningNodesWatcher basically is no cost when no node is - * DECOMMISSIONING. This sacrifices the possibility that the node once - * host containers of an application that is still running - * (the affected map tasks will be rescheduled). + * DECOMMISSIONING. */ public class DecommissioningNodesWatcher { private static final Log LOG = @@ -88,8 +84,8 @@ public class DecommissioningNodesWatcher { // number of running containers at the moment. private int numActiveContainers; -// All applications run on the node at or after decommissioningStartTime. -private Set appIds; +// All applications run on the node. +private List appIds; // First moment the node is observed in DECOMMISSIONED state. 
private long decommissionedTime; @@ -102,7 +98,7 @@ public class DecommissioningNodesWatcher { public DecommissioningNodeContext(NodeId nodeId, int timeoutSec) { this.nodeId = nodeId; - this.appIds = new HashSet(); + this.appIds = new ArrayList<>(); this.decommissioningStartTime = mclock.getTime(); this.timeoutMs = 1000L * timeoutSec; } @@ -164,9 +160,7 @@ public class DecommissioningNodesWatcher { context.updateTimeout(rmNode.getDecommissioningTimeout()); context.lastUpdateTime = now; - if (remoteNodeStatus.getKeepAliveApplications() != null) { -context.appIds.addAll(remoteNodeStatus.getKeepAliveApplications()); - } + context.appIds = rmNode.getRunningApps(); // Count number of active containers. int numActiveContainers = 0; @@ -176,14 +170,7 @@ public class DecommissioningNodesWatcher { newState == ContainerState.NEW) { numActiveContainers++; } -context.numActiveContainers = numActiveContainers; -ApplicationId aid = cs.getContainerId() -.getApplicationAttemptId().getApplicationId(); -if (!context.appIds.contains(aid)) { - context.appIds.add(aid
[hadoop] branch trunk updated (43112bd4726 -> a177232ebc3)
This is an automated email from the ASF dual-hosted git repository. prabhujoseph pushed a change to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git from 43112bd4726 HADOOP-18306: Warnings should not be shown on cli console when linux user not present on client (#4474). Contributed by swamirishi. add a177232ebc3 YARN-9822.TimelineCollectorWebService#putEntities blocked when ATSV2 HBase is down (#4492) No new revisions were added by this update. Summary of changes: .../yarn/api/records/timeline/TimelineHealth.java | 4 +- .../DocumentStoreTimelineReaderImpl.java | 4 +- .../DocumentStoreTimelineWriterImpl.java | 8 +- .../storage/HBaseTimelineReaderImpl.java | 2 +- .../storage/HBaseTimelineWriterImpl.java | 14 .../collector/TimelineCollector.java | 85 +++--- .../storage/FileSystemTimelineReaderImpl.java | 2 +- .../storage/FileSystemTimelineWriterImpl.java | 15 .../storage/NoOpTimelineWriterImpl.java| 7 ++ .../timelineservice/storage/TimelineWriter.java| 10 +++ .../collector/TestTimelineCollector.java | 66 - 11 files changed, 196 insertions(+), 21 deletions(-) - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
[hadoop] branch trunk updated: HADOOP-18306: Warnings should not be shown on cli console when linux user not present on client (#4474). Contributed by swamirishi.
This is an automated email from the ASF dual-hosted git repository. umamahesh pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/trunk by this push: new 43112bd4726 HADOOP-18306: Warnings should not be shown on cli console when linux user not present on client (#4474). Contributed by swamirishi. 43112bd4726 is described below commit 43112bd472661b4044808210a77ae938a120934f Author: swamirishi <47532440+swamiri...@users.noreply.github.com> AuthorDate: Mon Jun 27 17:20:58 2022 -0700 HADOOP-18306: Warnings should not be shown on cli console when linux user not present on client (#4474). Contributed by swamirishi. --- .../java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java index f4db520ac24..d0c4e11cbef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java @@ -215,7 +215,7 @@ public class ShellBasedUnixGroupsMapping extends Configured groups = resolvePartialGroupNames(user, e.getMessage(), executor.getOutput()); } catch (PartialGroupNameException pge) { - LOG.warn("unable to return groups for user {}", user, pge); + LOG.debug("unable to return groups for user {}", user, pge); return EMPTY_GROUPS_SET; } } - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
[hadoop] branch trunk updated (25f8bdcd210 -> 823f5ee0d4c)
This is an automated email from the ASF dual-hosted git repository. stevel pushed a change to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git from 25f8bdcd210 HADOOP-18308 - Update to Apache LDAP API 2.0.x (#4477) add 823f5ee0d4c HADOOP-18242. ABFS Rename Failure when tracking metadata is in an incomplete state (#4331) No new revisions were added by this update. Summary of changes: .../hadoop/fs/azurebfs/AbfsCountersImpl.java | 6 +- .../apache/hadoop/fs/azurebfs/AbfsStatistic.java | 11 +- .../hadoop/fs/azurebfs/AzureBlobFileSystem.java| 2 +- .../fs/azurebfs/AzureBlobFileSystemStore.java | 30 - .../hadoop/fs/azurebfs/services/AbfsClient.java| 58 - .../azurebfs/services/AbfsClientRenameResult.java | 61 + .../ITestAzureBlobFileSystemDelegationSAS.java | 6 +- .../azurebfs/ITestAzureBlobFileSystemRename.java | 31 + .../fs/azurebfs/ITestCustomerProvidedKey.java | 4 +- .../services/TestAbfsRenameRetryRecovery.java | 139 + 10 files changed, 328 insertions(+), 20 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
[hadoop] 04/05: HADOOP-18105 Implement buffer pooling with weak references (#4263)
This is an automated email from the ASF dual-hosted git repository. mthakur pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git commit bfb7d020d10eb232e8efebc7567e4999abcb5df4 Author: Mukund Thakur AuthorDate: Thu Jun 2 03:38:06 2022 +0530 HADOOP-18105 Implement buffer pooling with weak references (#4263) part of HADOOP-18103. Required for vectored IO feature. None of current buffer pool implementation is complete. ElasticByteBufferPool doesn't use weak references and could lead to memory leak errors and DirectBufferPool doesn't support caller preferences of direct and heap buffers and has only fixed length buffer implementation. Contributed By: Mukund Thakur --- .../java/org/apache/hadoop/io/ByteBufferPool.java | 5 + .../apache/hadoop/io/ElasticByteBufferPool.java| 4 +- .../io/WeakReferencedElasticByteBufferPool.java| 155 ++ ...estMoreWeakReferencedElasticByteBufferPool.java | 97 + .../TestWeakReferencedElasticByteBufferPool.java | 232 + 5 files changed, 491 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteBufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteBufferPool.java index aa5f8731c54..b30e7cfb9c5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteBufferPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteBufferPool.java @@ -45,4 +45,9 @@ public interface ByteBufferPool { * @param buffera direct bytebuffer */ void putBuffer(ByteBuffer buffer); + + /** + * Clear the buffer pool thus releasing all the buffers. 
+ */ + default void release() { } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ElasticByteBufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ElasticByteBufferPool.java index 6a162c3ff20..c4c29406227 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ElasticByteBufferPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ElasticByteBufferPool.java @@ -36,8 +36,8 @@ import org.apache.hadoop.classification.InterfaceStability; */ @InterfaceAudience.Public @InterfaceStability.Stable -public final class ElasticByteBufferPool implements ByteBufferPool { - private static final class Key implements Comparable { +public class ElasticByteBufferPool implements ByteBufferPool { + protected static final class Key implements Comparable { private final int capacity; private final long insertionTime; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WeakReferencedElasticByteBufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WeakReferencedElasticByteBufferPool.java new file mode 100644 index 000..c71c44e798a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WeakReferencedElasticByteBufferPool.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.lang.ref.WeakReference; +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; + +/** + * Buffer pool implementation which uses weak references to store + * buffers in the pool, such that they are garbage collected when + * there are no references to the buffer during a gc run. This is + * important as direct buffers don't get garbage collected automatically + * during a gc run as they are not stored on heap memory. + * Also the buffers are stored in a tree map which helps in returning + * smallest buffer whose size is just greater than requested length. + * This is a thread safe implementation. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class WeakReferencedElastic
[hadoop] 05/05: HADOOP-18106: Handle memory fragmentation in S3A Vectored IO. (#4445)
This is an automated email from the ASF dual-hosted git repository. mthakur pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git commit c517b086f2c31c3b3891f53699642950575a8716 Author: Mukund Thakur AuthorDate: Tue Jun 21 03:45:40 2022 +0530 HADOOP-18106: Handle memory fragmentation in S3A Vectored IO. (#4445) part of HADOOP-18103. Handling memory fragmentation in S3A vectored IO implementation by allocating smaller user range requested size buffers and directly filling them from the remote S3 stream and skipping undesired data in between ranges. This patch also adds aborting active vectored reads when stream is closed or unbuffer() is called. Contributed By: Mukund Thakur Conflicts: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java --- .../org/apache/hadoop/fs/ChecksumFileSystem.java | 11 +- .../main/java/org/apache/hadoop/fs/FileRange.java | 12 ++ .../org/apache/hadoop/fs/PositionedReadable.java | 4 +- .../org/apache/hadoop/fs/RawLocalFileSystem.java | 16 +- .../org/apache/hadoop/fs/StreamCapabilities.java | 6 + .../hadoop/fs/{impl => }/VectoredReadUtils.java| 91 + .../apache/hadoop/fs/impl/CombinedFileRange.java | 1 - .../apache/hadoop/fs/{ => impl}/FileRangeImpl.java | 9 +- .../site/markdown/filesystem/fsdatainputstream.md | 11 +- .../fs/{impl => }/TestVectoredReadUtils.java | 149 +++ .../contract/AbstractContractVectoredReadTest.java | 198 ++-- .../hadoop/fs/contract/ContractTestUtils.java | 20 ++ .../localfs/TestLocalFSContractVectoredRead.java | 51 + .../org/apache/hadoop/fs/s3a/S3AInputStream.java | 205 ++--- .../contract/s3a/ITestS3AContractVectoredRead.java | 63 ++- .../fs/s3a/scale/AbstractSTestS3AHugeFiles.java| 15 +- .../hadoop/benchmark/VectoredReadBenchmark.java| 4 +- 17 files changed, 609 insertions(+), 257 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java index e612713cfed..b23df1713c0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java @@ -39,7 +39,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl; -import org.apache.hadoop.fs.impl.VectoredReadUtils; import org.apache.hadoop.fs.impl.CombinedFileRange; import org.apache.hadoop.fs.impl.FutureDataInputStreamBuilderImpl; import org.apache.hadoop.fs.impl.OpenFileParameters; @@ -55,6 +54,7 @@ import org.apache.hadoop.util.Progressable; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; import static org.apache.hadoop.fs.impl.StoreImplementationUtils.isProbeForSyncable; +import static org.apache.hadoop.fs.VectoredReadUtils.sortRanges; / * Abstract Checksumed FileSystem. @@ -166,7 +166,7 @@ public abstract class ChecksumFileSystem extends FilterFileSystem { * It verifies that data matches checksums. 
***/ private static class ChecksumFSInputChecker extends FSInputChecker implements - IOStatisticsSource { + IOStatisticsSource, StreamCapabilities { private ChecksumFileSystem fs; private FSDataInputStream datas; private FSDataInputStream sums; @@ -408,7 +408,7 @@ public abstract class ChecksumFileSystem extends FilterFileSystem { int minSeek = minSeekForVectorReads(); int maxSize = maxReadSizeForVectorReads(); List dataRanges = - VectoredReadUtils.sortAndMergeRanges(ranges, bytesPerSum, + VectoredReadUtils.mergeSortedRanges(Arrays.asList(sortRanges(ranges)), bytesPerSum, minSeek, maxReadSizeForVectorReads()); List checksumRanges = findChecksumRanges(dataRanges, bytesPerSum, minSeek, maxSize); @@ -435,6 +435,11 @@ public abstract class ChecksumFileSystem extends FilterFileSystem { } } } + +@Override +public boolean hasCapability(String capability) { + return datas.hasCapability(capability); +} } private static class FSDataBoundedInputStream extends FSDataInputStream { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileRange.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Fi
[hadoop] 01/05: HADOOP-11867. Add a high-performance vectored read API. (#3904)
This is an automated email from the ASF dual-hosted git repository. mthakur pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git commit 5c348c41ab8ddb81146355570856e61e8d129a1e Author: Mukund Thakur AuthorDate: Tue Feb 1 19:52:38 2022 +0530 HADOOP-11867. Add a high-performance vectored read API. (#3904) part of HADOOP-18103. Add support for multiple ranged vectored read api in PositionedReadable. The default iterates through the ranges to read each synchronously, but the intent is that FSDataInputStream subclasses can make more efficient readers especially in object stores implementation. Also added implementation in S3A where smaller ranges are merged and sliced byte buffers are returned to the readers. All the merged ranged are fetched from S3 asynchronously. Contributed By: Owen O'Malley and Mukund Thakur Conflicts: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java pom.xml --- dev-support/Jenkinsfile| 2 +- .../apache/hadoop/fs/BufferedFSInputStream.java| 27 +- .../org/apache/hadoop/fs/ChecksumFileSystem.java | 206 --- .../org/apache/hadoop/fs/FSDataInputStream.java| 22 +- .../main/java/org/apache/hadoop/fs/FileRange.java | 55 +++ .../java/org/apache/hadoop/fs/FileRangeImpl.java | 69 .../org/apache/hadoop/fs/PositionedReadable.java | 43 ++- .../org/apache/hadoop/fs/RawLocalFileSystem.java | 108 +- .../apache/hadoop/fs/impl/CombinedFileRange.java | 71 .../apache/hadoop/fs/impl/VectoredReadUtils.java | 277 +++ .../site/markdown/filesystem/fsdatainputstream.md | 31 ++ .../contract/AbstractContractVectoredReadTest.java | 375 + .../localfs/TestLocalFSContractVectoredRead.java | 35 ++ .../rawlocal/TestRawLocalContractVectoredRead.java | 35 ++ .../hadoop/fs/impl/TestVectoredReadUtils.java | 344 +++ .../java/org/apache/hadoop/test/MoreAsserts.java | 37 +- hadoop-common-project/pom.xml | 1 - hadoop-project/pom.xml | 11 + .../org/apache/hadoop/fs/s3a/S3AFileSystem.java| 14 +- 
.../org/apache/hadoop/fs/s3a/S3AInputStream.java | 288 ++-- .../contract/s3a/ITestS3AContractVectoredRead.java | 54 +++ .../hadoop/fs/s3a/TestS3AInputStreamRetry.java | 3 +- .../hadoop-aws/src/test/resources/log4j.properties | 2 +- hadoop-tools/hadoop-benchmark/pom.xml | 94 ++ .../hadoop-benchmark/src/main/assembly/uber.xml| 33 ++ .../hadoop-benchmark/src/main/findbugs/exclude.xml | 22 ++ .../hadoop/benchmark/VectoredReadBenchmark.java| 245 ++ .../org/apache/hadoop/benchmark/package-info.java | 22 ++ hadoop-tools/pom.xml | 1 + 29 files changed, 2438 insertions(+), 89 deletions(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 5b6fcb68f8d..9edd77b58e5 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -23,7 +23,7 @@ pipeline { options { buildDiscarder(logRotator(numToKeepStr: '5')) -timeout (time: 24, unit: 'HOURS') +timeout (time: 48, unit: 'HOURS') timestamps() checkoutToSubdirectory('src') } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java index 59345f5d25c..7f3171235c8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -22,6 +22,9 @@ import java.io.EOFException; import java.io.FileDescriptor; import java.io.IOException; import java.util.StringJoiner; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.function.IntFunction; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -158,8 +161,24 @@ public class BufferedFSInputStream extends BufferedInputStream @Override public String toString() { return new StringJoiner(", ", -BufferedFSInputStream.class.getSimpleName() + "[", "]") -.add("in=" + in) -.toString(); +BufferedFSInputStream.class.getSimpleName() + "[", "]") +.add("in=" + in) +.toString(); + } + + @Override + public int minSeekForVectorReads() { +return ((PositionedReadable) in).minSeekForVectorReads(); + } + + @Override + public in
[hadoop] branch branch-3.3 updated (4ba463069bd -> c517b086f2c)
This is an automated email from the ASF dual-hosted git repository. mthakur pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git from 4ba463069bd HADOOP-18288. Total requests and total requests per sec served by RPC servers (#4485) new 5c348c41ab8 HADOOP-11867. Add a high-performance vectored read API. (#3904) new 9f03f879633 HADOOP-18104: S3A: Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads (#3964) new bb5a17b1771 HADOOP-18107 Adding scale test for vectored reads for large file (#4273) new bfb7d020d10 HADOOP-18105 Implement buffer pooling with weak references (#4263) new c517b086f2c HADOOP-18106: Handle memory fragmentation in S3A Vectored IO. (#4445) The 5 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: dev-support/Jenkinsfile| 2 +- .../apache/hadoop/fs/BufferedFSInputStream.java| 27 +- .../org/apache/hadoop/fs/ChecksumFileSystem.java | 213 +-- .../org/apache/hadoop/fs/FSDataInputStream.java| 22 +- .../main/java/org/apache/hadoop/fs/FileRange.java | 67 .../org/apache/hadoop/fs/PositionedReadable.java | 41 ++- .../org/apache/hadoop/fs/RawLocalFileSystem.java | 110 +- .../org/apache/hadoop/fs/StreamCapabilities.java | 6 + .../org/apache/hadoop/fs/VectoredReadUtils.java| 292 +++ .../apache/hadoop/fs/impl/CombinedFileRange.java | 70 .../org/apache/hadoop/fs/impl/FileRangeImpl.java | 74 .../java/org/apache/hadoop/io/ByteBufferPool.java | 5 + .../apache/hadoop/io/ElasticByteBufferPool.java| 4 +- .../io/WeakReferencedElasticByteBufferPool.java| 155 .../site/markdown/filesystem/fsdatainputstream.md | 39 ++ .../apache/hadoop/fs/TestVectoredReadUtils.java| 371 +++ .../contract/AbstractContractVectoredReadTest.java | 406 + .../hadoop/fs/contract/ContractTestUtils.java | 84 + 
.../localfs/TestLocalFSContractVectoredRead.java | 86 + .../rawlocal/TestRawLocalContractVectoredRead.java | 35 ++ ...estMoreWeakReferencedElasticByteBufferPool.java | 97 + .../TestWeakReferencedElasticByteBufferPool.java | 232 .../java/org/apache/hadoop/test/MoreAsserts.java | 49 ++- hadoop-common-project/pom.xml | 1 - hadoop-project/pom.xml | 11 + .../java/org/apache/hadoop/fs/s3a/Constants.java | 26 ++ .../org/apache/hadoop/fs/s3a/S3AFileSystem.java| 39 +- .../org/apache/hadoop/fs/s3a/S3AInputStream.java | 391 +++- .../org/apache/hadoop/fs/s3a/S3AReadOpContext.java | 20 +- .../apache/hadoop/fs/s3a/VectoredIOContext.java| 78 .../fs/s3a/impl/GetContentSummaryOperation.java| 3 +- .../site/markdown/tools/hadoop-aws/performance.md | 30 ++ .../contract/s3a/ITestS3AContractVectoredRead.java | 159 .../hadoop/fs/s3a/TestS3AInputStreamRetry.java | 3 +- .../fs/s3a/scale/AbstractSTestS3AHugeFiles.java| 32 ++ .../hadoop-aws/src/test/resources/log4j.properties | 2 +- hadoop-tools/hadoop-benchmark/pom.xml | 94 + .../hadoop-benchmark/src/main/assembly/uber.xml| 33 ++ .../hadoop-benchmark/src/main/findbugs/exclude.xml | 22 ++ .../hadoop/benchmark/VectoredReadBenchmark.java| 245 + .../org/apache/hadoop/benchmark/package-info.java | 22 ++ hadoop-tools/pom.xml | 1 + 42 files changed, 3602 insertions(+), 97 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileRange.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/CombinedFileRange.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileRangeImpl.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WeakReferencedElasticByteBufferPool.java create mode 100644 
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestVectoredReadUtils.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractVectoredRead.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawLocalContractVectoredRead.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/h
[hadoop] 02/05: HADOOP-18104: S3A: Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads (#3964)
This is an automated email from the ASF dual-hosted git repository. mthakur pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git commit 9f03f879633c201e04e594c43e6694d2fc64f0bb Author: Mukund Thakur AuthorDate: Sat Apr 30 04:17:33 2022 +0530 HADOOP-18104: S3A: Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads (#3964) Part of HADOOP-18103. Introducing fs.s3a.vectored.read.min.seek.size and fs.s3a.vectored.read.max.merged.size to configure min seek and max read during a vectored IO operation in S3A connector. These properties actually define how the ranges will be merged. To completely disable merging set fs.s3a.max.readsize.vectored.read to 0. Contributed By: Mukund Thakur --- .../site/markdown/filesystem/fsdatainputstream.md | 1 + .../contract/AbstractContractVectoredReadTest.java | 21 +++--- .../hadoop/fs/impl/TestVectoredReadUtils.java | 24 +++ .../java/org/apache/hadoop/test/MoreAsserts.java | 12 .../java/org/apache/hadoop/fs/s3a/Constants.java | 26 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java| 25 ++- .../org/apache/hadoop/fs/s3a/S3AInputStream.java | 21 ++ .../org/apache/hadoop/fs/s3a/S3AReadOpContext.java | 21 +- .../apache/hadoop/fs/s3a/VectoredIOContext.java| 78 ++ .../site/markdown/tools/hadoop-aws/performance.md | 30 + .../contract/s3a/ITestS3AContractVectoredRead.java | 54 ++- 11 files changed, 297 insertions(+), 16 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md index 0fe1772d266..e4a2830967e 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md @@ -474,6 +474,7 @@ end of first and start of next range is more than this value. 
Maximum number of bytes which can be read in one go after merging the ranges. Two ranges won't be merged if the combined data to be read is more than this value. +Essentially setting this to 0 will disable the merging of ranges. ## Consistency diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java index eee4b11e739..756c3de85cc 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java @@ -18,15 +18,6 @@ package org.apache.hadoop.fs.contract; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileRange; -import org.apache.hadoop.fs.FileRangeImpl; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.impl.FutureIOSupport; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.EOFException; import java.io.IOException; import java.nio.ByteBuffer; @@ -42,6 +33,15 @@ import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileRange; +import org.apache.hadoop.fs.FileRangeImpl; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.impl.FutureIOSupport; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; @@ -52,7 +52,7 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac public static final int DATASET_LEN = 64 * 1024; private static final byte[] DATASET = 
ContractTestUtils.dataset(DATASET_LEN, 'a', 32); - private static final String VECTORED_READ_FILE_NAME = "vectored_file.txt"; + protected static final String VECTORED_READ_FILE_NAME = "vectored_file.txt"; private static final String VECTORED_READ_FILE_1MB_NAME = "vectored_file_1M.txt"; private static final byte[] DATASET_MB = ContractTestUtils.dataset(1024 * 1024, 'a', 256); @@ -172,6 +172,7 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac } } + @Test public void testSameRanges() throws Exception { FileSystem fs = getFileSystem(); List fileRanges = new ArrayList<>(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java index f789f361905..cfd366701be 100644 --
[hadoop] 03/05: HADOOP-18107 Adding scale test for vectored reads for large file (#4273)
This is an automated email from the ASF dual-hosted git repository. mthakur pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git commit bb5a17b177151fcbe4fcdb63bc892dcf6498323c Author: Mukund Thakur AuthorDate: Thu Jun 2 03:35:54 2022 +0530 HADOOP-18107 Adding scale test for vectored reads for large file (#4273) part of HADOOP-18103. Contributed By: Mukund Thakur --- .../contract/AbstractContractVectoredReadTest.java | 86 +++--- .../hadoop/fs/contract/ContractTestUtils.java | 64 .../org/apache/hadoop/fs/s3a/S3AInputStream.java | 1 + .../org/apache/hadoop/fs/s3a/S3AReadOpContext.java | 1 - .../fs/s3a/impl/GetContentSummaryOperation.java| 3 +- .../fs/s3a/scale/AbstractSTestS3AHugeFiles.java| 33 + 6 files changed, 111 insertions(+), 77 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java index 756c3de85cc..e8c86b5dbbc 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java @@ -43,7 +43,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.impl.FutureIOSupport; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertDatasetEquals; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import static org.apache.hadoop.fs.contract.ContractTestUtils.validateVectoredReadResult; @RunWith(Parameterized.class) public abstract class AbstractContractVectoredReadTest extends AbstractFSContractTestBase { @@ -53,8 +55,6 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac public static final int DATASET_LEN = 64 * 1024; 
private static final byte[] DATASET = ContractTestUtils.dataset(DATASET_LEN, 'a', 32); protected static final String VECTORED_READ_FILE_NAME = "vectored_file.txt"; - private static final String VECTORED_READ_FILE_1MB_NAME = "vectored_file_1M.txt"; - private static final byte[] DATASET_MB = ContractTestUtils.dataset(1024 * 1024, 'a', 256); private final IntFunction allocate; @@ -77,8 +77,6 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac Path path = path(VECTORED_READ_FILE_NAME); FileSystem fs = getFileSystem(); createFile(fs, path, true, DATASET); -Path bigFile = path(VECTORED_READ_FILE_1MB_NAME); -createFile(fs, bigFile, true, DATASET_MB); } @Test @@ -99,7 +97,7 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac CompletableFuture combinedFuture = CompletableFuture.allOf(completableFutures); combinedFuture.get(); - validateVectoredReadResult(fileRanges); + validateVectoredReadResult(fileRanges, DATASET); } } @@ -132,7 +130,7 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac fileRanges.add(new FileRangeImpl(16 * 1024 + 101, 100)); try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { in.readVectored(fileRanges, allocate); - validateVectoredReadResult(fileRanges); + validateVectoredReadResult(fileRanges, DATASET); } } @@ -149,7 +147,7 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac fileRanges.add(new FileRangeImpl(8*1024 - 101, 100)); try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { in.readVectored(fileRanges, allocate); - validateVectoredReadResult(fileRanges); + validateVectoredReadResult(fileRanges, DATASET); } } @@ -168,7 +166,7 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac fileRanges.add(new FileRangeImpl(40*1024, 1024)); try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { in.readVectored(fileRanges, allocate); - 
validateVectoredReadResult(fileRanges); + validateVectoredReadResult(fileRanges, DATASET); } } @@ -184,24 +182,7 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac .build(); try (FSDataInputStream in = builder.get()) { in.readVectored(fileRanges, allocate); - validateVectoredReadResult(fileRanges); -} - } - - @Test - public void testVectoredRead1MBFile() throws Exception { -FileSystem fs = getFileSystem(); -List fileRanges = new ArrayList<>(); -fileRanges.add(new FileRangeImpl(1293, 25837)); -CompletableFuture builder = -fs.openFile(path(VECTORED_READ_FILE_1MB_NAME)) -.build(); -
[hadoop] branch trunk updated: HADOOP-18308 - Update to Apache LDAP API 2.0.x (#4477)
This is an automated email from the ASF dual-hosted git repository. stevel pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/trunk by this push: new 25f8bdcd210 HADOOP-18308 - Update to Apache LDAP API 2.0.x (#4477) 25f8bdcd210 is described below commit 25f8bdcd210aa4f64a5d53d93628be0355bf68f5 Author: Colm O hEigeartaigh AuthorDate: Mon Jun 27 11:15:18 2022 +0100 HADOOP-18308 - Update to Apache LDAP API 2.0.x (#4477) Update the dependencies of the LDAP libraries used for testing: ldap-api.version = 2.0.0 apacheds.version = 2.0.0.AM26 Contributed by Colm O hEigeartaigh. --- .../security/authentication/client/TestKerberosAuthenticator.java | 2 +- hadoop-project/pom.xml| 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/TestKerberosAuthenticator.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/TestKerberosAuthenticator.java index 0d8f1c04137..bc316ef8cb8 100644 --- a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/TestKerberosAuthenticator.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/TestKerberosAuthenticator.java @@ -27,7 +27,7 @@ import java.nio.charset.CharacterCodingException; import javax.security.sasl.AuthenticationException; import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang.reflect.FieldUtils; +import org.apache.commons.lang3.reflect.FieldUtils; import org.apache.hadoop.minikdc.KerberosSecurityTestcase; import org.apache.hadoop.security.authentication.KerberosTestUtils; import org.apache.hadoop.security.authentication.server.AuthenticationFilter; diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index e8cb47efe4b..72504c1825e 100644 --- 
a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -111,8 +111,8 @@ 1.68 -2.0.0-M21 -1.0.0-M33 +2.0.0.AM26 +2.0.0 1.9.4 - To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org