[
https://issues.apache.org/jira/browse/HDFS-16531?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17525232#comment-17525232
]
Ayush Saxena commented on HDFS-16531:
-------------------------------------
Just exploring. I guess this optimisation should only apply if the replication
factor is the same across the current file and all of its snapshots. You could
try something like this, if it suits you [~sodonnell]:
{noformat}
diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
index a2c9f6bd76b..5073edb2361 100644
---
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
+++
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
@@ -402,7 +402,7 @@ static SetRepStatus unprotectedSetReplication(
INodeFile file = inode.asFile();
// Make sure the directory has sufficient quotas
short oldBR = file.getPreferredBlockReplication();
- if (oldBR == replication) {
+ if (oldBR == replication && file.isReplicationInSync()) {
// No need to do anything as the requested rep factor is the same as
// existing. Returning UNCHANGED to we can skip writing edits, but still
// log a successful audit message.
diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
index aa2b95d2ea6..59c7218f868 100644
---
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
+++
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
@@ -557,6 +557,16 @@ public short getPreferredBlockReplication() {
return (short) (ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits());
}
+ public boolean isReplicationInSync() {
+ short current = getFileReplication(CURRENT_STATE_ID);
+ FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
+ if (sf != null) {
+ return sf
+ .isReplicationInSyncWithCurrent(sf.isCurrentFileDeleted() ? -1 :
current);
+ }
+ return true;
+ }
+
/** Set the replication factor of this file. */
private void setFileReplication(short replication) {
long layoutRedundancy =
diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java
index 5263ef357bf..c948115a1a1 100644
---
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java
+++
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java
@@ -81,6 +81,21 @@ public short getMaxBlockRepInDiffs(FileDiff excluded) {
return max;
}
+ public boolean isReplicationInSyncWithCurrent(short currentRepl) {
+ for (FileDiff d : getDiffs()) {
+ if (d.snapshotINode != null) {
+ short replication = d.snapshotINode.getFileReplication();
+ if (currentRepl == -1) {
+ currentRepl = replication;
+ } else if (currentRepl != replication) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
boolean changedBetweenSnapshots(INodeFile file, Snapshot from, Snapshot to) {
int[] diffIndexPair = diffs.changedBetweenSnapshots(from, to);
if (diffIndexPair == null) {
{noformat}
> Avoid setReplication logging an edit record if old replication equals the new
> value
> -----------------------------------------------------------------------------------
>
> Key: HDFS-16531
> URL: https://issues.apache.org/jira/browse/HDFS-16531
> Project: Hadoop HDFS
> Issue Type: Improvement
> Components: namenode
> Reporter: Stephen O'Donnell
> Assignee: Stephen O'Donnell
> Priority: Major
> Labels: pull-request-available
> Fix For: 3.4.0, 3.2.4, 3.3.4
>
> Time Spent: 1h 20m
> Remaining Estimate: 0h
>
> I recently came across a NN log where about 800k setRep calls were made,
> setting the replication from 3 to 3, i.e. leaving it unchanged.
> Even in a case like this, we log an edit record, an audit log, and perform
> some quota checks etc.
> I believe it should be possible to avoid some of the work if we check for
> oldRep == newRep and jump out of the method early.
--
This message was sent by Atlassian Jira
(v8.20.7#820007)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]