This is an automated email from the ASF dual-hosted git repository. xkrogen pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new 2067fcb6463 HDFS-16550. Allow JN edit cache size to be set as a fraction of heap memory (#4209) 2067fcb6463 is described below commit 2067fcb64638df2dde8100f44cce431baa23ceb8 Author: litao <tomlees...@gmail.com> AuthorDate: Wed Nov 30 23:44:21 2022 +0800 HDFS-16550. Allow JN edit cache size to be set as a fraction of heap memory (#4209) --- .../java/org/apache/hadoop/hdfs/DFSConfigKeys.java | 5 +++- .../hdfs/qjournal/server/JournaledEditsCache.java | 28 ++++++++++++++++------ .../src/main/resources/hdfs-default.xml | 18 +++++++++++++- .../site/markdown/HDFSHighAvailabilityWithQJM.md | 10 ++++++++ .../src/site/markdown/ObserverNameNode.md | 18 ++++++++++++++ .../qjournal/server/TestJournaledEditsCache.java | 21 ++++++++++++++++ 6 files changed, 91 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index f766c48d7c5..0e10bc61c99 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -1424,7 +1424,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_JOURNALNODE_SYNC_INTERVAL_DEFAULT = 2*60*1000L; public static final String DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY = "dfs.journalnode.edit-cache-size.bytes"; - public static final int DFS_JOURNALNODE_EDIT_CACHE_SIZE_DEFAULT = 1024 * 1024; + + public static final String DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY = + "dfs.journalnode.edit-cache-size.fraction"; + public static final float DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_DEFAULT = 0.5f; // Journal-node related configs for the client side. public static final String DFS_QJOURNAL_QUEUE_SIZE_LIMIT_KEY = "dfs.qjournal.queued-edits.limit.mb"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java index 65f54609ef3..339b7fa7b68 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream; import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; import org.apache.hadoop.util.AutoCloseableLock; +import org.apache.hadoop.util.Preconditions; /** * An in-memory cache of edits in their serialized form. This is used to serve @@ -121,12 +122,18 @@ class JournaledEditsCache { // ** End lock-protected fields ** JournaledEditsCache(Configuration conf) { + float fraction = conf.getFloat(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, + DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_DEFAULT); + Preconditions.checkArgument((fraction > 0 && fraction < 1.0f), + String.format("Cache config %s is set at %f, it should be a positive float value, " + + "less than 1.0. The recommended value is less than 0.9.", + DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, fraction)); capacity = conf.getInt(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, - DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_DEFAULT); + (int) (Runtime.getRuntime().maxMemory() * fraction)); if (capacity > 0.9 * Runtime.getRuntime().maxMemory()) { Journal.LOG.warn(String.format("Cache capacity is set at %d bytes but " + "maximum JVM memory is only %d bytes. It is recommended that you " + - "decrease the cache size or increase the heap size.", + "decrease the cache size/fraction or increase the heap size.", capacity, Runtime.getRuntime().maxMemory())); } Journal.LOG.info("Enabling the journaled edits cache with a capacity " + @@ -277,11 +284,12 @@ class JournaledEditsCache { initialize(INVALID_TXN_ID); Journal.LOG.warn(String.format("A single batch of edits was too " + "large to fit into the cache: startTxn = %d, endTxn = %d, " + - "input length = %d. The capacity of the cache (%s) must be " + + "input length = %d. The cache size (%s) or cache fraction (%s) must be " + "increased for it to work properly (current capacity %d)." + "Cache is now empty.", newStartTxn, newEndTxn, inputData.length, - DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, capacity)); + DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, + DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, capacity)); return; } if (dataMap.isEmpty()) { @@ -388,10 +396,11 @@ class JournaledEditsCache { } else { return new CacheMissException(lowestTxnId - requestedTxnId, "Oldest txn ID available in the cache is %d, but requested txns " + - "starting at %d. The cache size (%s) may need to be increased " + - "to hold more transactions (currently %d bytes containing %d " + + "starting at %d. The cache size (%s) or cache fraction (%s) may need to be " + + "increased to hold more transactions (currently %d bytes containing %d " + "transactions)", lowestTxnId, requestedTxnId, - DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, capacity, + DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, + DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, capacity, highestTxnId - lowestTxnId + 1); } } @@ -414,4 +423,9 @@ class JournaledEditsCache { } + @VisibleForTesting + int getCapacity() { + return capacity; + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 2a0a4945faa..bf7f99aa1fa 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -4945,7 +4945,7 @@ <property> <name>dfs.journalnode.edit-cache-size.bytes</name> - <value>1048576</value> + <value></value> <description> The size, in bytes, of the in-memory cache of edits to keep on the JournalNode. This cache is used to serve edits for tailing via the RPC-based @@ -4955,6 +4955,22 @@ </description> </property> +<property> + <name>dfs.journalnode.edit-cache-size.fraction</name> + <value>0.5f</value> + <description> + This ratio refers to the proportion of the maximum memory of the JVM. + Used to calculate the size of the edits cache that is kept in the JournalNode's memory. + This config is an alternative to the dfs.journalnode.edit-cache-size.bytes. + And it is used to serve edits for tailing via the RPC-based mechanism, and is only + enabled when dfs.ha.tail-edits.in-progress is true. Transactions range in size but + are around 200 bytes on average, so the default of 1MB can store around 5000 transactions. + So we can configure a reasonable value based on the maximum memory. The recommended value + is less than 0.9. If we set dfs.journalnode.edit-cache-size.bytes, this parameter will + not take effect. + </description> +</property> + <property> <name>dfs.journalnode.kerberos.internal.spnego.principal</name> <value></value> diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md index 5591f4f2245..b6b408db8b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md @@ -502,6 +502,16 @@ lag time will be much longer. The relevant configurations are: the oldest data in the cache was at transaction ID 20, a value of 10 would be added to the average. +* **dfs.journalnode.edit-cache-size.fraction** - This fraction refers to the proportion of + the maximum memory of the JVM. Used to calculate the size of the edits cache that is + kept in the JournalNode's memory. This config is an alternative to the + dfs.journalnode.edit-cache-size.bytes. And it is used to serve edits for tailing via + the RPC-based mechanism, and is only enabled when dfs.ha.tail-edits.in-progress is true. + Transactions range in size but are around 200 bytes on average, so the default of 1MB + can store around 5000 transactions. So we can configure a reasonable value based on + the maximum memory. The recommended value is less than 0.9. If we set + dfs.journalnode.edit-cache-size.bytes, this parameter will not take effect. + This feature is primarily useful in conjunction with the Standby/Observer Read feature. Using this feature, read requests can be serviced from non-active NameNodes; thus tailing in-progress edits provides these nodes with the ability to serve requests with data which is much more fresh. See the diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ObserverNameNode.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ObserverNameNode.md index 00aeb5bd2e0..74026ec8625 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ObserverNameNode.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ObserverNameNode.md @@ -194,6 +194,24 @@ few configurations to your **hdfs-site.xml**: <value>1048576</value> </property> +* **dfs.journalnode.edit-cache-size.fraction** - the fraction refers to + the proportion of the maximum memory of the JVM. + + Used to calculate the size of the edits cache that + is kept in the JournalNode's memory. + This config is an alternative to the dfs.journalnode.edit-cache-size.bytes. + And it is used to serve edits for tailing via the RPC-based mechanism, and is only + enabled when dfs.ha.tail-edits.in-progress is true. Transactions range in size but + are around 200 bytes on average, so the default of 1MB can store around 5000 transactions. + So we can configure a reasonable value based on the maximum memory. The recommended value + is less than 0.9. If we set dfs.journalnode.edit-cache-size.bytes, this parameter will + not take effect. + + <property> + <name>dfs.journalnode.edit-cache-size.fraction</name> + <value>0.5f</value> + </property> + * **dfs.namenode.accesstime.precision** -- whether to enable access time for HDFS file. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournaledEditsCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournaledEditsCache.java index 2a178a1547e..82b8b587694 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournaledEditsCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournaledEditsCache.java @@ -221,6 +221,27 @@ public class TestJournaledEditsCache { cache.retrieveEdits(-1, 10, new ArrayList<>()); } + @Test + public void testCacheSizeConfigs() { + // Assert the default configs. + Configuration config = new Configuration(); + cache = new JournaledEditsCache(config); + assertEquals((int) (Runtime.getRuntime().maxMemory() * 0.5f), cache.getCapacity()); + + // Set dfs.journalnode.edit-cache-size.bytes. + Configuration config1 = new Configuration(); + config1.setInt(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, 1); + config1.setFloat(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, 0.1f); + cache = new JournaledEditsCache(config1); + assertEquals(1, cache.getCapacity()); + + // Don't set dfs.journalnode.edit-cache-size.bytes. + Configuration config2 = new Configuration(); + config2.setFloat(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, 0.1f); + cache = new JournaledEditsCache(config2); + assertEquals((int) (Runtime.getRuntime().maxMemory() * 0.1f), cache.getCapacity()); + } + private void storeEdits(int startTxn, int endTxn) throws Exception { cache.storeEdits(createTxnData(startTxn, endTxn - startTxn + 1), startTxn, endTxn, NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org