Repository: zookeeper Updated Branches: refs/heads/master 477fa0724 -> fe25fed93
ZOOKEEPER-3071: Add a config parameter to control transaction log size Author: Suyog Mapara <[email protected]> Author: Suyog Mapara <[email protected]> Reviewers: [email protected], [email protected] Closes #567 from suyogmapara/ZOOKEEPER-3071 and squashes the following commits: 0a2899994 [Suyog Mapara] Addressing comments 9d5765181 [Suyog Mapara] Update documentation per comments. 5201329bc [Suyog Mapara] Updating documentation 6d2b72058 [Suyog Mapara] Addressing comments 9386e2b6c [Suyog Mapara] ZOOKEEPER-3071: Updating comment to include more details about the feature 5a28efd72 [Suyog Mapara] ZOOKEEPER-3071: Addressing comments and fixing incorrect merge. 1aa12f9d3 [Suyog Mapara] ZOOKEEPER-3071: Add a config parameter to control transaction log size Project: http://git-wip-us.apache.org/repos/asf/zookeeper/repo Commit: http://git-wip-us.apache.org/repos/asf/zookeeper/commit/fe25fed9 Tree: http://git-wip-us.apache.org/repos/asf/zookeeper/tree/fe25fed9 Diff: http://git-wip-us.apache.org/repos/asf/zookeeper/diff/fe25fed9 Branch: refs/heads/master Commit: fe25fed9390a159b24f4c4fa31e3a7911f2c3b81 Parents: 477fa07 Author: Suyog Mapara <[email protected]> Authored: Mon Nov 12 13:36:32 2018 -0800 Committer: Andor Molnar <[email protected]> Committed: Mon Nov 12 13:36:32 2018 -0800 ---------------------------------------------------------------------- .../main/resources/markdown/zookeeperAdmin.md | 19 +++ .../server/persistence/FileTxnLog.java | 52 ++++++++ .../server/persistence/FileTxnLogTest.java | 128 ++++++++++++++++++- 3 files changed, 197 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/zookeeper/blob/fe25fed9/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md ---------------------------------------------------------------------- diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md 
index cceaec3..bb7d792 100644 --- a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md +++ b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md @@ -618,6 +618,25 @@ property, when available, is noted below. reaches a runtime generated random value in the \[snapCount/2+1, snapCount] range.The default snapCount is 100,000. +* *txnLogSizeLimitInKb* : + (Java system property: **zookeeper.txnLogSizeLimitInKb**) + Zookeeper transaction log file can also be controlled more + directly using txnLogSizeLimitInKb. Larger txn logs can lead to + slower follower syncs when sync is done using transaction log. + This is because leader has to scan through the appropriate log + file on disk to find the transaction to start sync from. + This feature is turned off by default and snapCount is the + only value that limits transaction log size. When enabled + Zookeeper will roll the log when either of the limits is hit. + Please note that actual log size can exceed this value by the size + of the serialized transaction. On the other hand, if this value is + set too close to (or smaller than) **preAllocSize**, + it can cause Zookeeper to roll the log for every transaction. While + this is not a correctness issue, this may cause severely degraded + performance. To avoid this and to get the most out of this feature, it is + recommended to set the value to N * **preAllocSize** + where N >= 2. 
+ * *maxClientCnxns* : (No Java system property) Limits the number of concurrent connections (at the socket http://git-wip-us.apache.org/repos/asf/zookeeper/blob/fe25fed9/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnLog.java ---------------------------------------------------------------------- diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnLog.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnLog.java index 25ef4a0..d95dac8 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnLog.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnLog.java @@ -106,6 +106,21 @@ public class FileTxnLog implements TxnLog { /** Maximum time we allow for elapsed fsync before WARNing */ private final static long fsyncWarningThresholdMS; + /** + * This parameter limit the size of each txnlog to a given limit (KB). + * It does not affect how often the system will take a snapshot [zookeeper.snapCount] + * We roll the txnlog when either of the two limits are reached. + * Also since we only roll the logs at transaction boundaries, actual file size can exceed + * this limit by the maximum size of a serialized transaction. + * The feature is disabled by default (-1) + */ + private static final String txnLogSizeLimitSetting = "zookeeper.txnLogSizeLimitInKb"; + + /** + * The actual txnlog size limit in bytes. 
+ */ + private static long txnLogSizeLimit = -1; + static { LOG = LoggerFactory.getLogger(FileTxnLog.class); @@ -114,6 +129,15 @@ public class FileTxnLog implements TxnLog { if ((fsyncWarningThreshold = Long.getLong(ZOOKEEPER_FSYNC_WARNING_THRESHOLD_MS_PROPERTY)) == null) fsyncWarningThreshold = Long.getLong(FSYNC_WARNING_THRESHOLD_MS_PROPERTY, 1000); fsyncWarningThresholdMS = fsyncWarningThreshold; + + Long logSize = Long.getLong(txnLogSizeLimitSetting, -1); + if (logSize > 0) { + LOG.info("{} = {}", txnLogSizeLimitSetting, logSize); + + // Convert to bytes + logSize = logSize * 1024; + txnLogSizeLimit = logSize; + } } long lastZxidSeen; @@ -161,6 +185,24 @@ public class FileTxnLog implements TxnLog { } /** + * Set log size limit + */ + public static void setTxnLogSizeLimit(long size) { + txnLogSizeLimit = size; + } + + /** + * Return the current on-disk size of log size. This will be accurate only + * after commit() is called. Otherwise, unflushed txns may not be included. + */ + public synchronized long getCurrentLogSize() { + if (logFileWrite != null) { + return logFileWrite.length(); + } + return 0; + } + + /** * creates a checksum algorithm to be used * @return the checksum used for this txnlog */ @@ -352,6 +394,16 @@ public class FileTxnLog implements TxnLog { while (streamsToFlush.size() > 1) { streamsToFlush.removeFirst().close(); } + + // Roll the log file if we exceed the size limit + if(txnLogSizeLimit > 0) { + long logSize = getCurrentLogSize(); + + if (logSize > txnLogSizeLimit) { + LOG.debug("Log size limit reached: {}", logSize); + rollLog(); + } + } } /** http://git-wip-us.apache.org/repos/asf/zookeeper/blob/fe25fed9/zookeeper-server/src/test/java/org/apache/zookeeper/server/persistence/FileTxnLogTest.java ---------------------------------------------------------------------- diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/persistence/FileTxnLogTest.java 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/persistence/FileTxnLogTest.java index 77b7210..e0d34ab 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/persistence/FileTxnLogTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/persistence/FileTxnLogTest.java @@ -17,9 +17,13 @@ */ package org.apache.zookeeper.server.persistence; -import org.apache.zookeeper.ZKTestCase; -import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.*; +import org.apache.zookeeper.data.Stat; +import org.apache.zookeeper.proto.CreateRequest; +import org.apache.zookeeper.server.ServerCnxnFactory; import org.apache.zookeeper.server.ServerStats; +import org.apache.zookeeper.server.ZKDatabase; +import org.apache.zookeeper.server.ZooKeeperServer; import org.apache.zookeeper.test.ClientBase; import org.apache.zookeeper.txn.CreateTxn; import org.apache.zookeeper.txn.TxnHeader; @@ -31,6 +35,8 @@ import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.util.Arrays; +import java.util.HashSet; +import java.util.Random; import static org.hamcrest.core.Is.is; import static org.hamcrest.core.IsEqual.equalTo; @@ -135,4 +141,122 @@ public class FileTxnLogTest extends ZKTestCase { Assert.assertEquals((long) i + 1 , serverStats.getFsyncThresholdExceedCount()); } } + + private static String HOSTPORT = "127.0.0.1:" + PortAssignment.unique(); + private static final int CONNECTION_TIMEOUT = 3000; + + // Overhead is about 150 bytes for txn created in this test + private static final int NODE_SIZE = 1024; + private final long PREALLOCATE = 512; + private final long LOG_SIZE_LIMIT = 1024 * 4; + + /** + * Test that log size get update correctly + */ + @Test + public void testGetCurrentLogSize() throws Exception { + FileTxnLog.setTxnLogSizeLimit(-1); + File tmpDir = ClientBase.createTmpDir(); + FileTxnLog log = new FileTxnLog(tmpDir); + FileTxnLog.setPreallocSize(PREALLOCATE); + CreateRequest record = new 
CreateRequest(null, new byte[NODE_SIZE], + ZooDefs.Ids.OPEN_ACL_UNSAFE, 0); + int zxid = 1; + for (int i = 0; i < 4; i++) { + log.append(new TxnHeader(0, 0, zxid++, 0, 0), record); + LOG.debug("Current log size: " + log.getCurrentLogSize()); + } + log.commit(); + LOG.info("Current log size: " + log.getCurrentLogSize()); + Assert.assertTrue(log.getCurrentLogSize() > (zxid - 1) * NODE_SIZE); + for (int i = 0; i < 4; i++) { + log.append(new TxnHeader(0, 0, zxid++, 0, 0), record); + LOG.debug("Current log size: " + log.getCurrentLogSize()); + } + log.commit(); + LOG.info("Current log size: " + log.getCurrentLogSize()); + Assert.assertTrue(log.getCurrentLogSize() > (zxid - 1) * NODE_SIZE); + } + + /** + * Test that the server can correctly load the data when there are multiple + * txnlogs per snapshot + */ + @Test + public void testLogSizeLimit() throws Exception { + File tmpDir = ClientBase.createTmpDir(); + ClientBase.setupTestEnv(); + + // Need to override preallocate set by setupTestEnv() + // We don't need to unset these values since each unit test run in + // a separate JVM instance + FileTxnLog.setPreallocSize(PREALLOCATE); + FileTxnLog.setTxnLogSizeLimit(LOG_SIZE_LIMIT); + + ZooKeeperServer zks = new ZooKeeperServer(tmpDir, tmpDir, 3000); + final int PORT = Integer.parseInt(HOSTPORT.split(":")[1]); + ServerCnxnFactory f = ServerCnxnFactory.createFactory(PORT, -1); + f.startup(zks); + Assert.assertTrue("waiting for server being up ", + ClientBase.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT)); + ZooKeeper zk = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, event -> { }); + + // Generate transactions + HashSet<Long> zxids = new HashSet<Long>(); + byte[] bytes = new byte[NODE_SIZE]; + Random random = new Random(); + random.nextBytes(bytes); + + // We will create enough txn to generate 3 logs + long txnCount = LOG_SIZE_LIMIT / NODE_SIZE / 2 * 5; + + LOG.info("Creating " + txnCount + " txns"); + + try { + for (long i = 0; i < txnCount; i++) { + Stat stat = new Stat(); + 
zk.create("/node-" + i, bytes, ZooDefs.Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + zk.getData("/node-" + i, null, stat); + zxids.add(stat.getCzxid()); + } + + } finally { + zk.close(); + } + + // shutdown + f.shutdown(); + Assert.assertTrue("waiting for server to shutdown", + ClientBase.waitForServerDown(HOSTPORT, CONNECTION_TIMEOUT)); + + File logDir = new File(tmpDir, FileTxnSnapLog.version + FileTxnSnapLog.VERSION); + File[] txnLogs = FileTxnLog.getLogFiles(logDir.listFiles(), 0); + + Assert.assertEquals("Unexpected number of logs", 3, txnLogs.length); + + // Log size should not exceed limit by more than one node size; + long threshold = LOG_SIZE_LIMIT + NODE_SIZE; + LOG.info(txnLogs[0].getAbsolutePath()); + Assert.assertTrue( + "Exceed log size limit: " + txnLogs[0].length(), + threshold > txnLogs[0].length()); + LOG.info(txnLogs[1].getAbsolutePath()); + Assert.assertTrue( + "Exceed log size limit " + txnLogs[1].length(), + threshold > txnLogs[1].length()); + + // Start database only + zks = new ZooKeeperServer(tmpDir, tmpDir, 3000); + zks.startdata(); + + ZKDatabase db = zks.getZKDatabase(); + + for (long i = 0; i < txnCount; i++) { + Stat stat = new Stat(); + byte[] data = db.getData("/node-" + i, stat, null); + Assert.assertArrayEquals("Missmatch data", bytes, data); + Assert.assertTrue("Unknown zxid ", zxids.contains(stat.getMzxid())); + } + } }
