jiazhai closed pull request #754: Issue #753: Allow option to disable data sync 
on journal
URL: https://github.com/apache/bookkeeper/pull/754
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/bookkeeper-server/conf/bk_server.conf 
b/bookkeeper-server/conf/bk_server.conf
index b4f37e441..7bdfa0d78 100755
--- a/bookkeeper-server/conf/bk_server.conf
+++ b/bookkeeper-server/conf/bk_server.conf
@@ -296,6 +296,14 @@ journalDirectory=/tmp/bk-txn
 # Should we remove pages from page cache after force write
 # journalRemoveFromPageCache=false
 
+# Should the data be fsynced on journal before acknowledgment.
+# By default, data sync is enabled to guarantee durability of writes.
+# Beware: while disabling data sync in the Bookie journal might improve the 
bookie write performance, it will also
+# introduce the possibility of data loss. With no sync, the journal entries 
are written in the OS page cache but
+# not flushed to disk. In case of power failure, the affected bookie might 
lose the unflushed data. If the ledger
+# is replicated to multiple bookies, the chances of data loss are reduced 
though still present.
+# journalSyncData=true
+
 # Should we group journal force writes, which optimize group commit
 # for higher throughput
 # journalAdaptiveGroupWrites=true
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Journal.java 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Journal.java
index 1f8aef400..1a3635c2a 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Journal.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Journal.java
@@ -499,6 +499,9 @@ static void writePaddingBytes(JournalChannel jc, ByteBuffer 
paddingBuffer, int j
     // should we hint the filesystem to remove pages from cache after force 
write
     private final boolean removePagesFromCache;
 
+    // Should data be fsynced on disk before triggering the callback
+    private final boolean syncData;
+
     private final LastLogMark lastLogMark = new LastLogMark(0, 0);
 
     /**
@@ -543,6 +546,7 @@ public Journal(File journalDirectory, ServerConfiguration 
conf, LedgerDirsManage
         this.maxJournalSize = conf.getMaxJournalSizeMB() * MB;
         this.journalPreAllocSize = conf.getJournalPreAllocSizeMB() * MB;
         this.journalWriteBufferSize = conf.getJournalWriteBufferSizeKB() * KB;
+        this.syncData = conf.getJournalSyncData();
         this.maxBackupJournals = conf.getMaxBackupJournals();
         this.forceWriteThread = new ForceWriteThread(this, 
conf.getJournalAdaptiveGroupWrites());
         this.maxGroupWaitInNanos = 
TimeUnit.MILLISECONDS.toNanos(conf.getJournalMaxGroupWaitMSec());
@@ -914,9 +918,25 @@ public void run() {
                             
forceWriteBatchEntriesStats.registerSuccessfulValue(toFlush.size());
                             
forceWriteBatchBytesStats.registerSuccessfulValue(batchSize);
 
-                            forceWriteRequests.put(new 
ForceWriteRequest(logFile, logId, lastFlushPosition, toFlush,
-                                    (lastFlushPosition > maxJournalSize), 
false));
-                            toFlush = new LinkedList<QueueEntry>();
+                            boolean shouldRolloverJournal = (lastFlushPosition 
> maxJournalSize);
+                            if (syncData) {
+                                // Trigger data sync to disk in the 
"Force-Write" thread. Callback will be triggered after data is committed to disk
+                                forceWriteRequests.put(new 
ForceWriteRequest(logFile, logId, lastFlushPosition, toFlush, 
shouldRolloverJournal, false));
+                                toFlush = new LinkedList<QueueEntry>();
+                            } else {
+                                // Data is already written on the file (though 
it might still be in the OS page-cache)
+                                lastLogMark.setCurLogMark(logId, 
lastFlushPosition);
+                                for (int i = 0; i < toFlush.size(); i++) {
+                                    cbThreadPool.execute((QueueEntry) 
toFlush.get(i));
+                                }
+
+                                toFlush.clear();
+                                if (shouldRolloverJournal) {
+                                    forceWriteRequests.put(new 
ForceWriteRequest(logFile, logId, lastFlushPosition,
+                                            new LinkedList<>(), 
shouldRolloverJournal, false));
+                                }
+                            }
+
                             batchSize = 0L;
                             // check whether journal file is over file limit
                             if (bc.position() > maxJournalSize) {
diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
index ad7002b6c..b49969d2a 100644
--- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
+++ 
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
@@ -71,6 +71,7 @@
     // Journal Parameters
     protected final static String MAX_JOURNAL_SIZE = "journalMaxSizeMB";
     protected final static String MAX_BACKUP_JOURNALS = "journalMaxBackups";
+    protected final static String JOURNAL_SYNC_DATA = "journalSyncData";
     protected final static String JOURNAL_ADAPTIVE_GROUP_WRITES = 
"journalAdaptiveGroupWrites";
     protected final static String JOURNAL_MAX_GROUP_WAIT_MSEC = 
"journalMaxGroupWaitMSec";
     protected final static String JOURNAL_BUFFERED_WRITES_THRESHOLD = 
"journalBufferedWritesThreshold";
@@ -1536,6 +1537,36 @@ public int getSkipListArenaMaxAllocSize() {
         return getInt(SKIP_LIST_MAX_ALLOC_ENTRY, 128 * 1024);
     }
 
+    /**
+     * Should the data be fsynced on journal before acknowledgment
+     *
+     * Default is true
+     *
+     * @return
+     */
+    public boolean getJournalSyncData() {
+        return getBoolean(JOURNAL_SYNC_DATA, true);
+    }
+
+    /**
+     * Enable or disable journal syncs.
+     * <p>
+     * By default, data sync is enabled to guarantee durability of writes.
+     * <p>
+     * Beware: while disabling data sync in the Bookie journal might improve 
the bookie write performance, it will also
+     * introduce the possibility of data loss. With no sync, the journal 
entries are written in the OS page cache but
+     * not flushed to disk. In case of power failure, the affected bookie 
might lose the unflushed data. If the ledger
+     * is replicated to multiple bookies, the chances of data loss are reduced 
though still present.
+     *
+     * @param syncData
+     *            whether to sync data on disk before acknowledgement
+     * @return server configuration object
+     */
+    public ServerConfiguration setJournalSyncData(boolean syncData) {
+        setProperty(JOURNAL_SYNC_DATA, syncData);
+        return this;
+    }
+
     /**
      * Should we group journal force writes
      *
diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalNoSyncTest.java
 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalNoSyncTest.java
new file mode 100644
index 000000000..baa2d7dc5
--- /dev/null
+++ 
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalNoSyncTest.java
@@ -0,0 +1,63 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie;
+
+import java.util.Enumeration;
+
+import org.apache.bookkeeper.client.BookKeeper.DigestType;
+import org.apache.bookkeeper.client.LedgerEntry;
+import org.apache.bookkeeper.client.LedgerHandle;
+import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class BookieJournalNoSyncTest extends BookKeeperClusterTestCase {
+
+    public BookieJournalNoSyncTest() {
+        super(1);
+
+        baseConf.setJournalSyncData(false);
+    }
+
+    @Test
+    public void testWriteToJournal() throws Exception {
+        LedgerHandle lh = bkc.createLedger(1, 1, DigestType.CRC32, new 
byte[0]);
+
+        int N = 10;
+
+        long ledgerId = lh.getId();
+
+        for (int i = 0; i < N; i++) {
+            lh.addEntry(("entry-" + i).getBytes());
+        }
+
+        restartBookies();
+
+        LedgerHandle readLh = bkc.openLedger(ledgerId, DigestType.CRC32, new 
byte[0]);
+
+        Enumeration<LedgerEntry> entries = readLh.readEntries(0, N - 1);
+        for (int i = 0; i < N; i++) {
+            LedgerEntry entry = entries.nextElement();
+            Assert.assertEquals("entry-" + i, new String(entry.getEntry()));
+        }
+    }
+
+}


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to