Updated Branches:
  refs/heads/trunk e1f2a08fc -> e97803425

warn about sync delays every 5s instead of as-it-happens
patch by Benedict Elliott Smith; reviewed by jbellis for CASSANDRA-3578


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e9780342
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e9780342
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e9780342

Branch: refs/heads/trunk
Commit: e978034259de12619f2e4c32e29b034e9d6a2149
Parents: e1f2a08
Author: belliottsmith <[email protected]>
Authored: Tue Dec 3 19:40:51 2013 +0000
Committer: Jonathan Ellis <[email protected]>
Committed: Thu Dec 5 20:37:19 2013 -0600

----------------------------------------------------------------------
 .../db/commitlog/AbstractCommitLogService.java  | 38 +++++++++++++++++---
 .../db/commitlog/CommitLogReplayer.java         |  2 +-
 2 files changed, 35 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e9780342/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java 
b/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java
index 2f9b236..caa2c65 100644
--- a/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java
+++ b/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java
@@ -28,6 +28,9 @@ import static 
org.apache.cassandra.db.commitlog.CommitLogSegment.Allocation;
 
 public abstract class AbstractCommitLogService
 {
+    // how often should we log syngs that lag behind our desired period
+    private static final long LAG_REPORT_INTERVAL = 
TimeUnit.MINUTES.toMillis(5);
+
     private final Thread thread;
     private volatile boolean shutdown = false;
 
@@ -59,6 +62,12 @@ public abstract class AbstractCommitLogService
         {
             public void run()
             {
+                long firstLagAt = 0;
+                long totalSyncDuration = 0; // total time spent syncing since 
firstLagAt
+                long syncExceededIntervalBy = 0; // time that syncs exceeded 
pollInterval since firstLagAt
+                int lagCount = 0;
+                int syncCount = 0;
+
                 boolean run = true;
                 while (run)
                 {
@@ -73,14 +82,35 @@ public abstract class AbstractCommitLogService
                         lastSyncedAt = syncStarted;
                         syncComplete.signalAll();
 
+
                         // sleep any time we have left before the next one is 
due
-                        long sleep = syncStarted + pollIntervalMillis - 
System.currentTimeMillis();
+                        long now = System.currentTimeMillis();
+                        long sleep = syncStarted + pollIntervalMillis - now;
                         if (sleep < 0)
                         {
-                            logger.warn(String.format("Commit log sync took 
longer than sync interval (by %.2fs), indicating it is a bottleneck", sleep / 
-1000d));
-                            // don't sleep, as we probably have work to do
-                            continue;
+                            // if we have lagged noticeably, update our lag 
counter
+                            if (firstLagAt == 0)
+                            {
+                                firstLagAt = now;
+                                totalSyncDuration = syncExceededIntervalBy = 
syncCount = lagCount = 0;
+                            }
+                            syncExceededIntervalBy -= sleep;
+                            lagCount++;
                         }
+                        syncCount++;
+                        totalSyncDuration += now - syncStarted;
+
+                        if (firstLagAt > 0 && now - firstLagAt >= 
LAG_REPORT_INTERVAL)
+                        {
+                            logger.warn(String.format("Out of %d commit log 
syncs over the past %ds with average duration of %.2fms, %d have exceeded the 
configured commit interval by an average of %.2fms",
+                                                      syncCount, (now - 
firstLagAt) / 1000, (double) totalSyncDuration / syncCount, lagCount, (double) 
syncExceededIntervalBy / lagCount));
+                            firstLagAt = 0;
+                        }
+
+                        // if we have lagged this round, we probably have work 
to do already so we don't sleep
+                        if (sleep < 0)
+                            continue;
+
                         try
                         {
                             haveWork.tryAcquire(sleep, TimeUnit.MILLISECONDS);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e9780342/src/java/org/apache/cassandra/db/commitlog/CommitLogReplayer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/commitlog/CommitLogReplayer.java 
b/src/java/org/apache/cassandra/db/commitlog/CommitLogReplayer.java
index c42ba9b..dff423a 100644
--- a/src/java/org/apache/cassandra/db/commitlog/CommitLogReplayer.java
+++ b/src/java/org/apache/cassandra/db/commitlog/CommitLogReplayer.java
@@ -260,7 +260,7 @@ public class CommitLogReplayer
                     break;
 
                 if (logger.isDebugEnabled())
-                    logger.debug("Replaying {} between {} and {}", file, 
offset, prevEnd);
+                    logger.debug("Replaying {} between {} and {}", file, 
offset, end);
 
                 reader.seek(offset);
 

Reply via email to