I would like to submit a fix for a bug in log file switching.
problem : In multi-threaded application when lot of threads are executing commits in parallel, empty log files might be created. Recovery log scan does not expect empty log files
while scanning log records to undo incomplete transactions..
Fix:
a) prevent empty log switches by rechecking the conditions that triggers the log switches
inside synchronized blocks.
b) Make backward scans skip the empty log files.
Please review the changes in the attached diff file.
I'm employed by IBM and have been working on the Cloudscape product for few years. This is my first
submission to derby.
-suresh
Index: java/engine/org/apache/derby/impl/store/raw/log/Scan.java
===================================================================
--- java/engine/org/apache/derby/impl/store/raw/log/Scan.java (revision 37344)
+++ java/engine/org/apache/derby/impl/store/raw/log/Scan.java (working copy)
@@ -328,6 +328,14 @@
// end of the file header, in other words,
there is at least
// one log record in this log file.
curpos = scan.getFilePointer();
+
+ // if the log file happens to be empty skip and
proceed.
+ // ideally this case should never occur because
log switch is
+ // not suppose to happen on an empty log file.
+ // But it is safer to put following check
incase if it ever
+ // happens to avoid any recovery issues.
+ if (curpos == LogToFile.LOG_FILE_HEADER_SIZE)
+ continue;
}
scan.seek(curpos - 4);
Index: java/engine/org/apache/derby/impl/store/raw/log/LogToFile.java
===================================================================
--- java/engine/org/apache/derby/impl/store/raw/log/LogToFile.java
(revision 37344)
+++ java/engine/org/apache/derby/impl/store/raw/log/LogToFile.java
(working copy)
@@ -1819,24 +1819,12 @@
/////////////////////////////////////////////////////
synchronized (this)
{
- // we have an empty log file here, refuse to switch.
- if (endPosition == LOG_FILE_HEADER_SIZE)
- {
- if (SanityManager.DEBUG)
- {
- Monitor.logMessage("not switching from
an empty log file (" +
- logFileNumber + ")");
- }
- return;
- }
-
// Make sure that this thread of control is guaranteed
to complete
// it's work of switching the log file without having to give up
// the semaphore to a backup or another flusher. Do this by
looping
// until we have the semaphore, the log is not being flushed, and
// the log is not frozen for backup. Track (2985).
- boolean waited = false;
while(logBeingFlushed | isFrozen)
{
try
@@ -1849,6 +1837,17 @@
}
}
+ // we have an empty log file here, refuse to switch.
+ if (endPosition == LOG_FILE_HEADER_SIZE)
+ {
+ if (SanityManager.DEBUG)
+ {
+ Monitor.logMessage("not switching from
an empty log file (" +
+ logFileNumber + ")");
+ }
+ return;
+ }
+
// log file isn't being flushed right now and logOut is
not being
// used.
StorageFile newLogFile =
getLogFileName(logFileNumber+1);
@@ -3721,11 +3720,14 @@
if ((logWrittenFromLastCheckPoint + potentialLastFlush) >
checkpointInterval &&
checkpointDaemon != null &&
!checkpointDaemonCalled && !inLogSwitch)
{
- //following synchronized block is required to make
- //sure only one checkpoint request get scheduled.
+ // following synchronized block is required to make
+ // sure only one checkpoint request get scheduled.
synchronized(this)
{
- if(!checkpointDaemonCalled)
+ // recheck if checkpoint is still required, it
is possible some other
+ // thread might have already scheduled a
checkpoint and completed it.
+ if ((logWrittenFromLastCheckPoint +
potentialLastFlush) > checkpointInterval &&
+ checkpointDaemon != null &&
!checkpointDaemonCalled && !inLogSwitch)
{
checkpointDaemonCalled = true;
checkpointDaemon.serviceNow(myClientNumber);
@@ -3741,11 +3743,14 @@
if (potentialLastFlush > logSwitchInterval &&
!checkpointDaemonCalled && !inLogSwitch)
{
- //following synchronized block is required
- //to make sure only one thread switches the log
file at a time.
+ // following synchronized block is required to
make sure only
+ // one thread switches the log file at a time.
synchronized(this)
{
- if(!inLogSwitch)
+ // recheck if log switch is still
required, it is possible some other
+ // thread might have already switched
the log file.
+ if (potentialLastFlush >
logSwitchInterval &&
+ !checkpointDaemonCalled &&
!inLogSwitch)
{
inLogSwitch = true;
switchLogFile();
