Hi!
I noticed that commit 3eb77eba5a changed the logic in
ProcessSyncRequests() (formerly mdsync()) so that if you have fsync=off,
the entries are not removed from the pendingOps hash table. I don't
think that was intended.
I propose the attached patch to move the test for enableFsync so that
the entries are removed from pendingOps again. It looks larger than it
really is because it re-indents the block of code that is now inside the
"if (enableFsync)" condition.
- Heikki
diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c
index 8282a476b4..022366c172 100644
--- a/src/backend/storage/sync/sync.c
+++ b/src/backend/storage/sync/sync.c
@@ -315,14 +315,6 @@ ProcessSyncRequests(void)
{
int failures;
- /*
- * If fsync is off then we don't have to bother opening the file at
- * all. (We delay checking until this point so that changing fsync on
- * the fly behaves sensibly.)
- */
- if (!enableFsync)
- continue;
-
/*
* If the entry is new then don't process it this time; it is new.
* Note "continue" bypasses the hash-remove call at the bottom of the
@@ -335,78 +327,88 @@ ProcessSyncRequests(void)
Assert((CycleCtr) (entry->cycle_ctr + 1) == sync_cycle_ctr);
/*
- * If in checkpointer, we want to absorb pending requests every so
- * often to prevent overflow of the fsync request queue. It is
- * unspecified whether newly-added entries will be visited by
- * hash_seq_search, but we don't care since we don't need to process
- * them anyway.
- */
- if (--absorb_counter <= 0)
- {
- AbsorbSyncRequests();
- absorb_counter = FSYNCS_PER_ABSORB;
- }
-
- /*
- * The fsync table could contain requests to fsync segments that have
- * been deleted (unlinked) by the time we get to them. Rather than
- * just hoping an ENOENT (or EACCES on Windows) error can be ignored,
- * what we do on error is absorb pending requests and then retry.
- * Since mdunlink() queues a "cancel" message before actually
- * unlinking, the fsync request is guaranteed to be marked canceled
- * after the absorb if it really was this case. DROP DATABASE likewise
- * has to tell us to forget fsync requests before it starts deletions.
+ * If fsync is off then we don't have to bother opening the file at
+ * all. (We delay checking until this point so that changing fsync on
+ * the fly behaves sensibly.)
*/
- for (failures = 0; !entry->canceled; failures++)
+ if (enableFsync)
{
- char path[MAXPGPATH];
-
- INSTR_TIME_SET_CURRENT(sync_start);
- if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
- path) == 0)
- {
- /* Success; update statistics about sync timing */
- INSTR_TIME_SET_CURRENT(sync_end);
- sync_diff = sync_end;
- INSTR_TIME_SUBTRACT(sync_diff, sync_start);
- elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
- if (elapsed > longest)
- longest = elapsed;
- total_elapsed += elapsed;
- processed++;
-
- if (log_checkpoints)
- elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
- processed,
- path,
- (double) elapsed / 1000);
-
- break; /* out of retry loop */
- }
-
/*
- * It is possible that the relation has been dropped or truncated
- * since the fsync request was entered. Therefore, allow ENOENT,
- * but only if we didn't fail already on this file.
+ * If in checkpointer, we want to absorb pending requests every so
+ * often to prevent overflow of the fsync request queue. It is
+ * unspecified whether newly-added entries will be visited by
+ * hash_seq_search, but we don't care since we don't need to
+ * process them anyway.
*/
- if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
- ereport(data_sync_elevel(ERROR),
- (errcode_for_file_access(),
- errmsg("could not fsync file \"%s\": %m",
- path)));
- else
- ereport(DEBUG1,
- (errcode_for_file_access(),
- errmsg("could not fsync file \"%s\" but retrying: %m",
- path)));
+ if (--absorb_counter <= 0)
+ {
+ AbsorbSyncRequests();
+ absorb_counter = FSYNCS_PER_ABSORB;
+ }
/*
- * Absorb incoming requests and check to see if a cancel arrived
- * for this relation fork.
+ * The fsync table could contain requests to fsync segments that
+ * have been deleted (unlinked) by the time we get to them. Rather
+ * than just hoping an ENOENT (or EACCES on Windows) error can be
+ * ignored, what we do on error is absorb pending requests and
+ * then retry. Since mdunlink() queues a "cancel" message before
+ * actually unlinking, the fsync request is guaranteed to be
+ * marked canceled after the absorb if it really was this case.
+ * DROP DATABASE likewise has to tell us to forget fsync requests
+ * before it starts deletions.
*/
- AbsorbSyncRequests();
- absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
- } /* end retry loop */
+ for (failures = 0; !entry->canceled; failures++)
+ {
+ char path[MAXPGPATH];
+
+ INSTR_TIME_SET_CURRENT(sync_start);
+ if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
+ path) == 0)
+ {
+ /* Success; update statistics about sync timing */
+ INSTR_TIME_SET_CURRENT(sync_end);
+ sync_diff = sync_end;
+ INSTR_TIME_SUBTRACT(sync_diff, sync_start);
+ elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
+ if (elapsed > longest)
+ longest = elapsed;
+ total_elapsed += elapsed;
+ processed++;
+
+ if (log_checkpoints)
+ elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
+ processed,
+ path,
+ (double) elapsed / 1000);
+
+ break; /* out of retry loop */
+ }
+
+ /*
+ * It is possible that the relation has been dropped or
+ * truncated since the fsync request was entered. Therefore,
+ * allow ENOENT, but only if we didn't fail already on this
+ * file.
+ */
+ if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
+ ereport(data_sync_elevel(ERROR),
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m",
+ path)));
+ else
+ ereport(DEBUG1,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\" but retrying: %m",
+ path)));
+
+ /*
+ * Absorb incoming requests and check to see if a cancel
+ * arrived for this relation fork.
+ */
+ AbsorbSyncRequests();
+ absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
+ } /* end retry loop */
+ }
/* We are done with this entry, remove it */
if (hash_search(pendingOps, &entry->tag, HASH_REMOVE, NULL) == NULL)