pendingOps table is not cleared with fsync=off

Heikki Linnakangas Fri, 08 May 2020 14:22:08 -0700

Hi!

I noticed that commit 3eb77eba5a changed the logic inProcessSyncRequests() (formerly mdsync()) so that if you have fsync=off,the entries are not removed from the pendingOps hash table. I don'tthink that was intended.

I propose the attached patch to move the test for enableFsync so thatthe entries are removed from pendingOps again. It looks larger than itreally is because it re-indents the block of code that is now inside the"if (enableFsync)" condition.


- Heikki

diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c
index 8282a476b4..022366c172 100644
--- a/src/backend/storage/sync/sync.c
+++ b/src/backend/storage/sync/sync.c
@@ -315,14 +315,6 @@ ProcessSyncRequests(void)
 	{
 		int			failures;
 
-		/*
-		 * If fsync is off then we don't have to bother opening the file at
-		 * all.  (We delay checking until this point so that changing fsync on
-		 * the fly behaves sensibly.)
-		 */
-		if (!enableFsync)
-			continue;
-
 		/*
 		 * If the entry is new then don't process it this time; it is new.
 		 * Note "continue" bypasses the hash-remove call at the bottom of the
@@ -335,78 +327,88 @@ ProcessSyncRequests(void)
 		Assert((CycleCtr) (entry->cycle_ctr + 1) == sync_cycle_ctr);
 
 		/*
-		 * If in checkpointer, we want to absorb pending requests every so
-		 * often to prevent overflow of the fsync request queue.  It is
-		 * unspecified whether newly-added entries will be visited by
-		 * hash_seq_search, but we don't care since we don't need to process
-		 * them anyway.
-		 */
-		if (--absorb_counter <= 0)
-		{
-			AbsorbSyncRequests();
-			absorb_counter = FSYNCS_PER_ABSORB;
-		}
-
-		/*
-		 * The fsync table could contain requests to fsync segments that have
-		 * been deleted (unlinked) by the time we get to them. Rather than
-		 * just hoping an ENOENT (or EACCES on Windows) error can be ignored,
-		 * what we do on error is absorb pending requests and then retry.
-		 * Since mdunlink() queues a "cancel" message before actually
-		 * unlinking, the fsync request is guaranteed to be marked canceled
-		 * after the absorb if it really was this case. DROP DATABASE likewise
-		 * has to tell us to forget fsync requests before it starts deletions.
+		 * If fsync is off then we don't have to bother opening the file at
+		 * all.  (We delay checking until this point so that changing fsync on
+		 * the fly behaves sensibly.)
 		 */
-		for (failures = 0; !entry->canceled; failures++)
+		if (enableFsync)
 		{
-			char		path[MAXPGPATH];
-
-			INSTR_TIME_SET_CURRENT(sync_start);
-			if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
-															path) == 0)
-			{
-				/* Success; update statistics about sync timing */
-				INSTR_TIME_SET_CURRENT(sync_end);
-				sync_diff = sync_end;
-				INSTR_TIME_SUBTRACT(sync_diff, sync_start);
-				elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
-				if (elapsed > longest)
-					longest = elapsed;
-				total_elapsed += elapsed;
-				processed++;
-
-				if (log_checkpoints)
-					elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
-						 processed,
-						 path,
-						 (double) elapsed / 1000);
-
-				break;			/* out of retry loop */
-			}
-
 			/*
-			 * It is possible that the relation has been dropped or truncated
-			 * since the fsync request was entered. Therefore, allow ENOENT,
-			 * but only if we didn't fail already on this file.
+			 * If in checkpointer, we want to absorb pending requests every so
+			 * often to prevent overflow of the fsync request queue.  It is
+			 * unspecified whether newly-added entries will be visited by
+			 * hash_seq_search, but we don't care since we don't need to
+			 * process them anyway.
 			 */
-			if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
-				ereport(data_sync_elevel(ERROR),
-						(errcode_for_file_access(),
-						 errmsg("could not fsync file \"%s\": %m",
-								path)));
-			else
-				ereport(DEBUG1,
-						(errcode_for_file_access(),
-						 errmsg("could not fsync file \"%s\" but retrying: %m",
-								path)));
+			if (--absorb_counter <= 0)
+			{
+				AbsorbSyncRequests();
+				absorb_counter = FSYNCS_PER_ABSORB;
+			}
 
 			/*
-			 * Absorb incoming requests and check to see if a cancel arrived
-			 * for this relation fork.
+			 * The fsync table could contain requests to fsync segments that
+			 * have been deleted (unlinked) by the time we get to them. Rather
+			 * than just hoping an ENOENT (or EACCES on Windows) error can be
+			 * ignored, what we do on error is absorb pending requests and
+			 * then retry. Since mdunlink() queues a "cancel" message before
+			 * actually unlinking, the fsync request is guaranteed to be
+			 * marked canceled after the absorb if it really was this case.
+			 * DROP DATABASE likewise has to tell us to forget fsync requests
+			 * before it starts deletions.
 			 */
-			AbsorbSyncRequests();
-			absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
-		}						/* end retry loop */
+			for (failures = 0; !entry->canceled; failures++)
+			{
+				char		path[MAXPGPATH];
+
+				INSTR_TIME_SET_CURRENT(sync_start);
+				if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
+																path) == 0)
+				{
+					/* Success; update statistics about sync timing */
+					INSTR_TIME_SET_CURRENT(sync_end);
+					sync_diff = sync_end;
+					INSTR_TIME_SUBTRACT(sync_diff, sync_start);
+					elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
+					if (elapsed > longest)
+						longest = elapsed;
+					total_elapsed += elapsed;
+					processed++;
+
+					if (log_checkpoints)
+						elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
+							 processed,
+							 path,
+							 (double) elapsed / 1000);
+
+					break;		/* out of retry loop */
+				}
+
+				/*
+				 * It is possible that the relation has been dropped or
+				 * truncated since the fsync request was entered. Therefore,
+				 * allow ENOENT, but only if we didn't fail already on this
+				 * file.
+				 */
+				if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
+					ereport(data_sync_elevel(ERROR),
+							(errcode_for_file_access(),
+							 errmsg("could not fsync file \"%s\": %m",
+									path)));
+				else
+					ereport(DEBUG1,
+							(errcode_for_file_access(),
+							 errmsg("could not fsync file \"%s\" but retrying: %m",
+									path)));
+
+				/*
+				 * Absorb incoming requests and check to see if a cancel
+				 * arrived for this relation fork.
+				 */
+				AbsorbSyncRequests();
+				absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
+			}					/* end retry loop */
+		}
 
 		/* We are done with this entry, remove it */
 		if (hash_search(pendingOps, &entry->tag, HASH_REMOVE, NULL) == NULL)

pendingOps table is not cleared with fsync=off

Reply via email to