Simon Riggs <[email protected]> writes:
> Please post the patch rather than fixing directly. There's some subtle
> stuff there and it would be best to discuss first.
And here's a proposed patch for not throwing ERROR during replay of DROP
TABLESPACE. I had originally thought this would be a one-liner
s/ERROR/LOG/, but on inspection destroy_tablespace_directories() really
needs to be changed too, so that it doesn't throw error for unremovable
directories.
regards, tom lane
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 8ed6d06bb2370a54349f2a86ee13e08a381eacf2..ed0c9ea3ee5a372c6926f41fc0fc93ed7d80d6cd 100644
*** a/src/backend/commands/tablespace.c
--- b/src/backend/commands/tablespace.c
*************** create_tablespace_directories(const char
*** 626,634 ****
/*
* destroy_tablespace_directories
*
! * Attempt to remove filesystem infrastructure
*
! * 'redo' indicates we are redoing a drop from XLOG; okay if nothing there
*
* Returns TRUE if successful, FALSE if some subdirectory is not empty
*/
--- 626,638 ----
/*
* destroy_tablespace_directories
*
! * Attempt to remove filesystem infrastructure for the tablespace.
*
! * 'redo' indicates we are redoing a drop from XLOG; in that case we should
! * not throw an ERROR for problems, just LOG them. The worst consequence of
! * not removing files here would be failure to release some disk space, which
! * does not justify throwing an error that would require manual intervention
! * to get the database running again.
*
* Returns TRUE if successful, FALSE if some subdirectory is not empty
*/
*************** destroy_tablespace_directories(Oid table
*** 681,686 ****
--- 685,700 ----
pfree(linkloc_with_version_dir);
return true;
}
+ else if (redo)
+ {
+ /* in redo, just log other types of error */
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not open directory \"%s\": %m",
+ linkloc_with_version_dir)));
+ pfree(linkloc_with_version_dir);
+ return false;
+ }
/* else let ReadDir report the error */
}
*************** destroy_tablespace_directories(Oid table
*** 704,710 ****
/* remove empty directory */
if (rmdir(subfile) < 0)
! ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove directory \"%s\": %m",
subfile)));
--- 718,724 ----
/* remove empty directory */
if (rmdir(subfile) < 0)
! ereport(redo ? LOG : ERROR,
(errcode_for_file_access(),
errmsg("could not remove directory \"%s\": %m",
subfile)));
*************** destroy_tablespace_directories(Oid table
*** 716,738 ****
/* remove version directory */
if (rmdir(linkloc_with_version_dir) < 0)
! ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove directory \"%s\": %m",
linkloc_with_version_dir)));
/*
* Try to remove the symlink. We must however deal with the possibility
* that it's a directory instead of a symlink --- this could happen during
* WAL replay (see TablespaceCreateDbspace), and it is also the case on
* Windows where junction points lstat() as directories.
*/
linkloc = pstrdup(linkloc_with_version_dir);
get_parent_directory(linkloc);
if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
{
if (rmdir(linkloc) < 0)
! ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove directory \"%s\": %m",
linkloc)));
--- 730,759 ----
/* remove version directory */
if (rmdir(linkloc_with_version_dir) < 0)
! {
! ereport(redo ? LOG : ERROR,
(errcode_for_file_access(),
errmsg("could not remove directory \"%s\": %m",
linkloc_with_version_dir)));
+ pfree(linkloc_with_version_dir);
+ return false;
+ }
/*
* Try to remove the symlink. We must however deal with the possibility
* that it's a directory instead of a symlink --- this could happen during
* WAL replay (see TablespaceCreateDbspace), and it is also the case on
* Windows where junction points lstat() as directories.
+ *
+ * Note: in the redo case, we'll return true if this final step fails;
+ * there's no point in retrying it.
*/
linkloc = pstrdup(linkloc_with_version_dir);
get_parent_directory(linkloc);
if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
{
if (rmdir(linkloc) < 0)
! ereport(redo ? LOG : ERROR,
(errcode_for_file_access(),
errmsg("could not remove directory \"%s\": %m",
linkloc)));
*************** destroy_tablespace_directories(Oid table
*** 740,746 ****
else
{
if (unlink(linkloc) < 0)
! ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove symbolic link \"%s\": %m",
linkloc)));
--- 761,767 ----
else
{
if (unlink(linkloc) < 0)
! ereport(redo ? LOG : ERROR,
(errcode_for_file_access(),
errmsg("could not remove symbolic link \"%s\": %m",
linkloc)));
*************** tblspc_redo(XLogRecPtr lsn, XLogRecord *
*** 1451,1464 ****
xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);
/*
! * If we issued a WAL record for a drop tablespace it is because there
! * were no files in it at all. That means that no permanent objects
! * can exist in it at this point.
*
* It is possible for standby users to be using this tablespace as a
* location for their temporary files, so if we fail to remove all
* files then do conflict processing and try again, if currently
* enabled.
*/
if (!destroy_tablespace_directories(xlrec->ts_id, true))
{
--- 1472,1490 ----
xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);
/*
! * If we issued a WAL record for a drop tablespace it implies that
! * there were no files in it at all when the DROP was done. That means
! * that no permanent objects can exist in it at this point.
*
* It is possible for standby users to be using this tablespace as a
* location for their temporary files, so if we fail to remove all
* files then do conflict processing and try again, if currently
* enabled.
+ *
+ * Other possible reasons for failure include bollixed file permissions
+ * on a standby server when they were okay on the primary, etc etc.
+ * There's not much we can do about that, so just remove what we can
+ * and press on.
*/
if (!destroy_tablespace_directories(xlrec->ts_id, true))
{
*************** tblspc_redo(XLogRecPtr lsn, XLogRecord *
*** 1466,1480 ****
/*
* If we did recovery processing then hopefully the backends who
! * wrote temp files should have cleaned up and exited by now. So
! * lets recheck before we throw an error. If !process_conflicts
! * then this will just fail again.
*/
if (!destroy_tablespace_directories(xlrec->ts_id, true))
! ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("tablespace %u is not empty",
! xlrec->ts_id)));
}
}
else
--- 1492,1509 ----
/*
* If we did recovery processing then hopefully the backends who
! * wrote temp files should have cleaned up and exited by now. So
! * retry before complaining. If we fail again, this is just a LOG
! * condition, because it's not worth throwing an ERROR for (as
! * that would crash the database and require manual intervention
! * before we could get past this WAL record on restart).
*/
if (!destroy_tablespace_directories(xlrec->ts_id, true))
! ereport(LOG,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("tablespace %u is not empty",
! xlrec->ts_id),
! errdetail("Continuing even though some files could not be removed.")));
}
}
else
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers