On Mon, Jan 15, 2018 at 07:25:11PM +0000, David Bright wrote:
> Author: dab
> Date: Mon Jan 15 19:25:11 2018
> New Revision: 328013
> URL: https://svnweb.freebsd.org/changeset/base/328013
> 
> Log:
>   Exit fsck_ffs with non-zero status when file system is not repaired.
>   
>   When the fsck_ffs program cannot fully repair a file system, it will
>   output the message PLEASE RERUN FSCK. However, it does not exit with a
>   non-zero status in this case (contradicting the man page claim that it
>   "exits with 0 on success, and >0 if an error occurs.")  The fsck
>   rc-script (when running "fsck -y") tests the status from fsck (which
>   passes along the exit status from fsck_ffs) and issues a "stop_boot"
>   if the status fails. However, this is not effective since fsck_ffs can
>   return zero even on (some) errors. Effectively, it is left to a later
>   step in the boot process when the file systems are mounted to detect
>   the still-unclean file system and stop the boot.
>   
>   This change modifies fsck_ffs so that when it cannot fully repair the
>   file system and issues the PLEASE RERUN FSCK message it also exits
>   with a non-zero status.
>   
>   While here, the fsck_ffs man page has also been updated to document
>   the failing exit status codes used by fsck_ffs. Previously, only exit
>   status 7 was documented. Some of these exit statuses are tested for in
>   the fsck rc-script, so they are clearly depended upon and deserve
>   documentation.

etc/rc.d/fsck doesn't know how to interpret the new exit code and now
just drops to a single-user shell when it is encountered. This is
happening to me semi-regularly when my test systems crash, especially
when I test kernel panic handling. :)

Is there any reason etc/rc.d/fsck shouldn't automatically retry (up to
some configurable number of retries) when the new error code is seen?
The patch below seems to do the trick for me:

diff --git a/etc/defaults/rc.conf b/etc/defaults/rc.conf
index 584e842bba2c..63d2fcc0be8d 100644
--- a/etc/defaults/rc.conf
+++ b/etc/defaults/rc.conf
@@ -95,6 +95,7 @@ root_rw_mount="YES"   # Set to NO to inhibit remounting root read-write.
 root_hold_delay="30"   # Time to wait for root mount hold release.
 fsck_y_enable="NO"     # Set to YES to do fsck -y if the initial preen fails.
 fsck_y_flags="-T ffs:-R -T ufs:-R"     # Additional flags for fsck -y
+fsck_retries="3"        # Number of times to retry fsck before giving up.
 background_fsck="YES"  # Attempt to run fsck in the background where possible.
 background_fsck_delay="60" # Time to wait (seconds) before starting the fsck.
 growfs_enable="NO"     # Set to YES to attempt to grow the root filesystem on boot
diff --git a/etc/rc.d/fsck b/etc/rc.d/fsck
index bd3122a20110..708d92228e3d 100755
--- a/etc/rc.d/fsck
+++ b/etc/rc.d/fsck
@@ -14,8 +14,82 @@ desc="Run file system checks"
 start_cmd="fsck_start"
 stop_cmd=":"
 
+_fsck_run()
+{
+       local err
+
+       if checkyesno background_fsck; then
+               fsck -F -p
+       else
+               fsck -p
+       fi
+
+       err=$?
+       if [ ${err} -eq 3 ]; then
+               echo "Warning! Some of the devices might not be" \
+                   "available; retrying"
+               root_hold_wait
+               check_startmsgs && echo "Restarting file system checks:"
+               if checkyesno background_fsck; then
+                       fsck -F -p
+               else
+                       fsck -p
+               fi
+               err=$?
+       fi
+
+       case ${err} in
+       0)
+               ;;
+       2)
+               stop_boot
+               ;;
+       4)
+               echo "Rebooting..."
+               reboot
+               echo "Reboot failed; help!"
+               stop_boot
+               ;;
+       8)
+               if checkyesno fsck_y_enable; then
+                       echo "File system preen failed, trying fsck -y ${fsck_y_flags}"
+                       fsck -y ${fsck_y_flags}
+                       case $? in
+                       0)
+                               ;;
+                       *)
+                       echo "Automatic file system check failed; help!"
+                               stop_boot
+                               ;;
+                       esac
+               else
+                       echo "Automatic file system check failed; help!"
+                       stop_boot
+               fi
+               ;;
+       12)
+               echo "Boot interrupted."
+               stop_boot
+               ;;
+       16)
+               echo "File system check retry requested."
+               ;;
+       130)
+               stop_boot
+               ;;
+       *)
+               echo "Unknown error ${err}; help!"
+               stop_boot
+               ;;
+       esac
+
+       return $err
+}
+
 fsck_start()
 {
+       local err tries
+
        if [ "$autoboot" = no ]; then
                echo "Fast boot: skipping disk checks."
        elif [ ! -r /etc/fstab ]; then
@@ -25,67 +99,13 @@ fsck_start()
                trap : 3
 
                check_startmsgs && echo "Starting file system checks:"
-               if checkyesno background_fsck; then
-                       fsck -F -p
-               else
-                       fsck -p
-               fi
-
-               err=$?
-               if [ ${err} -eq 3 ]; then
-                       echo "Warning! Some of the devices might not be" \
-                           "available; retrying"
-                       root_hold_wait
-                       check_startmsgs && echo "Restarting file system checks:"
-                       if checkyesno background_fsck; then
-                               fsck -F -p
-                       else
-                               fsck -p
-                       fi
+               tries=$fsck_retries
+               while [ $tries -gt 0 ]; do
+                       _fsck_run
                        err=$?
-               fi
-
-               case ${err} in
-               0)
-                       ;;
-               2)
-                       stop_boot
-                       ;;
-               4)
-                       echo "Rebooting..."
-                       reboot
-                       echo "Reboot failed; help!"
-                       stop_boot
-                       ;;
-               8)
-                       if checkyesno fsck_y_enable; then
-                               echo "File system preen failed, trying fsck -y ${fsck_y_flags}"
-                               fsck -y ${fsck_y_flags}
-                               case $? in
-                               0)
-                                       ;;
-                               *)
-                               echo "Automatic file system check failed; help!"
-                                       stop_boot
-                                       ;;
-                               esac
-                       else
-                               echo "Automatic file system check failed; help!"
-                               stop_boot
-                       fi
-                       ;;
-               12)
-                       echo "Boot interrupted."
-                       stop_boot
-                       ;;
-               130)
-                       stop_boot
-                       ;;
-               *)
-                       echo "Unknown error ${err}; help!"
-                       stop_boot
-                       ;;
-               esac
+                       [ $err -eq 16 ] || break
+                       tries=$(($tries - 1))
+               done
        fi
 }
 
diff --git a/share/man/man5/rc.conf.5 b/share/man/man5/rc.conf.5
index c27a2134e6bc..c9a16ca9f65c 100644
--- a/share/man/man5/rc.conf.5
+++ b/share/man/man5/rc.conf.5
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd February 15, 2018
+.Dd March 9, 2018
 .Dt RC.CONF 5
 .Os
 .Sh NAME
@@ -2053,6 +2053,11 @@ will be run with the
 .Fl y
 flag if the initial preen
 of the file systems fails.
+.It Va fsck_retries
+.Pq Vt int
+Maximum number of times to re-run
+.Xr fsck 8
+if its exit status indicates that a re-run is required.
 .It Va background_fsck
 .Pq Vt bool
 If set to
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to