On 8/4/17 7:01 pm, Edward Tomasz Napierała wrote:
On 0313T1206, Pete French wrote:
I have a number of machines in Azure, all booting from ZFS and, until
the weekend, running 10.3 perfectly happily.

I started upgrading these to 11. The first went fine, the second would
not boot. Looking at the boot diagnostics, it is having problems finding the
root pool to mount. I see this in the diagnostic output:

        storvsc0: <Hyper-V IDE Storage Interface> on vmbus0
        Solaris: NOTICE: Cannot find the pool label for 'rpool'
        Mounting from zfs:rpool/ROOT/default failed with error 5.
        Root mount waiting for: storvsc
        (probe0:blkvsc0:0:storvsc1: 0:<Hyper-V IDE Storage Interface>0):  on 
vmbus0
        storvsc scsi_status = 2
        (da0:blkvsc0:0:0:0): UNMAPPED
        (probe1:blkvsc1:0:1:0): storvsc scsi_status = 2
        hvheartbeat0: <Hyper-V Heartbeat> on vmbus0
        da0 at blkvsc0 bus 0 scbus2 target 0 lun 0

As you can see, the drive da0 only appears after it has tried, and failed,
to mount the root pool.
Does the same problem still happen with recent 11-STABLE?

There is a fix for this floating around, which we applied at work.
Our systems run 10.3, but I think it wouldn't be a bad thing to add generally, as it could (if we let it) solve the problem we sometimes see with NFS as well
as with Azure.

p4 diff2 -du //depot/bugatti/FreeBSD-PZ/10.3/sys/kern/vfs_mountroot.c#1 //depot/bugatti/FreeBSD-PZ/10.3/sys/kern/vfs_mountroot.c#3 ==== //depot/bugatti/FreeBSD-PZ/10.3/sys/kern/vfs_mountroot.c#1 (text) - //depot/bugatti/FreeBSD-PZ/10.3/sys/kern/vfs_mountroot.c#3 (text) ==== content
@@ -126,8 +126,8 @@
 static int root_mount_mddev;
 static int root_mount_complete;

-/* By default wait up to 3 seconds for devices to appear. */
-static int root_mount_timeout = 3;
+/* By default wait up to 30 seconds for devices to appear. */
+static int root_mount_timeout = 30;
 TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);

 struct root_hold_token *
@@ -690,7 +690,7 @@
     char *errmsg;
     struct mntarg *ma;
     char *dev, *fs, *opts, *tok;
-    int delay, error, timeout;
+    int delay, error, timeout, err_stride;

     error = parse_token(conf, &tok);
     if (error)
@@ -727,11 +727,20 @@
         goto out;
     }

+    /*
+     * For ZFS we can't simply wait for a specific device
+     * as we only know the pool name. To work around this,
+     * parse_mount() will retry the mount later on.
+     *
+     * While retrying for NFS could be implemented similarly
+     * it is currently not supported.
+     */
+    delay = hz / 10;
+    timeout = root_mount_timeout * hz;
+
     if (strcmp(fs, "zfs") != 0 && strstr(fs, "nfs") == NULL &&
         dev[0] != '\0' && !parse_mount_dev_present(dev)) {
         printf("mountroot: waiting for device %s ...\n", dev);
-        delay = hz / 10;
-        timeout = root_mount_timeout * hz;
         do {
             pause("rmdev", delay);
             timeout -= delay;
@@ -741,16 +750,34 @@
             goto out;
         }
     }
+    /* Timeout keeps counting down */

-    ma = NULL;
-    ma = mount_arg(ma, "fstype", fs, -1);
-    ma = mount_arg(ma, "fspath", "/", -1);
-    ma = mount_arg(ma, "from", dev, -1);
-    ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
-    ma = mount_arg(ma, "ro", NULL, 0);
-    ma = parse_mountroot_options(ma, opts);
-    error = kernel_mount(ma, MNT_ROOTFS);
+    err_stride=0;
+    do {
+        ma = NULL;
+        ma = mount_arg(ma, "fstype", fs, -1);
+        ma = mount_arg(ma, "fspath", "/", -1);
+        ma = mount_arg(ma, "from", dev, -1);
+        ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
+        ma = mount_arg(ma, "ro", NULL, 0);
+        ma = parse_mountroot_options(ma, opts);

+        error = kernel_mount(ma, MNT_ROOTFS);
+        /* UFS only does it once */
+        if (strcmp(fs, "zfs") != 0)
+            break;
+        timeout -= delay;
+        if (timeout > 0 && error) {
+            if (err_stride <= 0 ) {
+                    printf("Mounting from %s:%s failed with error %d. "
+                    "%d seconds left. Retrying.\n", fs, dev, error,
+                    timeout / hz);
+            }
+            err_stride += 1;
+            err_stride %= 50;
+            pause("rmzfs", delay);
+        }
+    } while (timeout > 0 && error);
  out:
     if (error) {
         printf("Mounting from %s:%s failed with error %d",


_______________________________________________
freebsd-stable@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"


_______________________________________________
freebsd-stable@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Reply via email to