If a FIFREEZE operation run by hv_vss_daemon takes longer than the
VSS_USERSPACE_TIMEOUT set in the hv_snapshot module, the daemon's write
of the result to the hv_vss device fails and the daemon exits. Exiting
causes all subsequent VSS operations sent by the Hyper-V host to fail
until the daemon is restarted.

Instead of exiting after such a write failure, try to recover: thaw the
filesystems, then reopen the hv_vss device and perform the initial
handshake again.

Signed-off-by: Michael Gissing <m...@faulpeltz.net>

---
 tools/hv/hv_vss_daemon.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 5d51d6f..0ecbdab 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -176,6 +176,7 @@ int main(int argc, char *argv[])
     openlog("Hyper-V VSS", 0, LOG_USER);
     syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());

+recover:
     vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
     if (vss_fd < 0) {
         syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
@@ -196,6 +197,7 @@ int main(int argc, char *argv[])
     }

     pfd.fd = vss_fd;
+    in_handshake = 1;

     while (1) {
         pfd.events = POLLIN;
@@ -258,7 +260,14 @@ int main(int argc, char *argv[])
         if (len != sizeof(struct hv_vss_msg)) {
             syslog(LOG_ERR, "write failed; error: %d %s", errno,
                    strerror(errno));
-            exit(EXIT_FAILURE);
+            /*
+             * Try to recover from a possible timeout: THAW, then
+             * reopen the device and redo the handshake.
+             */
+            vss_operate(VSS_OP_THAW);
+            close(vss_fd);
+            syslog(LOG_INFO, "trying to recover VSS connection");
+            goto recover;
         }
     }

--
2.7.4


Reply via email to