The following pull request was submitted through Github.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/1678

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
This makes it possible to have the container save its state at stop
time, then restore its state on restart.

The feature is mostly interesting as a way to do a "suspend to disk"
kind of equivalent where there is a guarantee that no work will be done
after the tasks are dumped to disk.

Expected use of the feature is to stop containers when more important
containers need the memory resources as well as a way to do a quick host
reboot without losing running state.

This branch requires the client to specifically ask for state to be saved
and restored at both stop and start time. The command line client is set
so that state isn't captured on stop by default (requires --stateful) but
is restored automatically on start (unless --stateless is passed).

Once checkpoint/restore has proved to be reliable, we should probably
introduce a server option, or a container option to have this be used on
host reboot in place of a standard container shutdown.

Closes #1558

Signed-off-by: Stéphane Graber <stgra...@ubuntu.com>
From 14cbb2cd34d2dd27d8c0d9784720d311fab90ad3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com>
Date: Sat, 27 Feb 2016 01:30:02 -0500
Subject: [PATCH] Implement stateful container stop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes it possible to have the container save its state at stop
time, then restore its state on restart.

The feature is mostly interesting as a way to do a "suspend to disk"
kind of equivalent where there is a guarantee that no work will be done
after the tasks are dumped to disk.

Expected use of the feature is to stop containers when more important
containers need the memory resources as well as a way to do a quick host
reboot without losing running state.

This branch requires the client to specifically ask for state to be saved
and restored at both stop and start time. The command line client is set
so that state isn't captured on stop by default (requires --stateful) but
is restored automatically on start (unless --stateless is passed).

Once checkpoint/restore has proved to be reliable, we should probably
introduce a server option, or a container option to have this be used on
host reboot in place of a standard container shutdown.

Closes #1558

Signed-off-by: Stéphane Graber <stgra...@ubuntu.com>
---
 client.go              | 14 +++++++++---
 lxc/action.go          | 18 ++++++++++++++-
 lxc/delete.go          |  2 +-
 lxc/launch.go          |  2 +-
 lxc/main.go            |  8 +++----
 lxc/publish.go         |  4 ++--
 lxd/container.go       |  4 ++--
 lxd/container_lxc.go   | 61 +++++++++++++++++++++++++++++++++++++++++++++-----
 lxd/container_state.go | 38 ++++++++++++++++++++++---------
 lxd/containers.go      |  4 ++--
 specs/rest-api.md      |  3 ++-
 11 files changed, 125 insertions(+), 33 deletions(-)

diff --git a/client.go b/client.go
index c5b1468..664efde 100644
--- a/client.go
+++ b/client.go
@@ -1435,15 +1435,23 @@ func (c *Client) Exec(name string, cmd []string, env 
map[string]string,
        return op.Metadata.GetInt("return")
 }
 
-func (c *Client) Action(name string, action shared.ContainerAction, timeout 
int, force bool) (*Response, error) {
+func (c *Client) Action(name string, action shared.ContainerAction, timeout 
int, force bool, stateful bool) (*Response, error) {
+       body := shared.Jmap{
+               "action":  action,
+               "timeout": timeout,
+               "force":   force}
+
        if action == "start" {
                current, err := c.ContainerState(name)
                if err == nil && current.StatusCode == shared.Frozen {
-                       action = "unfreeze"
+                       body["action"] = "unfreeze"
                }
        }
 
-       body := shared.Jmap{"action": action, "timeout": timeout, "force": 
force}
+       if shared.StringInSlice(string(action), []string{"start", "stop"}) {
+               body["stateful"] = stateful
+       }
+
        return c.put(fmt.Sprintf("containers/%s/state", name), body, Async)
 }
 
diff --git a/lxc/action.go b/lxc/action.go
index f358da4..4d35266 100644
--- a/lxc/action.go
+++ b/lxc/action.go
@@ -16,6 +16,8 @@ type actionCmd struct {
        name       string
        timeout    int
        force      bool
+       stateful   bool
+       stateless  bool
 }
 
 func (c *actionCmd) showByDefault() bool {
@@ -33,6 +35,8 @@ func (c *actionCmd) flags() {
        if c.hasTimeout {
                gnuflag.IntVar(&c.timeout, "timeout", -1, i18n.G("Time to wait 
for the container before killing it."))
                gnuflag.BoolVar(&c.force, "force", false, i18n.G("Force the 
container to shutdown."))
+               gnuflag.BoolVar(&c.stateful, "stateful", false, i18n.G("Store 
the container state (only for stop)."))
+               gnuflag.BoolVar(&c.stateless, "stateless", false, 
i18n.G("Ignore the container state (only forstart)."))
        }
 }
 
@@ -41,6 +45,18 @@ func (c *actionCmd) run(config *lxd.Config, args []string) 
error {
                return errArgs
        }
 
+       state := false
+
+       // Never store state unless asked to
+       if c.action == "start" && !c.stateless {
+               state = true
+       }
+
+       // Always restore state (if present) unless asked not to
+       if c.action == "stop" && c.stateful {
+               state = true
+       }
+
        for _, nameArg := range args {
                remote, name := config.ParseRemoteAndContainer(nameArg)
                d, err := lxd.NewClient(config, remote)
@@ -48,7 +64,7 @@ func (c *actionCmd) run(config *lxd.Config, args []string) 
error {
                        return err
                }
 
-               resp, err := d.Action(name, c.action, c.timeout, c.force)
+               resp, err := d.Action(name, c.action, c.timeout, c.force, state)
                if err != nil {
                        return err
                }
diff --git a/lxc/delete.go b/lxc/delete.go
index 716832b..afa3d7d 100644
--- a/lxc/delete.go
+++ b/lxc/delete.go
@@ -92,7 +92,7 @@ func (c *deleteCmd) run(config *lxd.Config, args []string) 
error {
                                return fmt.Errorf(i18n.G("The container is 
currently running, stop it first or pass --force."))
                        }
 
-                       resp, err := d.Action(name, shared.Stop, -1, true)
+                       resp, err := d.Action(name, shared.Stop, -1, true, 
false)
                        if err != nil {
                                return err
                        }
diff --git a/lxc/launch.go b/lxc/launch.go
index e2c9bd6..c065872 100644
--- a/lxc/launch.go
+++ b/lxc/launch.go
@@ -120,7 +120,7 @@ func (c *launchCmd) run(config *lxd.Config, args []string) 
error {
        }
 
        fmt.Printf(i18n.G("Starting %s")+"\n", name)
-       resp, err = d.Action(name, shared.Start, -1, false)
+       resp, err = d.Action(name, shared.Start, -1, false, false)
        if err != nil {
                return err
        }
diff --git a/lxc/main.go b/lxc/main.go
index 88845f5..a25cafe 100644
--- a/lxc/main.go
+++ b/lxc/main.go
@@ -182,15 +182,15 @@ var commands = map[string]command{
        "list":     &listCmd{},
        "monitor":  &monitorCmd{},
        "move":     &moveCmd{},
-       "pause":    &actionCmd{shared.Freeze, false, false, "pause", -1, false},
+       "pause":    &actionCmd{shared.Freeze, false, false, "pause", -1, false, 
false, false},
        "profile":  &profileCmd{},
        "publish":  &publishCmd{},
        "remote":   &remoteCmd{},
-       "restart":  &actionCmd{shared.Restart, true, true, "restart", -1, 
false},
+       "restart":  &actionCmd{shared.Restart, true, true, "restart", -1, 
false, false, false},
        "restore":  &restoreCmd{},
        "snapshot": &snapshotCmd{},
-       "start":    &actionCmd{shared.Start, false, true, "start", -1, false},
-       "stop":     &actionCmd{shared.Stop, true, true, "stop", -1, false},
+       "start":    &actionCmd{shared.Start, false, true, "start", -1, false, 
false, false},
+       "stop":     &actionCmd{shared.Stop, true, true, "stop", -1, false, 
false, false},
        "version":  &versionCmd{},
 }
 
diff --git a/lxc/publish.go b/lxc/publish.go
index 8ccd663..690dfdf 100644
--- a/lxc/publish.go
+++ b/lxc/publish.go
@@ -97,7 +97,7 @@ func (c *publishCmd) run(config *lxd.Config, args []string) 
error {
                                }
                        }
 
-                       resp, err := s.Action(cName, shared.Stop, -1, true)
+                       resp, err := s.Action(cName, shared.Stop, -1, true, 
false)
                        if err != nil {
                                return err
                        }
@@ -110,7 +110,7 @@ func (c *publishCmd) run(config *lxd.Config, args []string) 
error {
                        if op.StatusCode == shared.Failure {
                                return fmt.Errorf(i18n.G("Stopping container 
failed!"))
                        }
-                       defer s.Action(cName, shared.Start, -1, true)
+                       defer s.Action(cName, shared.Start, -1, true, false)
 
                        if wasEphemeral {
                                ct.Ephemeral = true
diff --git a/lxd/container.go b/lxd/container.go
index 0abfbd6..e5ab8ec 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -311,8 +311,8 @@ type container interface {
        // Container actions
        Freeze() error
        Shutdown(timeout time.Duration) error
-       Start() error
-       Stop() error
+       Start(stateful bool) error
+       Stop(stateful bool) error
        Unfreeze() error
 
        // Snapshots & migration
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index b52738c..b5b8438 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1070,7 +1070,7 @@ func (c *containerLXC) startCommon() (string, error) {
        return configPath, nil
 }
 
-func (c *containerLXC) Start() error {
+func (c *containerLXC) Start(stateful bool) error {
        // Wait for container tear down to finish
        wgStopping, stopping := lxcStoppingContainers[c.id]
        if stopping {
@@ -1083,6 +1083,25 @@ func (c *containerLXC) Start() error {
                return err
        }
 
+       // If stateful, restore now
+       if stateful && shared.PathExists(c.StatePath()) {
+               err := c.c.Restore(lxc.RestoreOptions{
+                       Directory: c.StatePath(),
+                       Verbose:   true,
+               })
+
+               err2 := os.RemoveAll(c.StatePath())
+               if err2 != nil {
+                       return err2
+               }
+
+               if err != nil {
+                       return err
+               }
+
+               return nil
+       }
+
        // Start the LXC container
        out, err := exec.Command(
                c.daemon.execPath,
@@ -1232,7 +1251,33 @@ func (c *containerLXC) setupStopping() *sync.WaitGroup {
 }
 
 // Stop functions
-func (c *containerLXC) Stop() error {
+func (c *containerLXC) Stop(stateful bool) error {
+       // Handle stateful stop
+       if stateful {
+               // Cleanup any existing state
+               stateDir := c.StatePath()
+               os.RemoveAll(stateDir)
+
+               err := os.MkdirAll(stateDir, 0700)
+               if err != nil {
+                       return err
+               }
+
+               // Checkpoint
+               opts := lxc.CheckpointOptions{Directory: stateDir, Stop: true, 
Verbose: true}
+               err = c.Checkpoint(opts)
+               err2 := CollectCRIULogFile(c, stateDir, "snapshot", "dump")
+               if err2 != nil {
+                       shared.Log.Warn("failed to collect criu log file", 
log.Ctx{"error": err2})
+               }
+
+               if err != nil {
+                       return err
+               }
+
+               return nil
+       }
+
        // Load the go-lxc struct
        err := c.initLXC()
        if err != nil {
@@ -1351,7 +1396,7 @@ func (c *containerLXC) OnStop(target string) error {
 
                // Reboot the container
                if target == "reboot" {
-                       c.Start()
+                       c.Start(false)
                        return
                }
 
@@ -1474,7 +1519,7 @@ func (c *containerLXC) Restore(sourceContainer container) 
error {
        wasRunning := false
        if c.IsRunning() {
                wasRunning = true
-               if err := c.Stop(); err != nil {
+               if err := c.Stop(false); err != nil {
                        shared.Log.Error(
                                "Could not stop container",
                                log.Ctx{
@@ -1528,12 +1573,16 @@ func (c *containerLXC) Restore(sourceContainer 
container) error {
                        shared.Log.Error("failed to delete snapshot state", 
"path", c.StatePath(), "err", err2)
                }
 
-               return err
+               if err != nil {
+                       return err
+               }
+
+               return nil
        }
 
        // Restart the container
        if wasRunning {
-               return c.Start()
+               return c.Start(false)
        }
 
        return nil
diff --git a/lxd/container_state.go b/lxd/container_state.go
index 9446617..5e4ced7 100644
--- a/lxd/container_state.go
+++ b/lxd/container_state.go
@@ -7,13 +7,15 @@ import (
        "time"
 
        "github.com/gorilla/mux"
+
        "github.com/lxc/lxd/shared"
 )
 
 type containerStatePutReq struct {
-       Action  string `json:"action"`
-       Timeout int    `json:"timeout"`
-       Force   bool   `json:"force"`
+       Action   string `json:"action"`
+       Timeout  int    `json:"timeout"`
+       Force    bool   `json:"force"`
+       Stateful bool   `json:"stateful"`
 }
 
 func containerState(d *Daemon, r *http.Request) Response {
@@ -53,15 +55,25 @@ func containerStatePut(d *Daemon, r *http.Request) Response 
{
        switch shared.ContainerAction(raw.Action) {
        case shared.Start:
                do = func(op *operation) error {
-                       if err = c.Start(); err != nil {
+                       if err = c.Start(raw.Stateful); err != nil {
                                return err
                        }
                        return nil
                }
        case shared.Stop:
-               if raw.Timeout == 0 || raw.Force {
+               if raw.Stateful {
                        do = func(op *operation) error {
-                               if err = c.Stop(); err != nil {
+                               err := c.Stop(raw.Stateful)
+                               if err != nil {
+                                       return err
+                               }
+
+                               return nil
+                       }
+               } else if raw.Timeout == 0 || raw.Force {
+                       do = func(op *operation) error {
+                               err = c.Stop(false)
+                               if err != nil {
                                        return err
                                }
 
@@ -73,30 +85,36 @@ func containerStatePut(d *Daemon, r *http.Request) Response 
{
                        }
                } else {
                        do = func(op *operation) error {
-                               if err = c.Shutdown(time.Duration(raw.Timeout) 
* time.Second); err != nil {
+                               err = c.Shutdown(time.Duration(raw.Timeout) * 
time.Second)
+                               if err != nil {
                                        return err
                                }
 
                                if c.IsEphemeral() {
                                        c.Delete()
                                }
+
                                return nil
                        }
                }
        case shared.Restart:
                do = func(op *operation) error {
                        if raw.Timeout == 0 || raw.Force {
-                               if err = c.Stop(); err != nil {
+                               err = c.Stop(false)
+                               if err != nil {
                                        return err
                                }
                        } else {
-                               if err = c.Shutdown(time.Duration(raw.Timeout) 
* time.Second); err != nil {
+                               err = c.Shutdown(time.Duration(raw.Timeout) * 
time.Second)
+                               if err != nil {
                                        return err
                                }
                        }
-                       if err = c.Start(); err != nil {
+                       err = c.Start(false)
+                       if err != nil {
                                return err
                        }
+
                        return nil
                }
        case shared.Freeze:
diff --git a/lxd/containers.go b/lxd/containers.go
index 520bb58..6a02273 100644
--- a/lxd/containers.go
+++ b/lxd/containers.go
@@ -114,7 +114,7 @@ func containersRestart(d *Daemon) error {
                                continue
                        }
 
-                       c.Start()
+                       c.Start(false)
 
                        autoStartDelayInt, err := strconv.Atoi(autoStartDelay)
                        if err == nil {
@@ -155,7 +155,7 @@ func containersShutdown(d *Daemon) error {
                        wg.Add(1)
                        go func() {
                                c.Shutdown(time.Second * 30)
-                               c.Stop()
+                               c.Stop(false)
                                wg.Done()
                        }()
                }
diff --git a/specs/rest-api.md b/specs/rest-api.md
index 09fca34..9dbacf9 100644
--- a/specs/rest-api.md
+++ b/specs/rest-api.md
@@ -754,7 +754,8 @@ Input:
     {
         "action": "stop",       # State change action (stop, start, restart, 
freeze or unfreeze)
         "timeout": 30,          # A timeout after which the state change is 
considered as failed
-        "force": true           # Force the state change (currently only valid 
for stop and restart where it means killing the container)
+        "force": true,          # Force the state change (currently only valid 
for stop and restart where it means killing the container)
+        "stateful": true        # Whether to store or restore runtime state 
before stopping or startiong (only valid for stop and start, defaults to false)
     }
 
 ## /1.0/containers/\<name\>/files
_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to